blob: e8a9f609f48763f1bd7222e23ffce8fdbec06457 [file] [log] [blame]
Matteo Scandoloa4285862020-12-01 18:10:10 -08001/*
2Copyright 2014 The Kubernetes Authors.
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15*/
16
17package runtime
18
19import (
20 "fmt"
21 "net/http"
22 "runtime"
23 "sync"
24 "time"
25
26 "k8s.io/klog/v2"
27)
28
// ReallyCrash controls whether HandleCrash re-raises the panic once the
// handlers have run. It defaults to true and stays exported so that
// components can set it to false to restore the prior behavior.
var ReallyCrash = true
35
36// PanicHandlers is a list of functions which will be invoked when a panic happens.
37var PanicHandlers = []func(interface{}){logPanic}
38
39// HandleCrash simply catches a crash and logs an error. Meant to be called via
40// defer. Additional context-specific handlers can be provided, and will be
41// called in case of panic. HandleCrash actually crashes, after calling the
42// handlers and logging the panic message.
43//
44// E.g., you can provide one or more additional handlers for something like shutting down go routines gracefully.
45func HandleCrash(additionalHandlers ...func(interface{})) {
46 if r := recover(); r != nil {
47 for _, fn := range PanicHandlers {
48 fn(r)
49 }
50 for _, fn := range additionalHandlers {
51 fn(r)
52 }
53 if ReallyCrash {
54 // Actually proceed to panic.
55 panic(r)
56 }
57 }
58}
59
60// logPanic logs the caller tree when a panic occurs (except in the special case of http.ErrAbortHandler).
61func logPanic(r interface{}) {
62 if r == http.ErrAbortHandler {
63 // honor the http.ErrAbortHandler sentinel panic value:
64 // ErrAbortHandler is a sentinel panic value to abort a handler.
65 // While any panic from ServeHTTP aborts the response to the client,
66 // panicking with ErrAbortHandler also suppresses logging of a stack trace to the server's error log.
67 return
68 }
69
70 // Same as stdlib http server code. Manually allocate stack trace buffer size
71 // to prevent excessively large logs
72 const size = 64 << 10
73 stacktrace := make([]byte, size)
74 stacktrace = stacktrace[:runtime.Stack(stacktrace, false)]
75 if _, ok := r.(string); ok {
76 klog.Errorf("Observed a panic: %s\n%s", r, stacktrace)
77 } else {
78 klog.Errorf("Observed a panic: %#v (%v)\n%s", r, r, stacktrace)
79 }
80}
81
82// ErrorHandlers is a list of functions which will be invoked when an unreturnable
83// error occurs.
84// TODO(lavalamp): for testability, this and the below HandleError function
85// should be packaged up into a testable and reusable object.
86var ErrorHandlers = []func(error){
87 logError,
88 (&rudimentaryErrorBackoff{
89 lastErrorTime: time.Now(),
90 // 1ms was the number folks were able to stomach as a global rate limit.
91 // If you need to log errors more than 1000 times a second you
92 // should probably consider fixing your code instead. :)
93 minPeriod: time.Millisecond,
94 }).OnError,
95}
96
97// HandlerError is a method to invoke when a non-user facing piece of code cannot
98// return an error and needs to indicate it has been ignored. Invoking this method
99// is preferable to logging the error - the default behavior is to log but the
100// errors may be sent to a remote server for analysis.
101func HandleError(err error) {
102 // this is sometimes called with a nil error. We probably shouldn't fail and should do nothing instead
103 if err == nil {
104 return
105 }
106
107 for _, fn := range ErrorHandlers {
108 fn(err)
109 }
110}
111
// logError prints an error with the call stack of the location it was reported.
//
// NOTE(review): the depth of 2 presumably skips logError and HandleError so
// that the log entry is attributed to HandleError's caller; this only holds
// while logError is invoked directly from HandleError — confirm before
// wrapping or calling it from anywhere else.
func logError(err error) {
	klog.ErrorDepth(2, err)
}
116
// rudimentaryErrorBackoff is a minimal rate limiter for error handling: it
// remembers when the last error was handled and delays callers that report
// errors less than minPeriod apart.
type rudimentaryErrorBackoff struct {
	minPeriod time.Duration // immutable
	// TODO(lavalamp): use the clock for testability. Need to move that
	// package for that to be accessible here.
	lastErrorTimeLock sync.Mutex // guards lastErrorTime
	lastErrorTime     time.Time
}

// OnError will block if it is called more often than the embedded period time.
// This will prevent overly tight hot error loops. The error argument is unused.
//
// Only the calling goroutine is delayed: lastErrorTimeLock protects
// lastErrorTime but is never held while sleeping, so concurrent callers do not
// queue up behind each other's backoff.
func (r *rudimentaryErrorBackoff) OnError(error) {
	r.lastErrorTimeLock.Lock()
	d := time.Since(r.lastErrorTime)
	r.lastErrorTimeLock.Unlock()

	// Sleep without the lock held; holding it here would make every other
	// caller wait for this goroutine's backoff before starting its own.
	if d < r.minPeriod {
		time.Sleep(r.minPeriod - d)
	}

	r.lastErrorTimeLock.Lock()
	r.lastErrorTime = time.Now()
	r.lastErrorTimeLock.Unlock()
}
137
// GetCaller returns the name of the caller of the function that calls it, or
// "Unable to find caller" when that frame cannot be resolved.
func GetCaller() string {
	// Skip runtime.Callers, GetCaller and the function calling GetCaller so
	// that the single recorded frame is the one to be named.
	var pc [1]uintptr
	n := runtime.Callers(3, pc[:])
	if n == 0 {
		return "Unable to find caller"
	}

	// Resolve through CallersFrames rather than FuncForPC: the values written
	// by runtime.Callers are return program counters, and only CallersFrames
	// adjusts them and accounts for inlined functions.
	frame, _ := runtime.CallersFrames(pc[:n]).Next()
	if frame.Function == "" {
		return "Unable to find caller"
	}
	return frame.Function
}
148
// RecoverFromPanic is meant to be deferred. When a panic occurs it recovers
// from it and replaces *err with a new error containing the panic value, the
// original value of *err, and the call stack, so that error handlers can
// handle errors and panics the same way. Without a panic, *err is left as is.
func RecoverFromPanic(err *error) {
	// recover has to be called directly by the deferred function.
	r := recover()
	if r == nil {
		return
	}

	// Same as stdlib http server code: use a fixed-size stack trace buffer to
	// prevent excessively large logs.
	buf := make([]byte, 64<<10)
	n := runtime.Stack(buf, false)

	prev := *err
	*err = fmt.Errorf("recovered from panic %q. (err=%v) Call stack:\n%s", r, prev, buf[:n])
}
167
// Must panics with err when err is non-nil and does nothing otherwise. It is
// useful for handling programmer-level errors.
func Must(err error) {
	if err == nil {
		return
	}
	panic(err)
}
173}