blob: 3c886f46c3f195dea4e16e0fcfdf06a612120db1 [file] [log] [blame]
/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
16
17package runtime
18
19import (
20 "fmt"
21 "runtime"
22 "sync"
23 "time"
24
David Bainbridge86971522019-09-26 22:09:39 +000025 "k8s.io/klog"
Zack Williamse940c7a2019-08-21 14:25:39 -070026)
27
// ReallyCrash controls whether HandleCrash raises the panic again after the
// panic handlers have run. It defaults to true; it stays exported so that
// components can set it to false to restore the prior behavior.
var ReallyCrash = true
34
35// PanicHandlers is a list of functions which will be invoked when a panic happens.
36var PanicHandlers = []func(interface{}){logPanic}
37
38// HandleCrash simply catches a crash and logs an error. Meant to be called via
39// defer. Additional context-specific handlers can be provided, and will be
40// called in case of panic. HandleCrash actually crashes, after calling the
41// handlers and logging the panic message.
42//
43// TODO: remove this function. We are switching to a world where it's safe for
44// apiserver to panic, since it will be restarted by kubelet. At the beginning
45// of the Kubernetes project, nothing was going to restart apiserver and so
46// catching panics was important. But it's actually much simpler for monitoring
47// software if we just exit when an unexpected panic happens.
48func HandleCrash(additionalHandlers ...func(interface{})) {
49 if r := recover(); r != nil {
50 for _, fn := range PanicHandlers {
51 fn(r)
52 }
53 for _, fn := range additionalHandlers {
54 fn(r)
55 }
56 if ReallyCrash {
57 // Actually proceed to panic.
58 panic(r)
59 }
60 }
61}
62
63// logPanic logs the caller tree when a panic occurs.
64func logPanic(r interface{}) {
David Bainbridge86971522019-09-26 22:09:39 +000065 // Same as stdlib http server code. Manually allocate stack trace buffer size
66 // to prevent excessively large logs
67 const size = 64 << 10
68 stacktrace := make([]byte, size)
69 stacktrace = stacktrace[:runtime.Stack(stacktrace, false)]
Zack Williamse940c7a2019-08-21 14:25:39 -070070 if _, ok := r.(string); ok {
David Bainbridge86971522019-09-26 22:09:39 +000071 klog.Errorf("Observed a panic: %s\n%s", r, stacktrace)
Zack Williamse940c7a2019-08-21 14:25:39 -070072 } else {
David Bainbridge86971522019-09-26 22:09:39 +000073 klog.Errorf("Observed a panic: %#v (%v)\n%s", r, r, stacktrace)
Zack Williamse940c7a2019-08-21 14:25:39 -070074 }
75}
76
Zack Williamse940c7a2019-08-21 14:25:39 -070077// ErrorHandlers is a list of functions which will be invoked when an unreturnable
78// error occurs.
79// TODO(lavalamp): for testability, this and the below HandleError function
80// should be packaged up into a testable and reusable object.
81var ErrorHandlers = []func(error){
82 logError,
83 (&rudimentaryErrorBackoff{
84 lastErrorTime: time.Now(),
85 // 1ms was the number folks were able to stomach as a global rate limit.
86 // If you need to log errors more than 1000 times a second you
87 // should probably consider fixing your code instead. :)
88 minPeriod: time.Millisecond,
89 }).OnError,
90}
91
92// HandlerError is a method to invoke when a non-user facing piece of code cannot
93// return an error and needs to indicate it has been ignored. Invoking this method
94// is preferable to logging the error - the default behavior is to log but the
95// errors may be sent to a remote server for analysis.
96func HandleError(err error) {
97 // this is sometimes called with a nil error. We probably shouldn't fail and should do nothing instead
98 if err == nil {
99 return
100 }
101
102 for _, fn := range ErrorHandlers {
103 fn(err)
104 }
105}
106
107// logError prints an error with the call stack of the location it was reported
108func logError(err error) {
David Bainbridge86971522019-09-26 22:09:39 +0000109 klog.ErrorDepth(2, err)
Zack Williamse940c7a2019-08-21 14:25:39 -0700110}
111
// rudimentaryErrorBackoff slows down callers of OnError that report errors
// less than minPeriod after the previously reported one.
type rudimentaryErrorBackoff struct {
	minPeriod time.Duration // immutable
	// TODO(lavalamp): use the clock for testability. Need to move that
	// package for that to be accessible here.
	lastErrorTimeLock sync.Mutex // guards lastErrorTime
	lastErrorTime     time.Time  // time of the most recent OnError call
}

// OnError will block if it is called more often than the embedded period time.
// This will prevent overly tight hot error loops. Only the calling goroutine
// is delayed; the lock is released before sleeping so that concurrent callers
// do not queue up behind one another.
func (r *rudimentaryErrorBackoff) OnError(error) {
	// Read the clock before acquiring the lock so that time spent waiting for
	// the lock counts towards the elapsed period.
	now := time.Now()

	r.lastErrorTimeLock.Lock()
	elapsed := now.Sub(r.lastErrorTime)
	r.lastErrorTime = time.Now()
	r.lastErrorTimeLock.Unlock()

	// Do not sleep with the lock held: that would block every other caller
	// for the full duration of this goroutine's sleep. time.Sleep returns
	// immediately for a zero or negative duration, so no explicit check is
	// needed when minPeriod has already elapsed.
	time.Sleep(r.minPeriod - elapsed)
}
132
// GetCaller returns the name of the function that called the function that
// calls GetCaller. If that frame cannot be resolved it returns the string
// "Unable to find caller".
func GetCaller() string {
	var pc [1]uintptr
	// Skip three frames: runtime.Callers itself, GetCaller, and the function
	// that called GetCaller. The next frame is the one being reported.
	runtime.Callers(3, pc[:])
	f := runtime.FuncForPC(pc[0])
	if f == nil {
		return "Unable to find caller"
	}
	return f.Name()
}
143
// RecoverFromPanic recovers from a panic, if one is in flight, and replaces
// *err with an error that contains the panic value, the original error and the
// call stack. This enables error handlers to handle errors and panics the same
// way. It is meant to be called via defer; without a panic *err is untouched.
func RecoverFromPanic(err *error) {
	// recover must be called directly from this deferred function.
	r := recover()
	if r == nil {
		return
	}

	// Same as stdlib http server code. The stack trace is written into a
	// fixed-size buffer to prevent excessively large logs.
	const maxStackBytes = 64 << 10
	buf := make([]byte, maxStackBytes)
	trace := buf[:runtime.Stack(buf, false)]

	*err = fmt.Errorf("recovered from panic %q. (err=%v) Call stack:\n%s", r, *err, trace)
}
162
// Must panics with err when err is non-nil and does nothing otherwise. Useful
// for handling programmer level errors.
func Must(err error) {
	if err == nil {
		return
	}
	panic(err)
}