blob: 84a2d5f12684a9e93a6a6190b19a5ffdc5674166 [file] [log] [blame]
Scott Baker2c1c4822019-10-16 11:02:41 -07001/*
2 * Copyright 2019-present Open Networking Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package probe
17
18import (
19 "context"
20 "fmt"
Scott Baker2c1c4822019-10-16 11:02:41 -070021 "net/http"
22 "sync"
khenaidoo26721882021-08-11 17:42:52 -040023
24 "github.com/opencord/voltha-lib-go/v7/pkg/log"
Scott Baker2c1c4822019-10-16 11:02:41 -070025)
26
// ProbeContextKeyType is the type of the context key under which a Probe
// instance is stored in a context.Context.
type ProbeContextKeyType string

// ServiceStatus is the set of states a service can report to the probe.
type ServiceStatus int

// The service states. Values are assigned sequentially starting at zero, so
// the zero value of ServiceStatus is ServiceStatusUnknown.
const (
	// ServiceStatusUnknown is the initial state of a service.
	ServiceStatusUnknown ServiceStatus = iota

	// ServiceStatusPreparing may optionally be used while a service is
	// preparing, such as connecting.
	ServiceStatusPreparing

	// ServiceStatusPrepared may optionally be used once preparation is
	// complete but before the service runs.
	ServiceStatusPrepared

	// ServiceStatusRunning means the service is functional.
	ServiceStatusRunning

	// ServiceStatusStopped means the service has stopped, but not because of
	// an error.
	ServiceStatusStopped

	// ServiceStatusFailed means the service has stopped because of an error.
	ServiceStatusFailed

	// ServiceStatusNotReady means the service has started but is unable to
	// accept requests.
	ServiceStatusNotReady
)

// ProbeContextKey is the context key used to fetch the Probe from a context.
const ProbeContextKey = ProbeContextKeyType("status-update-probe")

// String returns the human-readable name of the status. Any value outside the
// declared set of states is reported as "Unknown".
func (s ServiceStatus) String() string {
	// Indexed by status value; the array length is derived from the highest
	// index used, so it always covers every declared state.
	labels := [...]string{
		ServiceStatusUnknown:   "Unknown",
		ServiceStatusPreparing: "Preparing",
		ServiceStatusPrepared:  "Prepared",
		ServiceStatusRunning:   "Running",
		ServiceStatusStopped:   "Stopped",
		ServiceStatusFailed:    "Failed",
		ServiceStatusNotReady:  "NotReady",
	}
	if s < 0 || int(s) >= len(labels) {
		return "Unknown"
	}
	return labels[s]
}
82
83// ServiceStatusUpdate status update event
84type ServiceStatusUpdate struct {
85 Name string
86 Status ServiceStatus
87}
88
89// Probe reciever on which to implement probe capabilities
90type Probe struct {
91 readyFunc func(map[string]ServiceStatus) bool
92 healthFunc func(map[string]ServiceStatus) bool
93
94 mutex sync.RWMutex
95 status map[string]ServiceStatus
96 isReady bool
97 isHealthy bool
98}
99
100// WithReadyFunc override the default ready calculation function
101func (p *Probe) WithReadyFunc(readyFunc func(map[string]ServiceStatus) bool) *Probe {
102 p.readyFunc = readyFunc
103 return p
104}
105
106// WithHealthFunc override the default health calculation function
107func (p *Probe) WithHealthFunc(healthFunc func(map[string]ServiceStatus) bool) *Probe {
108 p.healthFunc = healthFunc
109 return p
110}
111
112// RegisterService register one or more service names with the probe, status will be track against service name
Neha Sharma94f16a92020-06-26 04:17:55 +0000113func (p *Probe) RegisterService(ctx context.Context, names ...string) {
Scott Baker2c1c4822019-10-16 11:02:41 -0700114 p.mutex.Lock()
115 defer p.mutex.Unlock()
116 if p.status == nil {
117 p.status = make(map[string]ServiceStatus)
118 }
119 for _, name := range names {
120 if _, ok := p.status[name]; !ok {
121 p.status[name] = ServiceStatusUnknown
Neha Sharma94f16a92020-06-26 04:17:55 +0000122 logger.Debugw(ctx, "probe-service-registered", log.Fields{"service-name": name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700123 }
124 }
125
126 if p.readyFunc != nil {
127 p.isReady = p.readyFunc(p.status)
128 } else {
129 p.isReady = defaultReadyFunc(p.status)
130 }
131
132 if p.healthFunc != nil {
133 p.isHealthy = p.healthFunc(p.status)
134 } else {
135 p.isHealthy = defaultHealthFunc(p.status)
136 }
137}
138
139// UpdateStatus utility function to send a service update to the probe
Neha Sharma94f16a92020-06-26 04:17:55 +0000140func (p *Probe) UpdateStatus(ctx context.Context, name string, status ServiceStatus) {
Scott Baker2c1c4822019-10-16 11:02:41 -0700141 p.mutex.Lock()
142 defer p.mutex.Unlock()
143 if p.status == nil {
144 p.status = make(map[string]ServiceStatus)
145 }
Scott Baker104b67d2019-10-29 15:56:27 -0700146
147 // if status hasn't changed, avoid doing useless work
148 existingStatus, ok := p.status[name]
149 if ok && (existingStatus == status) {
150 return
151 }
152
Scott Baker2c1c4822019-10-16 11:02:41 -0700153 p.status[name] = status
154 if p.readyFunc != nil {
155 p.isReady = p.readyFunc(p.status)
156 } else {
157 p.isReady = defaultReadyFunc(p.status)
158 }
159
160 if p.healthFunc != nil {
161 p.isHealthy = p.healthFunc(p.status)
162 } else {
163 p.isHealthy = defaultHealthFunc(p.status)
164 }
Neha Sharma94f16a92020-06-26 04:17:55 +0000165 logger.Debugw(ctx, "probe-service-status-updated",
Scott Baker2c1c4822019-10-16 11:02:41 -0700166 log.Fields{
167 "service-name": name,
168 "status": status.String(),
169 "ready": p.isReady,
170 "health": p.isHealthy,
171 })
172}
173
Scott Baker104b67d2019-10-29 15:56:27 -0700174func (p *Probe) GetStatus(name string) ServiceStatus {
175 p.mutex.Lock()
176 defer p.mutex.Unlock()
177
178 if p.status == nil {
179 p.status = make(map[string]ServiceStatus)
180 }
181
182 currentStatus, ok := p.status[name]
183 if ok {
184 return currentStatus
185 }
186
187 return ServiceStatusUnknown
188}
189
190func GetProbeFromContext(ctx context.Context) *Probe {
191 if ctx != nil {
192 if value := ctx.Value(ProbeContextKey); value != nil {
193 if p, ok := value.(*Probe); ok {
194 return p
195 }
196 }
197 }
198 return nil
199}
200
Scott Baker2c1c4822019-10-16 11:02:41 -0700201// UpdateStatusFromContext a convenience function to pull the Probe reference from the
202// Context, if it exists, and then calling UpdateStatus on that Probe reference. If Context
203// is nil or if a Probe reference is not associated with the ProbeContextKey then nothing
204// happens
205func UpdateStatusFromContext(ctx context.Context, name string, status ServiceStatus) {
Scott Baker104b67d2019-10-29 15:56:27 -0700206 p := GetProbeFromContext(ctx)
207 if p != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000208 p.UpdateStatus(ctx, name, status)
Scott Baker2c1c4822019-10-16 11:02:41 -0700209 }
210}
211
212// pulled out to a function to help better enable unit testing
213func (p *Probe) readzFunc(w http.ResponseWriter, req *http.Request) {
214 p.mutex.RLock()
215 defer p.mutex.RUnlock()
216 if p.isReady {
217 w.WriteHeader(http.StatusOK)
218 } else {
219 w.WriteHeader(http.StatusTeapot)
220 }
221}
222func (p *Probe) healthzFunc(w http.ResponseWriter, req *http.Request) {
223 p.mutex.RLock()
224 defer p.mutex.RUnlock()
225 if p.isHealthy {
226 w.WriteHeader(http.StatusOK)
227 } else {
228 w.WriteHeader(http.StatusTeapot)
229 }
230}
231func (p *Probe) detailzFunc(w http.ResponseWriter, req *http.Request) {
Neha Sharma94f16a92020-06-26 04:17:55 +0000232 ctx := context.Background()
Scott Baker2c1c4822019-10-16 11:02:41 -0700233 p.mutex.RLock()
234 defer p.mutex.RUnlock()
235 w.Header().Set("Content-Type", "application/json")
David K. Bainbridge7c75cac2020-02-19 08:53:46 -0800236 if _, err := w.Write([]byte("{")); err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000237 logger.Errorw(ctx, "write-response", log.Fields{"error": err})
David K. Bainbridge7c75cac2020-02-19 08:53:46 -0800238 w.WriteHeader(http.StatusInternalServerError)
239 return
240 }
Scott Baker2c1c4822019-10-16 11:02:41 -0700241 comma := ""
242 for c, s := range p.status {
David K. Bainbridge7c75cac2020-02-19 08:53:46 -0800243 if _, err := w.Write([]byte(fmt.Sprintf("%s\"%s\": \"%s\"", comma, c, s.String()))); err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000244 logger.Errorw(ctx, "write-response", log.Fields{"error": err})
David K. Bainbridge7c75cac2020-02-19 08:53:46 -0800245 w.WriteHeader(http.StatusInternalServerError)
246 return
247 }
Scott Baker2c1c4822019-10-16 11:02:41 -0700248 comma = ", "
249 }
David K. Bainbridge7c75cac2020-02-19 08:53:46 -0800250 if _, err := w.Write([]byte("}")); err != nil {
Neha Sharma94f16a92020-06-26 04:17:55 +0000251 logger.Errorw(ctx, "write-response", log.Fields{"error": err})
David K. Bainbridge7c75cac2020-02-19 08:53:46 -0800252 w.WriteHeader(http.StatusInternalServerError)
253 return
254 }
Scott Baker2c1c4822019-10-16 11:02:41 -0700255 w.WriteHeader(http.StatusOK)
Scott Baker2c1c4822019-10-16 11:02:41 -0700256}
257
258// ListenAndServe implements 3 HTTP endpoints on the given port for healthz, readz, and detailz. Returns only on error
Neha Sharma94f16a92020-06-26 04:17:55 +0000259func (p *Probe) ListenAndServe(ctx context.Context, address string) {
Scott Baker2c1c4822019-10-16 11:02:41 -0700260 mux := http.NewServeMux()
261
262 // Returns the result of the readyFunc calculation
263 mux.HandleFunc("/readz", p.readzFunc)
264
265 // Returns the result of the healthFunc calculation
266 mux.HandleFunc("/healthz", p.healthzFunc)
267
268 // Returns the details of the services and their status as JSON
269 mux.HandleFunc("/detailz", p.detailzFunc)
270 s := &http.Server{
271 Addr: address,
272 Handler: mux,
273 }
Neha Sharma94f16a92020-06-26 04:17:55 +0000274 logger.Fatal(ctx, s.ListenAndServe())
Scott Baker2c1c4822019-10-16 11:02:41 -0700275}
276
Scott Baker104b67d2019-10-29 15:56:27 -0700277func (p *Probe) IsReady() bool {
278 return p.isReady
279}
280
Scott Baker2c1c4822019-10-16 11:02:41 -0700281// defaultReadyFunc if all services are running then ready, else not
282func defaultReadyFunc(services map[string]ServiceStatus) bool {
283 if len(services) == 0 {
284 return false
285 }
286 for _, status := range services {
287 if status != ServiceStatusRunning {
288 return false
289 }
290 }
291 return true
292}
293
294// defaultHealthFunc if no service is stopped or failed, then healthy, else not.
295// service is start as unknown, so they are considered healthy
296func defaultHealthFunc(services map[string]ServiceStatus) bool {
297 if len(services) == 0 {
298 return false
299 }
300 for _, status := range services {
301 if status == ServiceStatusStopped || status == ServiceStatusFailed {
302 return false
303 }
304 }
305 return true
306}