blob: 732d6df9e1779f190bd78739e007e2fab53db070 [file] [log] [blame]
Scott Baker2c1c4822019-10-16 11:02:41 -07001/*
2 * Copyright 2019-present Open Networking Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package probe
17
18import (
19 "context"
20 "fmt"
serkant.uluderyab38671c2019-11-01 09:35:38 -070021 "github.com/opencord/voltha-lib-go/v3/pkg/log"
Scott Baker2c1c4822019-10-16 11:02:41 -070022 "net/http"
23 "sync"
24)
25
// ProbeContextKeyType is the type of the key under which a Probe instance is
// stored in, and fetched from, a context.
type ProbeContextKeyType string

// ServiceStatus is the typed value used to describe the state of a service.
type ServiceStatus int

const (
	// ServiceStatusUnknown initial state of services
	ServiceStatusUnknown ServiceStatus = iota

	// ServiceStatusPreparing to optionally be used for prep, such as connecting
	ServiceStatusPreparing

	// ServiceStatusPrepared to optionally be used when prep is complete, but before run
	ServiceStatusPrepared

	// ServiceStatusRunning service is functional
	ServiceStatusRunning

	// ServiceStatusStopped service has stopped, but not because of error
	ServiceStatusStopped

	// ServiceStatusFailed service has stopped because of an error
	ServiceStatusFailed

	// ServiceStatusNotReady service has started but is unable to accept requests
	ServiceStatusNotReady
)

const (
	// ProbeContextKey is the context key value used to fetch the probe.
	ProbeContextKey = ProbeContextKeyType("status-update-probe")
)
59
60// String convert ServiceStatus values to strings
61func (s ServiceStatus) String() string {
62 switch s {
63 default:
64 fallthrough
65 case ServiceStatusUnknown:
66 return "Unknown"
67 case ServiceStatusPreparing:
68 return "Preparing"
69 case ServiceStatusPrepared:
70 return "Prepared"
71 case ServiceStatusRunning:
72 return "Running"
73 case ServiceStatusStopped:
74 return "Stopped"
75 case ServiceStatusFailed:
76 return "Failed"
Scott Baker104b67d2019-10-29 15:56:27 -070077 case ServiceStatusNotReady:
78 return "NotReady"
Scott Baker2c1c4822019-10-16 11:02:41 -070079 }
80}
81
82// ServiceStatusUpdate status update event
83type ServiceStatusUpdate struct {
84 Name string
85 Status ServiceStatus
86}
87
88// Probe reciever on which to implement probe capabilities
89type Probe struct {
90 readyFunc func(map[string]ServiceStatus) bool
91 healthFunc func(map[string]ServiceStatus) bool
92
93 mutex sync.RWMutex
94 status map[string]ServiceStatus
95 isReady bool
96 isHealthy bool
97}
98
99// WithReadyFunc override the default ready calculation function
100func (p *Probe) WithReadyFunc(readyFunc func(map[string]ServiceStatus) bool) *Probe {
101 p.readyFunc = readyFunc
102 return p
103}
104
105// WithHealthFunc override the default health calculation function
106func (p *Probe) WithHealthFunc(healthFunc func(map[string]ServiceStatus) bool) *Probe {
107 p.healthFunc = healthFunc
108 return p
109}
110
111// RegisterService register one or more service names with the probe, status will be track against service name
Neha Sharma3c425fb2020-06-08 16:42:32 +0000112func (p *Probe) RegisterService(ctx context.Context, names ...string) {
Scott Baker2c1c4822019-10-16 11:02:41 -0700113 p.mutex.Lock()
114 defer p.mutex.Unlock()
115 if p.status == nil {
116 p.status = make(map[string]ServiceStatus)
117 }
118 for _, name := range names {
119 if _, ok := p.status[name]; !ok {
120 p.status[name] = ServiceStatusUnknown
Neha Sharma3c425fb2020-06-08 16:42:32 +0000121 logger.Debugw(ctx, "probe-service-registered", log.Fields{"service-name": name})
Scott Baker2c1c4822019-10-16 11:02:41 -0700122 }
123 }
124
125 if p.readyFunc != nil {
126 p.isReady = p.readyFunc(p.status)
127 } else {
128 p.isReady = defaultReadyFunc(p.status)
129 }
130
131 if p.healthFunc != nil {
132 p.isHealthy = p.healthFunc(p.status)
133 } else {
134 p.isHealthy = defaultHealthFunc(p.status)
135 }
136}
137
138// UpdateStatus utility function to send a service update to the probe
Neha Sharma3c425fb2020-06-08 16:42:32 +0000139func (p *Probe) UpdateStatus(ctx context.Context, name string, status ServiceStatus) {
Scott Baker2c1c4822019-10-16 11:02:41 -0700140 p.mutex.Lock()
141 defer p.mutex.Unlock()
142 if p.status == nil {
143 p.status = make(map[string]ServiceStatus)
144 }
Scott Baker104b67d2019-10-29 15:56:27 -0700145
146 // if status hasn't changed, avoid doing useless work
147 existingStatus, ok := p.status[name]
148 if ok && (existingStatus == status) {
149 return
150 }
151
Scott Baker2c1c4822019-10-16 11:02:41 -0700152 p.status[name] = status
153 if p.readyFunc != nil {
154 p.isReady = p.readyFunc(p.status)
155 } else {
156 p.isReady = defaultReadyFunc(p.status)
157 }
158
159 if p.healthFunc != nil {
160 p.isHealthy = p.healthFunc(p.status)
161 } else {
162 p.isHealthy = defaultHealthFunc(p.status)
163 }
Neha Sharma3c425fb2020-06-08 16:42:32 +0000164 logger.Debugw(ctx, "probe-service-status-updated",
Scott Baker2c1c4822019-10-16 11:02:41 -0700165 log.Fields{
166 "service-name": name,
167 "status": status.String(),
168 "ready": p.isReady,
169 "health": p.isHealthy,
170 })
171}
172
Scott Baker104b67d2019-10-29 15:56:27 -0700173func (p *Probe) GetStatus(name string) ServiceStatus {
174 p.mutex.Lock()
175 defer p.mutex.Unlock()
176
177 if p.status == nil {
178 p.status = make(map[string]ServiceStatus)
179 }
180
181 currentStatus, ok := p.status[name]
182 if ok {
183 return currentStatus
184 }
185
186 return ServiceStatusUnknown
187}
188
189func GetProbeFromContext(ctx context.Context) *Probe {
190 if ctx != nil {
191 if value := ctx.Value(ProbeContextKey); value != nil {
192 if p, ok := value.(*Probe); ok {
193 return p
194 }
195 }
196 }
197 return nil
198}
199
Scott Baker2c1c4822019-10-16 11:02:41 -0700200// UpdateStatusFromContext a convenience function to pull the Probe reference from the
201// Context, if it exists, and then calling UpdateStatus on that Probe reference. If Context
202// is nil or if a Probe reference is not associated with the ProbeContextKey then nothing
203// happens
204func UpdateStatusFromContext(ctx context.Context, name string, status ServiceStatus) {
Scott Baker104b67d2019-10-29 15:56:27 -0700205 p := GetProbeFromContext(ctx)
206 if p != nil {
Neha Sharma3c425fb2020-06-08 16:42:32 +0000207 p.UpdateStatus(ctx, name, status)
Scott Baker2c1c4822019-10-16 11:02:41 -0700208 }
209}
210
211// pulled out to a function to help better enable unit testing
212func (p *Probe) readzFunc(w http.ResponseWriter, req *http.Request) {
213 p.mutex.RLock()
214 defer p.mutex.RUnlock()
215 if p.isReady {
216 w.WriteHeader(http.StatusOK)
217 } else {
218 w.WriteHeader(http.StatusTeapot)
219 }
220}
221func (p *Probe) healthzFunc(w http.ResponseWriter, req *http.Request) {
222 p.mutex.RLock()
223 defer p.mutex.RUnlock()
224 if p.isHealthy {
225 w.WriteHeader(http.StatusOK)
226 } else {
227 w.WriteHeader(http.StatusTeapot)
228 }
229}
230func (p *Probe) detailzFunc(w http.ResponseWriter, req *http.Request) {
Neha Sharma3c425fb2020-06-08 16:42:32 +0000231 ctx := context.Background()
Scott Baker2c1c4822019-10-16 11:02:41 -0700232 p.mutex.RLock()
233 defer p.mutex.RUnlock()
234 w.Header().Set("Content-Type", "application/json")
David K. Bainbridge7c75cac2020-02-19 08:53:46 -0800235 if _, err := w.Write([]byte("{")); err != nil {
Neha Sharma3c425fb2020-06-08 16:42:32 +0000236 logger.Errorw(ctx, "write-response", log.Fields{"error": err})
David K. Bainbridge7c75cac2020-02-19 08:53:46 -0800237 w.WriteHeader(http.StatusInternalServerError)
238 return
239 }
Scott Baker2c1c4822019-10-16 11:02:41 -0700240 comma := ""
241 for c, s := range p.status {
David K. Bainbridge7c75cac2020-02-19 08:53:46 -0800242 if _, err := w.Write([]byte(fmt.Sprintf("%s\"%s\": \"%s\"", comma, c, s.String()))); err != nil {
Neha Sharma3c425fb2020-06-08 16:42:32 +0000243 logger.Errorw(ctx, "write-response", log.Fields{"error": err})
David K. Bainbridge7c75cac2020-02-19 08:53:46 -0800244 w.WriteHeader(http.StatusInternalServerError)
245 return
246 }
Scott Baker2c1c4822019-10-16 11:02:41 -0700247 comma = ", "
248 }
David K. Bainbridge7c75cac2020-02-19 08:53:46 -0800249 if _, err := w.Write([]byte("}")); err != nil {
Neha Sharma3c425fb2020-06-08 16:42:32 +0000250 logger.Errorw(ctx, "write-response", log.Fields{"error": err})
David K. Bainbridge7c75cac2020-02-19 08:53:46 -0800251 w.WriteHeader(http.StatusInternalServerError)
252 return
253 }
Scott Baker2c1c4822019-10-16 11:02:41 -0700254 w.WriteHeader(http.StatusOK)
Scott Baker2c1c4822019-10-16 11:02:41 -0700255}
256
257// ListenAndServe implements 3 HTTP endpoints on the given port for healthz, readz, and detailz. Returns only on error
Neha Sharma3c425fb2020-06-08 16:42:32 +0000258func (p *Probe) ListenAndServe(ctx context.Context, address string) {
Scott Baker2c1c4822019-10-16 11:02:41 -0700259 mux := http.NewServeMux()
260
261 // Returns the result of the readyFunc calculation
262 mux.HandleFunc("/readz", p.readzFunc)
263
264 // Returns the result of the healthFunc calculation
265 mux.HandleFunc("/healthz", p.healthzFunc)
266
267 // Returns the details of the services and their status as JSON
268 mux.HandleFunc("/detailz", p.detailzFunc)
269 s := &http.Server{
270 Addr: address,
271 Handler: mux,
272 }
Neha Sharma3c425fb2020-06-08 16:42:32 +0000273 logger.Fatal(ctx, s.ListenAndServe())
Scott Baker2c1c4822019-10-16 11:02:41 -0700274}
275
Scott Baker104b67d2019-10-29 15:56:27 -0700276func (p *Probe) IsReady() bool {
277 return p.isReady
278}
279
Scott Baker2c1c4822019-10-16 11:02:41 -0700280// defaultReadyFunc if all services are running then ready, else not
281func defaultReadyFunc(services map[string]ServiceStatus) bool {
282 if len(services) == 0 {
283 return false
284 }
285 for _, status := range services {
286 if status != ServiceStatusRunning {
287 return false
288 }
289 }
290 return true
291}
292
293// defaultHealthFunc if no service is stopped or failed, then healthy, else not.
294// service is start as unknown, so they are considered healthy
295func defaultHealthFunc(services map[string]ServiceStatus) bool {
296 if len(services) == 0 {
297 return false
298 }
299 for _, status := range services {
300 if status == ServiceStatusStopped || status == ServiceStatusFailed {
301 return false
302 }
303 }
304 return true
305}