blob: 8a8e48509db0372573d793024761b06ca58bca11 [file] [log] [blame]
David K. Bainbridgeb4a9ab02019-09-20 15:12:16 -07001/*
2 * Copyright 2019-present Open Networking Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package probe
17
18import (
19 "context"
20 "fmt"
21 "github.com/opencord/voltha-go/common/log"
22 "net/http"
23 "sync"
24)
25
// ProbeContextKeyType is the dedicated string type for the context key under
// which a Probe is stored. Using a distinct type keeps the key from colliding
// with plain string keys placed in the same context by other packages.
type ProbeContextKeyType string
28
// ServiceStatus enumerates the lifecycle states a registered service can
// report to the probe.
type ServiceStatus int

// The zero value is ServiceStatusUnknown; the remaining states follow in
// declaration order.
const (
	// ServiceStatusUnknown is the initial state of every service.
	ServiceStatusUnknown ServiceStatus = iota
	// ServiceStatusPreparing may be used while a service performs
	// preparation work, such as connecting.
	ServiceStatusPreparing
	// ServiceStatusPrepared may be used once preparation is complete but
	// the service is not yet running.
	ServiceStatusPrepared
	// ServiceStatusRunning means the service is functional.
	ServiceStatusRunning
	// ServiceStatusStopped means the service has stopped, but not because
	// of an error.
	ServiceStatusStopped
	// ServiceStatusFailed means the service has stopped because of an error.
	ServiceStatusFailed
)
51
52const (
53 // ProbeContextKey value of context key to fetch probe
54 ProbeContextKey = ProbeContextKeyType("status-update-probe")
55)
56
57// String convert ServiceStatus values to strings
58func (s ServiceStatus) String() string {
59 switch s {
60 default:
61 fallthrough
62 case ServiceStatusUnknown:
63 return "Unknown"
64 case ServiceStatusPreparing:
65 return "Preparing"
66 case ServiceStatusPrepared:
67 return "Prepared"
68 case ServiceStatusRunning:
69 return "Running"
70 case ServiceStatusStopped:
71 return "Stopped"
72 case ServiceStatusFailed:
73 return "Failed"
74 }
75}
76
77// ServiceStatusUpdate status update event
78type ServiceStatusUpdate struct {
79 Name string
80 Status ServiceStatus
81}
82
83// Probe reciever on which to implement probe capabilities
84type Probe struct {
85 readyFunc func(map[string]ServiceStatus) bool
86 healthFunc func(map[string]ServiceStatus) bool
87
88 mutex sync.RWMutex
89 status map[string]ServiceStatus
90 isReady bool
91 isHealthy bool
92}
93
94// WithReadyFunc override the default ready calculation function
95func (p *Probe) WithReadyFunc(readyFunc func(map[string]ServiceStatus) bool) *Probe {
96 p.readyFunc = readyFunc
97 return p
98}
99
100// WithHealthFunc override the default health calculation function
101func (p *Probe) WithHealthFunc(healthFunc func(map[string]ServiceStatus) bool) *Probe {
102 p.healthFunc = healthFunc
103 return p
104}
105
106// RegisterService register one or more service names with the probe, status will be track against service name
107func (p *Probe) RegisterService(names ...string) {
108 p.mutex.Lock()
109 defer p.mutex.Unlock()
110 if p.status == nil {
111 p.status = make(map[string]ServiceStatus)
112 }
113 for _, name := range names {
114 if _, ok := p.status[name]; !ok {
115 p.status[name] = ServiceStatusUnknown
116 log.Debugw("probe-service-registered", log.Fields{"service-name": name})
117 }
118 }
David Bainbridgef794fc52019-10-03 22:37:12 +0000119
120 if p.readyFunc != nil {
121 p.isReady = p.readyFunc(p.status)
122 } else {
123 p.isReady = defaultReadyFunc(p.status)
124 }
125
126 if p.healthFunc != nil {
127 p.isHealthy = p.healthFunc(p.status)
128 } else {
129 p.isHealthy = defaultHealthFunc(p.status)
130 }
David K. Bainbridgeb4a9ab02019-09-20 15:12:16 -0700131}
132
133// UpdateStatus utility function to send a service update to the probe
134func (p *Probe) UpdateStatus(name string, status ServiceStatus) {
135 p.mutex.Lock()
136 defer p.mutex.Unlock()
137 if p.status == nil {
138 p.status = make(map[string]ServiceStatus)
139 }
140 p.status[name] = status
141 if p.readyFunc != nil {
142 p.isReady = p.readyFunc(p.status)
143 } else {
144 p.isReady = defaultReadyFunc(p.status)
145 }
146
147 if p.healthFunc != nil {
148 p.isHealthy = p.healthFunc(p.status)
149 } else {
150 p.isHealthy = defaultHealthFunc(p.status)
151 }
152 log.Debugw("probe-service-status-updated",
153 log.Fields{
154 "service-name": name,
155 "status": status.String(),
156 "ready": p.isReady,
157 "health": p.isHealthy,
158 })
159}
160
161// UpdateStatusFromContext a convenience function to pull the Probe reference from the
162// Context, if it exists, and then calling UpdateStatus on that Probe reference. If Context
163// is nil or if a Probe reference is not associated with the ProbeContextKey then nothing
164// happens
165func UpdateStatusFromContext(ctx context.Context, name string, status ServiceStatus) {
166 if ctx != nil {
167 if value := ctx.Value(ProbeContextKey); value != nil {
168 if p, ok := value.(*Probe); ok {
169 p.UpdateStatus(name, status)
170 }
171 }
172 }
173}
174
175// pulled out to a function to help better enable unit testing
176func (p *Probe) readzFunc(w http.ResponseWriter, req *http.Request) {
177 p.mutex.RLock()
178 defer p.mutex.RUnlock()
179 if p.isReady {
180 w.WriteHeader(http.StatusOK)
181 } else {
182 w.WriteHeader(http.StatusTeapot)
183 }
184}
185func (p *Probe) healthzFunc(w http.ResponseWriter, req *http.Request) {
186 p.mutex.RLock()
187 defer p.mutex.RUnlock()
188 if p.isHealthy {
189 w.WriteHeader(http.StatusOK)
190 } else {
191 w.WriteHeader(http.StatusTeapot)
192 }
193}
194func (p *Probe) detailzFunc(w http.ResponseWriter, req *http.Request) {
195 p.mutex.RLock()
196 defer p.mutex.RUnlock()
197 w.Header().Set("Content-Type", "application/json")
198 w.Write([]byte("{"))
199 comma := ""
200 for c, s := range p.status {
201 w.Write([]byte(fmt.Sprintf("%s\"%s\": \"%s\"", comma, c, s.String())))
202 comma = ", "
203 }
204 w.Write([]byte("}"))
205 w.WriteHeader(http.StatusOK)
206
207}
208
209// ListenAndServe implements 3 HTTP endpoints on the given port for healthz, readz, and detailz. Returns only on error
Kent Hagermanc4618832019-10-07 12:24:36 -0400210func (p *Probe) ListenAndServe(address string) {
David K. Bainbridgeb4a9ab02019-09-20 15:12:16 -0700211 mux := http.NewServeMux()
212
213 // Returns the result of the readyFunc calculation
214 mux.HandleFunc("/readz", p.readzFunc)
215
216 // Returns the result of the healthFunc calculation
217 mux.HandleFunc("/healthz", p.healthzFunc)
218
219 // Returns the details of the services and their status as JSON
220 mux.HandleFunc("/detailz", p.detailzFunc)
221 s := &http.Server{
Kent Hagermanc4618832019-10-07 12:24:36 -0400222 Addr: address,
David K. Bainbridgeb4a9ab02019-09-20 15:12:16 -0700223 Handler: mux,
224 }
225 log.Fatal(s.ListenAndServe())
226}
227
228// defaultReadyFunc if all services are running then ready, else not
229func defaultReadyFunc(services map[string]ServiceStatus) bool {
230 if len(services) == 0 {
231 return false
232 }
233 for _, status := range services {
234 if status != ServiceStatusRunning {
235 return false
236 }
237 }
238 return true
239}
240
241// defaultHealthFunc if no service is stopped or failed, then healthy, else not.
242// service is start as unknown, so they are considered healthy
243func defaultHealthFunc(services map[string]ServiceStatus) bool {
244 if len(services) == 0 {
245 return false
246 }
247 for _, status := range services {
248 if status == ServiceStatusStopped || status == ServiceStatusFailed {
249 return false
250 }
251 }
252 return true
253}