VOL-1723 - add readiness probe capability to rw-core
Change-Id: I1cf42e88712586f140a2dfa9d0b638b48261caac
diff --git a/common/grpc/server.go b/common/grpc/server.go
index d5685f1..e3f3d99 100644
--- a/common/grpc/server.go
+++ b/common/grpc/server.go
@@ -118,7 +118,9 @@
Stop servicing GRPC requests
*/
func (s *GrpcServer) Stop() {
- s.gs.Stop()
+ if s.gs != nil {
+ s.gs.Stop()
+ }
}
/*
diff --git a/common/probe/probe.go b/common/probe/probe.go
new file mode 100644
index 0000000..984036c
--- /dev/null
+++ b/common/probe/probe.go
@@ -0,0 +1,241 @@
+/*
+ * Copyright 2019-present Open Networking Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package probe
+
+import (
+ "context"
+ "fmt"
+ "github.com/opencord/voltha-go/common/log"
+ "net/http"
+ "sync"
+)
+
+// ProbeContextKeyType is the type of the context key used to fetch the Probe instance from a context
+type ProbeContextKeyType string
+
+// ServiceStatus typed values for service status
+type ServiceStatus int
+
+const (
+ // ServiceStatusUnknown initial state of services
+ ServiceStatusUnknown ServiceStatus = iota
+
+ // ServiceStatusPreparing to optionally be used for prep, such as connecting
+ ServiceStatusPreparing
+
+ // ServiceStatusPrepared to optionally be used when prep is complete, but before run
+ ServiceStatusPrepared
+
+ // ServiceStatusRunning service is functional
+ ServiceStatusRunning
+
+ // ServiceStatusStopped service has stopped, but not because of error
+ ServiceStatusStopped
+
+ // ServiceStatusFailed service has stopped because of an error
+ ServiceStatusFailed
+)
+
+const (
+ // ProbeContextKey value of context key to fetch probe
+ ProbeContextKey = ProbeContextKeyType("status-update-probe")
+)
+
+// String returns the human-readable label for a ServiceStatus value. ServiceStatusUnknown
+// and any value outside the declared constants are both reported as "Unknown".
+func (s ServiceStatus) String() string {
+	switch s {
+	case ServiceStatusPreparing:
+		return "Preparing"
+	case ServiceStatusPrepared:
+		return "Prepared"
+	case ServiceStatusRunning:
+		return "Running"
+	case ServiceStatusStopped:
+		return "Stopped"
+	case ServiceStatusFailed:
+		return "Failed"
+	}
+
+	// everything not matched above, ServiceStatusUnknown included, shares one label
+	return "Unknown"
+}
+
+// ServiceStatusUpdate status update event
+type ServiceStatusUpdate struct {
+ Name string
+ Status ServiceStatus
+}
+
+// Probe receiver on which to implement probe capabilities; it caches the ready and health results computed by UpdateStatus
+type Probe struct {
+ readyFunc func(map[string]ServiceStatus) bool // optional override; UpdateStatus uses defaultReadyFunc when nil
+ healthFunc func(map[string]ServiceStatus) bool // optional override; UpdateStatus uses defaultHealthFunc when nil
+
+ mutex sync.RWMutex // held by RegisterService, UpdateStatus and the HTTP handler funcs
+ status map[string]ServiceStatus // service name -> last recorded status; created lazily on first use
+ isReady bool // result of the ready calculation at the last UpdateStatus
+ isHealthy bool // result of the health calculation at the last UpdateStatus
+}
+
+// WithReadyFunc override the default ready calculation function; passing nil restores the default. Returns the receiver so calls can be chained
+func (p *Probe) WithReadyFunc(readyFunc func(map[string]ServiceStatus) bool) *Probe {
+ p.readyFunc = readyFunc // stored without taking p.mutex; consulted by the next UpdateStatus call
+ return p
+}
+
+// WithHealthFunc override the default health calculation function; passing nil restores the default. Returns the receiver so calls can be chained
+func (p *Probe) WithHealthFunc(healthFunc func(map[string]ServiceStatus) bool) *Probe {
+ p.healthFunc = healthFunc // stored without taking p.mutex; consulted by the next UpdateStatus call
+ return p
+}
+
+// RegisterService adds each given name with ServiceStatusUnknown; a name already tracked keeps its status, and the cached ready/health results are not recalculated here
+func (p *Probe) RegisterService(names ...string) {
+	p.mutex.Lock()
+	defer p.mutex.Unlock()
+	if p.status == nil {
+		p.status = make(map[string]ServiceStatus)
+	}
+	for _, svc := range names {
+		if _, tracked := p.status[svc]; !tracked {
+			p.status[svc] = ServiceStatusUnknown
+			log.Debugw("probe-service-registered", log.Fields{"service-name": svc})
+		}
+	}
+}
+
+// UpdateStatus records status for the named service (adding it when it was never registered) and recomputes the cached ready and health results
+func (p *Probe) UpdateStatus(name string, status ServiceStatus) {
+	p.mutex.Lock()
+	defer p.mutex.Unlock()
+	if p.status == nil {
+		p.status = make(map[string]ServiceStatus)
+	}
+	p.status[name] = status
+
+	// use the package defaults wherever no override has been installed
+	readyCalc, healthCalc := p.readyFunc, p.healthFunc
+	if readyCalc == nil {
+		readyCalc = defaultReadyFunc
+	}
+	if healthCalc == nil {
+		healthCalc = defaultHealthFunc
+	}
+	p.isReady = readyCalc(p.status)
+	p.isHealthy = healthCalc(p.status)
+	fields := log.Fields{
+		"service-name": name,
+		"status":       status.String(),
+		"ready":        p.isReady,
+		"health":       p.isHealthy,
+	}
+	log.Debugw("probe-service-status-updated", fields)
+}
+
+// UpdateStatusFromContext looks up the Probe stored in ctx under ProbeContextKey and
+// forwards the update to its UpdateStatus method. Nothing happens when ctx is nil,
+// when the key is absent, or when the stored value is not a *Probe, so callers may
+// invoke it unconditionally.
+func UpdateStatusFromContext(ctx context.Context, name string, status ServiceStatus) {
+	if ctx == nil {
+		return
+	}
+	// an absent key yields a nil value, which fails the comma-ok assertion below
+	if p, ok := ctx.Value(ProbeContextKey).(*Probe); ok {
+		p.UpdateStatus(name, status)
+	}
+}
+
+// readzFunc answers 200 when the cached ready flag is set and 418 otherwise; kept as a method so unit tests can call it directly
+func (p *Probe) readzFunc(w http.ResponseWriter, req *http.Request) {
+	p.mutex.RLock()
+	defer p.mutex.RUnlock()
+	code := http.StatusTeapot
+	if p.isReady {
+		code = http.StatusOK
+	}
+	w.WriteHeader(code)
+}
+func (p *Probe) healthzFunc(w http.ResponseWriter, req *http.Request) {
+	p.mutex.RLock()
+	defer p.mutex.RUnlock()
+	code := http.StatusTeapot // 418 unless the cached health flag is set
+	if p.isHealthy {
+		code = http.StatusOK
+	}
+	w.WriteHeader(code)
+}
+// detailzFunc writes every tracked service and its status label as a flat JSON object with a 200 status
+func (p *Probe) detailzFunc(w http.ResponseWriter, req *http.Request) {
+	p.mutex.RLock()
+	defer p.mutex.RUnlock()
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK) // must precede the first Write; a later call is a superfluous no-op
+	w.Write([]byte("{"))
+	comma := ""
+	for c, s := range p.status {
+		w.Write([]byte(fmt.Sprintf("%s\"%s\": \"%s\"", comma, c, s.String())))
+		comma = ", "
+	}
+	w.Write([]byte("}"))
+}
+
+// ListenAndServe serves /readz, /healthz and /detailz over HTTP on the given port; the value returned by the server's ListenAndServe is passed to log.Fatal
+func (p *Probe) ListenAndServe(port int) {
+	routes := http.NewServeMux()
+
+	// readiness: outcome of the ready calculation
+	routes.HandleFunc("/readz", p.readzFunc)
+
+	// health: outcome of the health calculation
+	routes.HandleFunc("/healthz", p.healthzFunc)
+
+	// detail: every service with its status, as JSON
+	routes.HandleFunc("/detailz", p.detailzFunc)
+
+	addr := fmt.Sprintf(":%d", port)
+	server := &http.Server{Addr: addr, Handler: routes}
+	// the serve call blocks; whatever it eventually returns goes straight to log.Fatal
+	log.Fatal(server.ListenAndServe())
+}
+
+// defaultReadyFunc reports ready only when at least one service is tracked and every tracked service is running
+func defaultReadyFunc(services map[string]ServiceStatus) bool {
+	ready := len(services) > 0 // an empty set is never ready
+	for _, current := range services {
+		// one service in any other state is enough to veto readiness
+		if current != ServiceStatusRunning {
+			ready = false
+			break
+		}
+	}
+	return ready
+}
+
+// defaultHealthFunc reports healthy when at least one service is tracked and none of them
+// is stopped or failed. Services start out as unknown, and unknown counts as healthy.
+func defaultHealthFunc(services map[string]ServiceStatus) bool {
+	healthy := len(services) > 0 // an empty set is never healthy
+	for _, current := range services {
+		// a stopped or failed service is the only thing that vetoes health
+		if current == ServiceStatusStopped || current == ServiceStatusFailed {
+			healthy = false
+			break
+		}
+	}
+	return healthy
+}
diff --git a/common/probe/probe_test.go b/common/probe/probe_test.go
new file mode 100644
index 0000000..18c4835
--- /dev/null
+++ b/common/probe/probe_test.go
@@ -0,0 +1,372 @@
+/*
+ * Copyright 2019-present Open Networking Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package probe
+
+import (
+ "context"
+ "encoding/json"
+ "github.com/opencord/voltha-go/common/log"
+ "github.com/stretchr/testify/assert"
+ "io/ioutil"
+ "net/http"
+ "net/http/httptest"
+ "testing"
+)
+
+func init() {
+ log.AddPackage(log.JSON, log.WarnLevel, nil)
+}
+
+func TestServiceStatusString(t *testing.T) {
+ assert.Equal(t, "Unknown", ServiceStatusUnknown.String(), "ServiceStatusUnknown")
+ assert.Equal(t, "Preparing", ServiceStatusPreparing.String(), "ServiceStatusPreparing")
+ assert.Equal(t, "Prepared", ServiceStatusPrepared.String(), "ServiceStatusPrepared")
+ assert.Equal(t, "Running", ServiceStatusRunning.String(), "ServiceStatusRunning")
+ assert.Equal(t, "Stopped", ServiceStatusStopped.String(), "ServiceStatusStopped")
+ assert.Equal(t, "Failed", ServiceStatusFailed.String(), "ServiceStatusFailed")
+}
+
+func AlwaysTrue(map[string]ServiceStatus) bool {
+ return true
+}
+
+func AlwaysFalse(map[string]ServiceStatus) bool {
+ return false
+}
+
+func TestWithFuncs(t *testing.T) {
+ p := (&Probe{}).WithReadyFunc(AlwaysTrue).WithHealthFunc(AlwaysFalse)
+
+ assert.NotNil(t, p.readyFunc, "ready func not set")
+ assert.True(t, p.readyFunc(nil), "ready func not set correctly")
+ assert.NotNil(t, p.healthFunc, "health func not set")
+ assert.False(t, p.healthFunc(nil), "health func not set correctly")
+}
+
+func TestWithReadyFuncOnly(t *testing.T) {
+ p := (&Probe{}).WithReadyFunc(AlwaysTrue)
+
+ assert.NotNil(t, p.readyFunc, "ready func not set")
+ assert.True(t, p.readyFunc(nil), "ready func not set correctly")
+ assert.Nil(t, p.healthFunc, "health func set")
+}
+
+func TestWithHealthFuncOnly(t *testing.T) {
+ p := (&Probe{}).WithHealthFunc(AlwaysTrue)
+
+ assert.Nil(t, p.readyFunc, "ready func set")
+ assert.NotNil(t, p.healthFunc, "health func not set")
+ assert.True(t, p.healthFunc(nil), "health func not set correctly")
+}
+
+func TestRegisterOneService(t *testing.T) {
+ p := &Probe{}
+
+ p.RegisterService("one")
+
+ assert.Equal(t, 1, len(p.status), "wrong number of services")
+
+ _, ok := p.status["one"]
+ assert.True(t, ok, "service not found")
+}
+
+func TestRegisterMultipleServices(t *testing.T) {
+ p := &Probe{}
+
+ p.RegisterService("one", "two", "three", "four")
+
+ assert.Equal(t, 4, len(p.status), "wrong number of services")
+
+ _, ok := p.status["one"]
+ assert.True(t, ok, "service one not found")
+ _, ok = p.status["two"]
+ assert.True(t, ok, "service two not found")
+ _, ok = p.status["three"]
+ assert.True(t, ok, "service three not found")
+ _, ok = p.status["four"]
+ assert.True(t, ok, "service four not found")
+}
+
+func TestRegisterMultipleServicesIncremental(t *testing.T) {
+ p := &Probe{}
+
+ p.RegisterService("one")
+ p.RegisterService("two")
+ p.RegisterService("three", "four")
+
+ assert.Equal(t, 4, len(p.status), "wrong number of services")
+
+ _, ok := p.status["one"]
+ assert.True(t, ok, "service one not found")
+ _, ok = p.status["two"]
+ assert.True(t, ok, "service two not found")
+ _, ok = p.status["three"]
+ assert.True(t, ok, "service three not found")
+ _, ok = p.status["four"]
+ assert.True(t, ok, "service four not found")
+}
+
+func TestRegisterMultipleServicesDuplicates(t *testing.T) {
+ p := &Probe{}
+
+ p.RegisterService("one", "one", "one", "two")
+
+ assert.Equal(t, 2, len(p.status), "wrong number of services")
+
+ _, ok := p.status["one"]
+ assert.True(t, ok, "service one not found")
+ _, ok = p.status["two"]
+ assert.True(t, ok, "service two not found")
+}
+
+func TestRegisterMultipleServicesDuplicatesIncremental(t *testing.T) {
+ p := &Probe{}
+
+ p.RegisterService("one")
+ p.RegisterService("one")
+ p.RegisterService("one", "two")
+
+ assert.Equal(t, 2, len(p.status), "wrong number of services")
+
+ _, ok := p.status["one"]
+ assert.True(t, ok, "service one not found")
+ _, ok = p.status["two"]
+ assert.True(t, ok, "service two not found")
+}
+
+func TestUpdateStatus(t *testing.T) {
+ p := &Probe{}
+
+ p.RegisterService("one", "two")
+ p.UpdateStatus("one", ServiceStatusRunning)
+
+ assert.Equal(t, ServiceStatusRunning, p.status["one"], "status not set")
+ assert.Equal(t, ServiceStatusUnknown, p.status["two"], "status set")
+}
+
+func TestRegisterOverwriteStatus(t *testing.T) {
+ p := &Probe{}
+
+ p.RegisterService("one", "two")
+ p.UpdateStatus("one", ServiceStatusRunning)
+
+ assert.Equal(t, ServiceStatusRunning, p.status["one"], "status not set")
+ assert.Equal(t, ServiceStatusUnknown, p.status["two"], "status set")
+
+ p.RegisterService("one", "three")
+ assert.Equal(t, 3, len(p.status), "wrong number of services")
+ assert.Equal(t, ServiceStatusRunning, p.status["one"], "status overridden")
+ assert.Equal(t, ServiceStatusUnknown, p.status["two"], "status set")
+ assert.Equal(t, ServiceStatusUnknown, p.status["three"], "status set")
+}
+
+func TestDetailzWithServies(t *testing.T) {
+	p := (&Probe{}).WithReadyFunc(AlwaysTrue).WithHealthFunc(AlwaysTrue)
+	p.RegisterService("one", "two")
+
+	req := httptest.NewRequest("GET", "http://example.com/detailz", nil)
+	w := httptest.NewRecorder()
+	p.detailzFunc(w, req)
+	resp := w.Result()
+	body, err := ioutil.ReadAll(resp.Body)
+	assert.Nil(t, err, "unable to read response body")
+	assert.Equal(t, http.StatusOK, resp.StatusCode, "invalid status code with registered services")
+	assert.Equal(t, "application/json", resp.Header.Get("Content-Type"), "wrong content type")
+	var vals map[string]string
+	err = json.Unmarshal(body, &vals)
+	assert.Nil(t, err, "unable to unmarshal values")
+	assert.Equal(t, "Unknown", vals["one"], "wrong value")
+	assert.Equal(t, "Unknown", vals["two"], "wrong value")
+}
+
+func TestReadzNoServices(t *testing.T) {
+ p := (&Probe{}).WithReadyFunc(AlwaysTrue)
+ req := httptest.NewRequest("GET", "http://example.com/readz", nil)
+ w := httptest.NewRecorder()
+ p.readzFunc(w, req)
+ resp := w.Result()
+
+ assert.Equal(t, http.StatusTeapot, resp.StatusCode, "invalid status code for no services")
+}
+
+func TestReadzWithServices(t *testing.T) {
+	p := (&Probe{}).WithReadyFunc(AlwaysTrue).WithHealthFunc(AlwaysTrue)
+	p.RegisterService("one", "two")
+	// registration alone never runs the ready func, so the probe must still answer 418
+	req := httptest.NewRequest("GET", "http://example.com/readz", nil)
+	w := httptest.NewRecorder()
+	p.readzFunc(w, req)
+	resp := w.Result()
+	assert.Equal(t, http.StatusTeapot, resp.StatusCode, "registering services must not make the probe ready")
+}
+
+func TestReadzNpServicesDefault(t *testing.T) {
+ p := &Probe{}
+
+ req := httptest.NewRequest("GET", "http://example.com/readz", nil)
+ w := httptest.NewRecorder()
+ p.readzFunc(w, req)
+ resp := w.Result()
+ assert.Equal(t, http.StatusTeapot, resp.StatusCode, "invalid status code")
+}
+
+func TestReadzWithServicesDefault(t *testing.T) {
+ p := &Probe{}
+ p.RegisterService("one", "two")
+ p.UpdateStatus("one", ServiceStatusRunning)
+ p.UpdateStatus("two", ServiceStatusRunning)
+
+ req := httptest.NewRequest("GET", "http://example.com/readz", nil)
+ w := httptest.NewRecorder()
+ p.readzFunc(w, req)
+ resp := w.Result()
+ assert.Equal(t, http.StatusOK, resp.StatusCode, "invalid status code")
+}
+
+func TestReadzWithServicesDefaultOne(t *testing.T) {
+ p := &Probe{}
+ p.RegisterService("one", "two")
+ p.UpdateStatus("one", ServiceStatusRunning)
+
+ req := httptest.NewRequest("GET", "http://example.com/readz", nil)
+ w := httptest.NewRecorder()
+ p.readzFunc(w, req)
+ resp := w.Result()
+ assert.Equal(t, http.StatusTeapot, resp.StatusCode, "invalid status code")
+}
+
+func TestHealthzNoServices(t *testing.T) {
+	p := (&Probe{}).WithHealthFunc(AlwaysTrue)
+	req := httptest.NewRequest("GET", "http://example.com/healthz", nil)
+	w := httptest.NewRecorder()
+	p.healthzFunc(w, req)
+	resp := w.Result()
+	// installing a health override alone does not recompute the cached health flag
+	assert.Equal(t, http.StatusTeapot, resp.StatusCode, "invalid status code for no services")
+}
+
+func TestHealthzWithServices(t *testing.T) {
+	p := (&Probe{}).WithReadyFunc(AlwaysTrue).WithHealthFunc(AlwaysTrue)
+	p.RegisterService("one", "two")
+	// registration alone never runs the health func, so the probe must still answer 418
+	req := httptest.NewRequest("GET", "http://example.com/healthz", nil)
+	w := httptest.NewRecorder()
+	p.healthzFunc(w, req)
+	resp := w.Result()
+	assert.Equal(t, http.StatusTeapot, resp.StatusCode, "registering services must not make the probe healthy")
+}
+
+func TestHealthzNoServicesDefault(t *testing.T) {
+ p := &Probe{}
+
+ req := httptest.NewRequest("GET", "http://example.com/healthz", nil)
+ w := httptest.NewRecorder()
+ p.healthzFunc(w, req)
+ resp := w.Result()
+ assert.Equal(t, http.StatusTeapot, resp.StatusCode, "invalid status code")
+}
+
+func TestHealthzWithServicesDefault(t *testing.T) {
+ p := &Probe{}
+ p.RegisterService("one", "two")
+ p.UpdateStatus("one", ServiceStatusRunning)
+ p.UpdateStatus("two", ServiceStatusRunning)
+
+ req := httptest.NewRequest("GET", "http://example.com/healthz", nil)
+ w := httptest.NewRecorder()
+ p.healthzFunc(w, req)
+ resp := w.Result()
+ assert.Equal(t, http.StatusOK, resp.StatusCode, "invalid status code")
+}
+
+func TestHealthzWithServicesDefaultFailed(t *testing.T) {
+ p := &Probe{}
+ p.RegisterService("one", "two")
+ p.UpdateStatus("one", ServiceStatusFailed)
+
+ req := httptest.NewRequest("GET", "http://example.com/healthz", nil)
+ w := httptest.NewRecorder()
+ p.healthzFunc(w, req)
+ resp := w.Result()
+ assert.Equal(t, http.StatusTeapot, resp.StatusCode, "invalid status code")
+}
+
+func TestSetFuncsToNil(t *testing.T) {
+ p := (&Probe{}).WithReadyFunc(AlwaysTrue).WithHealthFunc(AlwaysFalse)
+ p.WithReadyFunc(nil).WithHealthFunc(nil)
+ assert.Nil(t, p.readyFunc, "ready func not reset to nil")
+ assert.Nil(t, p.healthFunc, "health func not reset to nil")
+}
+
+func TestUpdateStatusFromContext(t *testing.T) {
+ p := &Probe{}
+ p.RegisterService("one")
+ ctx := context.WithValue(context.Background(), ProbeContextKey, p)
+ UpdateStatusFromContext(ctx, "one", ServiceStatusRunning)
+
+ assert.Equal(t, 1, len(p.status), "wrong number of services")
+ _, ok := p.status["one"]
+ assert.True(t, ok, "unable to find registered service")
+ assert.Equal(t, ServiceStatusRunning, p.status["one"], "status not set correctly from context")
+
+}
+
+func TestUpdateStatusFromNilContext(t *testing.T) {
+	p := &Probe{}
+	p.RegisterService("one")
+	// nil is passed on purpose: the helper must ignore it without touching any probe
+	UpdateStatusFromContext(nil, "one", ServiceStatusRunning)
+
+	assert.Equal(t, 1, len(p.status), "wrong number of services")
+	_, ok := p.status["one"]
+	assert.True(t, ok, "unable to find registered service")
+	assert.Equal(t, ServiceStatusUnknown, p.status["one"], "status changed despite nil context")
+}
+
+func TestUpdateStatusFromContextWithoutProbe(t *testing.T) {
+	p := &Probe{}
+	p.RegisterService("one")
+	// context.Background carries no probe, so the update has nowhere to go
+	ctx := context.Background()
+	UpdateStatusFromContext(ctx, "one", ServiceStatusRunning)
+
+	assert.Equal(t, 1, len(p.status), "wrong number of services")
+	_, ok := p.status["one"]
+	assert.True(t, ok, "unable to find registered service")
+	assert.Equal(t, ServiceStatusUnknown, p.status["one"], "status changed although the context carries no probe")
+}
+
+func TestUpdateStatusFromContextWrongType(t *testing.T) {
+	p := &Probe{}
+	p.RegisterService("one")
+	ctx := context.WithValue(context.Background(), ProbeContextKey, "Teapot")
+	UpdateStatusFromContext(ctx, "one", ServiceStatusRunning)
+
+	assert.Equal(t, 1, len(p.status), "wrong number of services")
+	_, ok := p.status["one"]
+	assert.True(t, ok, "unable to find registered service")
+	assert.Equal(t, ServiceStatusUnknown, p.status["one"], "status changed although the context value is not a probe")
+}
+
+func TestUpdateStatusNoRegistered(t *testing.T) {
+	p := (&Probe{}).WithReadyFunc(AlwaysTrue).WithHealthFunc(AlwaysFalse)
+	// UpdateStatus on a name that was never registered must add the service
+	p.UpdateStatus("one", ServiceStatusRunning)
+	assert.Equal(t, 1, len(p.status), "wrong number of services")
+	_, ok := p.status["one"]
+	assert.True(t, ok, "service not added by UpdateStatus")
+	assert.Equal(t, ServiceStatusRunning, p.status["one"], "status not set correctly")
+}