[VOL-2099] Modify backend to Monitor and report KVStore Liveness state
Added all Unit Tests for backend.go
Change-Id: If6a43a763f14a81f78968c4617702672c2ab7eac
diff --git a/go.mod b/go.mod
index 5549427..9d74066 100644
--- a/go.mod
+++ b/go.mod
@@ -38,6 +38,7 @@
golang.org/x/net v0.0.0-20190930134127-c5a3c61f89f3 // indirect
golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24 // indirect
golang.org/x/text v0.3.2 // indirect
+ google.golang.org/appengine v1.4.0 // indirect
google.golang.org/genproto v0.0.0-20190927181202-20e1ac93f88c // indirect
google.golang.org/grpc v1.24.0
gopkg.in/jcmturner/goidentity.v3 v3.0.0 // indirect
diff --git a/pkg/db/backend.go b/pkg/db/backend.go
index c319d99..b2547c2 100644
--- a/pkg/db/backend.go
+++ b/pkg/db/backend.go
@@ -17,23 +17,36 @@
package db
import (
+ "context"
"errors"
"fmt"
"github.com/opencord/voltha-lib-go/v2/pkg/db/kvstore"
"github.com/opencord/voltha-lib-go/v2/pkg/log"
+ "google.golang.org/grpc/codes"
+ "google.golang.org/grpc/status"
"strconv"
"sync"
+ "time"
+)
+
+const (
+ // Default Minimal Interval for posting alive state of backend kvstore on Liveness Channel
+ DefaultLivenessChannelInterval = time.Second * 30
)
// Backend structure holds details for accessing the kv store
type Backend struct {
sync.RWMutex
- Client kvstore.Client
- StoreType string
- Host string
- Port int
- Timeout int
- PathPrefix string
+ Client kvstore.Client
+ StoreType string
+ Host string
+ Port int
+ Timeout int
+ PathPrefix string
+ alive bool // Is this backend connection alive?
+ liveness chan bool // channel to post alive state
+ LivenessChannelInterval time.Duration // regularly push alive state beyond this interval
+ lastLivenessTime time.Time // Instant of last alive state push
}
// NewBackend creates a new instance of a Backend structure
@@ -41,11 +54,13 @@
var err error
b := &Backend{
- StoreType: storeType,
- Host: host,
- Port: port,
- Timeout: timeout,
- PathPrefix: pathPrefix,
+ StoreType: storeType,
+ Host: host,
+ Port: port,
+ Timeout: timeout,
+ LivenessChannelInterval: DefaultLivenessChannelInterval,
+ PathPrefix: pathPrefix,
+ alive: false, // connection considered down at start
}
address := host + ":" + strconv.Itoa(port)
@@ -76,6 +91,100 @@
return path
}
+func (b *Backend) updateLiveness(alive bool) {
+ // Periodically push stream of liveness data to the channel,
+ // so that in a live state, the core does not timeout and
+ // send a forced liveness message. Push alive state if the
+ // last push to channel was beyond livenessChannelInterval
+ if b.liveness != nil {
+
+ if b.alive != alive {
+ log.Debug("update-liveness-channel-reason-change")
+ b.liveness <- alive
+ b.lastLivenessTime = time.Now()
+ } else if time.Now().Sub(b.lastLivenessTime) > b.LivenessChannelInterval {
+ log.Debug("update-liveness-channel-reason-interval")
+ b.liveness <- alive
+ b.lastLivenessTime = time.Now()
+ }
+ }
+
+ // Emit log message only for alive state change
+ if b.alive != alive {
+ log.Debugw("change-kvstore-alive-status", log.Fields{"alive": alive})
+ b.alive = alive
+ }
+}
+
+// Perform a dummy Key Lookup on kvstore to test Connection Liveness and
+// post on Liveness channel
+func (b *Backend) PerformLivenessCheck(timeout int) bool {
+ alive := b.Client.IsConnectionUp(timeout)
+ log.Debugw("kvstore-liveness-check-result", log.Fields{"alive": alive})
+
+ b.updateLiveness(alive)
+ return alive
+}
+
+// Enable the liveness monitor channel. This channel will report
+// a "true" or "false" on every kvstore operation which indicates whether
+// or not the connection is still Live. This channel is then picked up
+// by the service (i.e. rw_core / ro_core) to update readiness status
+// and/or take other actions.
+func (b *Backend) EnableLivenessChannel() chan bool {
+ log.Debug("enable-kvstore-liveness-channel")
+
+ if b.liveness == nil {
+ log.Debug("create-kvstore-liveness-channel")
+
+ // Channel size of 10 to avoid any possibility of blocking in Load conditions
+ b.liveness = make(chan bool, 10)
+
+ // Post initial alive state
+ b.liveness <- b.alive
+ b.lastLivenessTime = time.Now()
+ }
+
+ return b.liveness
+}
+
+// Extract Alive status of Kvstore based on type of error
+func (b *Backend) isErrorIndicatingAliveKvstore(err error) bool {
+ // Alive unless observed an error indicating so
+ alive := true
+
+ if err != nil {
+
+ // timeout indicates kvstore not reachable/alive
+ if err == context.DeadlineExceeded {
+ alive = false
+ }
+
+ // Need to analyze client-specific errors based on backend type
+ if b.StoreType == "etcd" {
+
+ // For etcd backend, consider not-alive only for errors indicating
+ // timedout request or unavailable/corrupted cluster. For all remaining
+ // error codes listed in https://godoc.org/google.golang.org/grpc/codes#Code,
+ // we would not infer a not-alive backend because such a error may also
+ // occur due to bad client requests or sequence of operations
+ switch status.Code(err) {
+ case codes.DeadlineExceeded:
+ fallthrough
+ case codes.Unavailable:
+ fallthrough
+ case codes.DataLoss:
+ alive = false
+ }
+
+ //} else {
+ // TODO: Implement for consul backend; would it be needed ever?
+ }
+ }
+
+ return alive
+}
+
// List retrieves one or more items that match the specified key
func (b *Backend) List(key string) (map[string]*kvstore.KVPair, error) {
b.Lock()
@@ -84,7 +193,11 @@
formattedPath := b.makePath(key)
log.Debugw("listing-key", log.Fields{"key": key, "path": formattedPath})
- return b.Client.List(formattedPath, b.Timeout)
+ pair, err := b.Client.List(formattedPath, b.Timeout)
+
+ b.updateLiveness(b.isErrorIndicatingAliveKvstore(err))
+
+ return pair, err
}
// Get retrieves an item that matches the specified key
@@ -95,7 +208,11 @@
formattedPath := b.makePath(key)
log.Debugw("getting-key", log.Fields{"key": key, "path": formattedPath})
- return b.Client.Get(formattedPath, b.Timeout)
+ pair, err := b.Client.Get(formattedPath, b.Timeout)
+
+ b.updateLiveness(b.isErrorIndicatingAliveKvstore(err))
+
+ return pair, err
}
// Put stores an item value under the specifed key
@@ -106,7 +223,11 @@
formattedPath := b.makePath(key)
log.Debugw("putting-key", log.Fields{"key": key, "value": string(value.([]byte)), "path": formattedPath})
- return b.Client.Put(formattedPath, value, b.Timeout)
+ err := b.Client.Put(formattedPath, value, b.Timeout)
+
+ b.updateLiveness(b.isErrorIndicatingAliveKvstore(err))
+
+ return err
}
// Delete removes an item under the specified key
@@ -117,7 +238,11 @@
formattedPath := b.makePath(key)
log.Debugw("deleting-key", log.Fields{"key": key, "path": formattedPath})
- return b.Client.Delete(formattedPath, b.Timeout)
+ err := b.Client.Delete(formattedPath, b.Timeout)
+
+ b.updateLiveness(b.isErrorIndicatingAliveKvstore(err))
+
+ return err
}
// CreateWatch starts watching events for the specified key
diff --git a/pkg/db/backend_test.go b/pkg/db/backend_test.go
new file mode 100644
index 0000000..3e1dce3
--- /dev/null
+++ b/pkg/db/backend_test.go
@@ -0,0 +1,371 @@
+/*
+ * Copyright 2019-present Open Networking Foundation
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+
+ * http://www.apache.org/licenses/LICENSE-2.0
+
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package db
+
+import (
+ "context"
+ "github.com/opencord/voltha-lib-go/v2/pkg/log"
+ "github.com/opencord/voltha-lib-go/v2/pkg/mocks"
+ "github.com/stretchr/testify/assert"
+ "google.golang.org/grpc/codes"
+ "google.golang.org/grpc/status"
+ "os"
+ "testing"
+ "time"
+)
+
+func init() {
+ log.AddPackage(log.JSON, log.PanicLevel, nil)
+}
+
+const (
+ embedEtcdServerHost = "localhost"
+ embedEtcdServerPort = 2379
+ dummyEtcdServerPort = 12379
+ defaultTimeout = 1
+ defaultPathPrefix = "Prefix"
+)
+
+func TestMain(m *testing.M) {
+ etcdServer := mocks.StartEtcdServer(nil)
+
+ res := m.Run()
+
+ etcdServer.Stop()
+ os.Exit(res)
+}
+
+func provisionBackendWithEmbeddedEtcdServer(t *testing.T) *Backend {
+ backend := NewBackend("etcd", embedEtcdServerHost, embedEtcdServerPort, defaultTimeout, defaultPathPrefix)
+ assert.NotNil(t, backend)
+ assert.NotNil(t, backend.Client)
+ return backend
+}
+
+func provisionBackendWithDummyEtcdServer(t *testing.T) *Backend {
+ backend := NewBackend("etcd", embedEtcdServerHost, dummyEtcdServerPort, defaultTimeout, defaultPathPrefix)
+ assert.NotNil(t, backend)
+ assert.NotNil(t, backend.Client)
+ return backend
+}
+
+// Create instance using Etcd Kvstore
+func TestNewBackend_EtcdKvStore(t *testing.T) {
+ backend := NewBackend("etcd", embedEtcdServerHost, embedEtcdServerPort, defaultTimeout, defaultPathPrefix)
+
+ // Verify all attributes of backend have got set correctly
+ assert.NotNil(t, backend)
+ assert.NotNil(t, backend.Client)
+ assert.Equal(t, backend.StoreType, "etcd")
+ assert.Equal(t, backend.Host, embedEtcdServerHost)
+ assert.Equal(t, backend.Port, embedEtcdServerPort)
+ assert.Equal(t, backend.Timeout, defaultTimeout)
+ assert.Equal(t, backend.PathPrefix, defaultPathPrefix)
+ assert.Equal(t, backend.alive, false) // backend is not alive at start
+ assert.Nil(t, backend.liveness) // no liveness channel is created at start
+ assert.Equal(t, backend.LivenessChannelInterval, DefaultLivenessChannelInterval)
+}
+
+// Create instance using Consul Kvstore
+func TestNewBackend_ConsulKvStore(t *testing.T) {
+ backend := NewBackend("consul", embedEtcdServerHost, embedEtcdServerPort, defaultTimeout, defaultPathPrefix)
+
+ // Verify kvstore type attribute of backend has got set correctly
+ assert.NotNil(t, backend)
+ assert.NotNil(t, backend.Client)
+ assert.Equal(t, backend.StoreType, "consul")
+}
+
+// Create instance using Invalid Kvstore; instance creation should fail
+func TestNewBackend_InvalidKvstore(t *testing.T) {
+ backend := NewBackend("unknown", embedEtcdServerHost, embedEtcdServerPort, defaultTimeout, defaultPathPrefix)
+
+ assert.NotNil(t, backend)
+ assert.Nil(t, backend.Client)
+}
+
+func TestMakePath(t *testing.T) {
+ backend := provisionBackendWithEmbeddedEtcdServer(t)
+ path := backend.makePath("Suffix")
+ assert.Equal(t, defaultPathPrefix+"/Suffix", path)
+}
+
+// Liveness Check against Embedded Etcd Server should return alive state
+func TestPerformLivenessCheck_EmbeddedEtcdServer(t *testing.T) {
+ backend := provisionBackendWithEmbeddedEtcdServer(t)
+ alive := backend.PerformLivenessCheck(defaultTimeout)
+ assert.True(t, alive)
+}
+
+// Liveness Check against Dummy Etcd Server should return not-alive state
+func TestPerformLivenessCheck_DummyEtcdServer(t *testing.T) {
+ backend := provisionBackendWithDummyEtcdServer(t)
+ alive := backend.PerformLivenessCheck(defaultTimeout)
+ assert.False(t, alive)
+}
+
+// Enabling Liveness Channel before First Liveness Check
+func TestEnableLivenessChannel_EmbeddedEtcdServer_BeforeLivenessCheck(t *testing.T) {
+ backend := provisionBackendWithEmbeddedEtcdServer(t)
+
+ alive := backend.EnableLivenessChannel()
+ assert.NotNil(t, alive)
+ assert.Equal(t, 1, len(alive))
+ assert.Equal(t, false, <-alive)
+ assert.NotNil(t, backend.liveness)
+}
+
+// Enabling Liveness Channel after First Liveness Check
+func TestEnableLivenessChannel_EmbeddedEtcdServer_AfterLivenessCheck(t *testing.T) {
+ backend := provisionBackendWithEmbeddedEtcdServer(t)
+ backend.PerformLivenessCheck(defaultTimeout)
+
+ alive := backend.EnableLivenessChannel()
+ assert.NotNil(t, alive)
+ assert.Equal(t, 1, len(alive))
+ assert.Equal(t, true, <-alive)
+ assert.NotNil(t, backend.liveness)
+}
+
+// Update Liveness with alive status change
+func TestUpdateLiveness_AliveStatusChange(t *testing.T) {
+ backend := provisionBackendWithEmbeddedEtcdServer(t)
+ // Enable Liveness Channel and verify initial state is not-alive
+ aliveState := backend.EnableLivenessChannel()
+ assert.NotNil(t, aliveState)
+ assert.Equal(t, 1, len(backend.liveness))
+ assert.Equal(t, false, <-backend.liveness)
+ lastUpdateTime := backend.lastLivenessTime
+
+ // Update with changed alive state. Verify alive state push & liveness time update
+ backend.updateLiveness(true)
+ assert.Equal(t, 1, len(backend.liveness))
+ assert.Equal(t, true, <-backend.liveness)
+ assert.NotEqual(t, lastUpdateTime, backend.lastLivenessTime)
+}
+
+// Update Liveness with same alive status reporting
+func TestUpdateLiveness_AliveStatusUnchanged(t *testing.T) {
+ backend := provisionBackendWithEmbeddedEtcdServer(t)
+ // Enable Liveness Channel and verify initial state is not-alive
+ aliveState := backend.EnableLivenessChannel()
+ assert.NotNil(t, aliveState)
+ assert.Equal(t, false, <-backend.liveness)
+ lastUpdateTime := backend.lastLivenessTime
+
+ // Update with same alive state. Verify no further alive state push
+ backend.updateLiveness(false)
+ assert.Equal(t, 0, len(backend.liveness))
+ assert.Equal(t, lastUpdateTime, backend.lastLivenessTime)
+
+ // Now set lastUpdateTime 10 min back and push again
+ tenMinDuration, _ := time.ParseDuration("10m")
+ backend.lastLivenessTime = time.Now().Add(-tenMinDuration)
+ lastUpdateTime = backend.lastLivenessTime
+
+ backend.updateLiveness(false)
+ assert.Equal(t, 1, len(backend.liveness))
+ assert.Equal(t, false, <-backend.liveness)
+ assert.NotEqual(t, lastUpdateTime, backend.lastLivenessTime)
+}
+
+func TestIsErrorIndicatingAliveKvstore(t *testing.T) {
+ tests := []struct {
+ name string
+ arg error
+ want bool
+ }{
+ {"No Error", nil, true},
+ {"Request Canceled", context.Canceled, true},
+ {"Request Timeout", context.DeadlineExceeded, false},
+ {"Etcd Error - InvalidArgument", status.New(codes.InvalidArgument, "").Err(), true},
+ {"Etcd Error - DeadlineExceeded", status.New(codes.DeadlineExceeded, "").Err(), false},
+ {"Etcd Error - Unavailable", status.New(codes.Unavailable, "").Err(), false},
+ {"Etcd Error - DataLoss", status.New(codes.DataLoss, "").Err(), false},
+ {"Etcd Error - NotFound", status.New(codes.NotFound, "").Err(), true},
+ {"Etcd Error - PermissionDenied ", status.New(codes.PermissionDenied, "").Err(), true},
+ {"Etcd Error - FailedPrecondition ", status.New(codes.FailedPrecondition, "").Err(), true},
+ }
+
+ backend := provisionBackendWithEmbeddedEtcdServer(t)
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ if backend.isErrorIndicatingAliveKvstore(tt.arg) != tt.want {
+ t.Errorf("isErrorIndicatingAliveKvstore failed for %s: expected %t but got %t", tt.name, tt.want, !tt.want)
+ }
+ })
+ }
+}
+
+func TestPut_EmbeddedEtcdServer(t *testing.T) {
+ backend := provisionBackendWithEmbeddedEtcdServer(t)
+ err := backend.Put("key1", []uint8("value1"))
+ assert.Nil(t, err)
+
+ // Assert alive state has become true
+ assert.True(t, backend.alive)
+
+ // Assert that kvstore has this value stored
+ kvpair, err := backend.Get("key1")
+ assert.NotNil(t, kvpair)
+ assert.Equal(t, defaultPathPrefix+"/key1", kvpair.Key)
+ assert.Equal(t, []uint8("value1"), kvpair.Value)
+
+ // Assert that Put overrides the Value
+ err = backend.Put("key1", []uint8("value11"))
+ assert.Nil(t, err)
+ kvpair, err = backend.Get("key1")
+ assert.NotNil(t, kvpair)
+ assert.Equal(t, defaultPathPrefix+"/key1", kvpair.Key)
+ assert.Equal(t, []uint8("value11"), kvpair.Value)
+}
+
+// Put operation should fail against Dummy Non-existent Etcd Server
+func TestPut_DummyEtcdServer(t *testing.T) {
+ backend := provisionBackendWithDummyEtcdServer(t)
+ err := backend.Put("key1", []uint8("value1"))
+ assert.NotNil(t, err)
+
+ // Assert alive state is still false
+ assert.False(t, backend.alive)
+}
+
+// Test Get for existing and non-existing key
+func TestGet_EmbeddedEtcdServer(t *testing.T) {
+ backend := provisionBackendWithEmbeddedEtcdServer(t)
+ err := backend.Put("key2", []uint8("value2"))
+
+ // Assert alive state has become true
+ assert.True(t, backend.alive)
+
+ // Assert that kvstore has this key stored
+ kvpair, err := backend.Get("key2")
+ assert.NotNil(t, kvpair)
+ assert.Nil(t, err)
+ assert.Equal(t, defaultPathPrefix+"/key2", kvpair.Key)
+ assert.Equal(t, []uint8("value2"), kvpair.Value)
+
+ // Assert that Get works fine for absent key3
+ kvpair, err = backend.Get("key3")
+ assert.Nil(t, kvpair)
+ assert.Nil(t, err) // no error as lookup is successful
+}
+
+// Get operation should fail against Dummy Non-existent Etcd Server
+func TestGet_DummyEtcdServer(t *testing.T) {
+ backend := provisionBackendWithDummyEtcdServer(t)
+ kvpair, err := backend.Get("key2")
+ assert.NotNil(t, err)
+ assert.Nil(t, kvpair)
+
+ // Assert alive state is still false
+ assert.False(t, backend.alive)
+}
+
+// Test Delete for existing and non-existing key
+func TestDelete_EmbeddedEtcdServer(t *testing.T) {
+ backend := provisionBackendWithEmbeddedEtcdServer(t)
+ err := backend.Put("key3", []uint8("value3"))
+
+ // Assert alive state has become true
+ assert.True(t, backend.alive)
+
+ // Assert that kvstore has this key stored
+ kvpair, err := backend.Get("key3")
+ assert.NotNil(t, kvpair)
+
+ // Delete and Assert that key has been removed
+ err = backend.Delete("key3")
+ assert.Nil(t, err)
+ kvpair, err = backend.Get("key3")
+ assert.Nil(t, kvpair)
+
+ // Assert that Delete silently ignores absent key3
+ err = backend.Delete("key3")
+ assert.Nil(t, err)
+}
+
+// Delete operation should fail against Dummy Non-existent Etcd Server
+func TestDelete_DummyEtcdServer(t *testing.T) {
+ backend := provisionBackendWithDummyEtcdServer(t)
+ err := backend.Delete("key3")
+ assert.NotNil(t, err)
+
+ // Assert alive state is still false
+ assert.False(t, backend.alive)
+}
+
+// Test List for series of values under a key path
+func TestList_EmbeddedEtcdServer(t *testing.T) {
+ key41 := "key4/subkey1"
+ key42 := "key4/subkey2"
+
+ backend := provisionBackendWithEmbeddedEtcdServer(t)
+ backend.Put(key41, []uint8("value4-1"))
+ backend.Put(key42, []uint8("value4-2"))
+
+ // Assert alive state has become true
+ assert.True(t, backend.alive)
+
+ // Assert that Get does not retrieve these Subkeys
+ kvpair, err := backend.Get("key4")
+ assert.Nil(t, kvpair)
+ assert.Nil(t, err)
+
+ // Assert that List operation retrieves these Child Keys
+ kvmap, err := backend.List("key4")
+ assert.NotNil(t, kvmap)
+ assert.Nil(t, err)
+ assert.Equal(t, 2, len(kvmap))
+ fullkey41 := defaultPathPrefix + "/" + key41
+ fullkey42 := defaultPathPrefix + "/" + key42
+ assert.Equal(t, fullkey41, kvmap[fullkey41].Key)
+ assert.Equal(t, []uint8("value4-1"), kvmap[fullkey41].Value)
+ assert.Equal(t, fullkey42, kvmap[fullkey42].Key)
+ assert.Equal(t, []uint8("value4-2"), kvmap[fullkey42].Value)
+}
+
+// List operation should fail against Dummy Non-existent Etcd Server
+func TestList_DummyEtcdServer(t *testing.T) {
+ backend := provisionBackendWithDummyEtcdServer(t)
+ kvmap, err := backend.List("key4")
+ assert.Nil(t, kvmap)
+ assert.NotNil(t, err)
+
+ // Assert alive state is still false
+ assert.False(t, backend.alive)
+}
+
+// Test Create and Delete Watch for Embedded Etcd Server
+func TestCreateWatch_EmbeddedEtcdServer(t *testing.T) {
+ backend := provisionBackendWithEmbeddedEtcdServer(t)
+ eventChan := backend.CreateWatch("key5")
+ assert.NotNil(t, eventChan)
+ assert.Equal(t, 0, len(eventChan))
+
+ // Assert this method does not change alive state
+ assert.False(t, backend.alive)
+
+ // Put a value for watched key and event should appear
+ err := backend.Put("key5", []uint8("value5"))
+ assert.Nil(t, err)
+ time.Sleep(time.Millisecond * 100)
+ assert.Equal(t, 1, len(eventChan))
+
+ backend.DeleteWatch("key5", eventChan)
+}