VOL-2009[RO Core doesn't retry KV store connection on startup]

Change-Id: I01ed30d41d968f1bf9e052014eae420973d85266
diff --git a/ro_core/config/config.go b/ro_core/config/config.go
index eea6080..03dedf2 100644
--- a/ro_core/config/config.go
+++ b/ro_core/config/config.go
@@ -25,55 +25,61 @@
 
 // RO Core service default constants
 const (
-	ConsulStoreName               = "consul"
-	EtcdStoreName                 = "etcd"
-	default_InstanceID            = "rocore001"
-	default_GrpcPort              = 50057
-	default_GrpcHost              = ""
-	default_KVStoreType           = EtcdStoreName
-	default_KVStoreTimeout        = 5 //in seconds
-	default_KVStoreHost           = "127.0.0.1"
-	default_KVStorePort           = 2379 // Consul = 8500; Etcd = 2379
-	default_KVTxnKeyDelTime       = 60
-	default_LogLevel              = 0
-	default_Banner                = false
-	default_DisplayVersionOnly    = false
-	default_CoreTopic             = "rocore"
-	default_ROCoreEndpoint        = "rocore"
-	default_ROCoreKey             = "pki/voltha.key"
-	default_ROCoreCert            = "pki/voltha.crt"
-	default_ROCoreCA              = "pki/voltha-CA.pem"
-	default_Affinity_Router_Topic = "affinityRouter"
-	default_ProbeHost             = ""
-	default_ProbePort             = 8080
-	default_LiveProbeInterval     = 60 * time.Second
-	default_NotLiveProbeInterval  = 5 * time.Second // Probe more frequently to detect Recovery early
+	ConsulStoreName                 = "consul"
+	EtcdStoreName                   = "etcd"
+	default_InstanceID              = "rocore001"
+	default_GrpcPort                = 50057
+	default_GrpcHost                = ""
+	default_KVStoreType             = EtcdStoreName
+	default_KVStoreTimeout          = 5 //in seconds
+	default_KVStoreHost             = "127.0.0.1"
+	default_KVStorePort             = 2379 // Consul = 8500; Etcd = 2379
+	default_KVTxnKeyDelTime         = 60
+	default_LogLevel                = 0
+	default_Banner                  = false
+	default_DisplayVersionOnly      = false
+	default_CoreTopic               = "rocore"
+	default_ROCoreEndpoint          = "rocore"
+	default_ROCoreKey               = "pki/voltha.key"
+	default_ROCoreCert              = "pki/voltha.crt"
+	default_ROCoreCA                = "pki/voltha-CA.pem"
+	default_Affinity_Router_Topic   = "affinityRouter"
+	default_ProbeHost               = ""
+	default_ProbePort               = 8080
+	default_LiveProbeInterval       = 60 * time.Second
+	default_NotLiveProbeInterval    = 5 * time.Second // Probe more frequently to detect Recovery early
+	default_CoreTimeout             = 59 * time.Second
+	default_MaxConnectionRetries    = -1              // retries forever
+	default_ConnectionRetryInterval = 2 * time.Second // a time.Duration; the flag takes values such as "2s"
 )
 
 // ROCoreFlags represents the set of configurations used by the read-only core service
 type ROCoreFlags struct {
 	// Command line parameters
-	InstanceID           string
-	ROCoreEndpoint       string
-	GrpcHost             string
-	GrpcPort             int
-	KVStoreType          string
-	KVStoreTimeout       int // in seconds
-	KVStoreHost          string
-	KVStorePort          int
-	KVTxnKeyDelTime      int
-	CoreTopic            string
-	LogLevel             int
-	Banner               bool
-	DisplayVersionOnly   bool
-	ROCoreKey            string
-	ROCoreCert           string
-	ROCoreCA             string
-	AffinityRouterTopic  string
-	ProbeHost            string
-	ProbePort            int
-	LiveProbeInterval    time.Duration
-	NotLiveProbeInterval time.Duration
+	InstanceID              string
+	ROCoreEndpoint          string
+	GrpcHost                string
+	GrpcPort                int
+	KVStoreType             string
+	KVStoreTimeout          int // in seconds
+	KVStoreHost             string
+	KVStorePort             int
+	KVTxnKeyDelTime         int
+	CoreTopic               string
+	LogLevel                int
+	Banner                  bool
+	DisplayVersionOnly      bool
+	ROCoreKey               string
+	ROCoreCert              string
+	ROCoreCA                string
+	AffinityRouterTopic     string
+	ProbeHost               string
+	ProbePort               int
+	LiveProbeInterval       time.Duration
+	NotLiveProbeInterval    time.Duration
+	CoreTimeout             time.Duration
+	MaxConnectionRetries    int
+	ConnectionRetryInterval time.Duration
 }
 
 func init() {
@@ -83,27 +89,30 @@
 // NewROCoreFlags returns a new ROCore config
 func NewROCoreFlags() *ROCoreFlags {
 	var roCoreFlag = ROCoreFlags{ // Default values
-		InstanceID:           default_InstanceID,
-		ROCoreEndpoint:       default_ROCoreEndpoint,
-		GrpcHost:             default_GrpcHost,
-		GrpcPort:             default_GrpcPort,
-		KVStoreType:          default_KVStoreType,
-		KVStoreTimeout:       default_KVStoreTimeout,
-		KVStoreHost:          default_KVStoreHost,
-		KVStorePort:          default_KVStorePort,
-		KVTxnKeyDelTime:      default_KVTxnKeyDelTime,
-		CoreTopic:            default_CoreTopic,
-		LogLevel:             default_LogLevel,
-		Banner:               default_Banner,
-		DisplayVersionOnly:   default_DisplayVersionOnly,
-		ROCoreKey:            default_ROCoreKey,
-		ROCoreCert:           default_ROCoreCert,
-		ROCoreCA:             default_ROCoreCA,
-		AffinityRouterTopic:  default_Affinity_Router_Topic,
-		ProbeHost:            default_ProbeHost,
-		ProbePort:            default_ProbePort,
-		LiveProbeInterval:    default_LiveProbeInterval,
-		NotLiveProbeInterval: default_NotLiveProbeInterval,
+		InstanceID:              default_InstanceID,
+		ROCoreEndpoint:          default_ROCoreEndpoint,
+		GrpcHost:                default_GrpcHost,
+		GrpcPort:                default_GrpcPort,
+		KVStoreType:             default_KVStoreType,
+		KVStoreTimeout:          default_KVStoreTimeout,
+		KVStoreHost:             default_KVStoreHost,
+		KVStorePort:             default_KVStorePort,
+		KVTxnKeyDelTime:         default_KVTxnKeyDelTime,
+		CoreTopic:               default_CoreTopic,
+		LogLevel:                default_LogLevel,
+		Banner:                  default_Banner,
+		DisplayVersionOnly:      default_DisplayVersionOnly,
+		ROCoreKey:               default_ROCoreKey,
+		ROCoreCert:              default_ROCoreCert,
+		ROCoreCA:                default_ROCoreCA,
+		AffinityRouterTopic:     default_Affinity_Router_Topic,
+		ProbeHost:               default_ProbeHost,
+		ProbePort:               default_ProbePort,
+		LiveProbeInterval:       default_LiveProbeInterval,
+		NotLiveProbeInterval:    default_NotLiveProbeInterval,
+		CoreTimeout:             default_CoreTimeout,
+		MaxConnectionRetries:    default_MaxConnectionRetries,
+		ConnectionRetryInterval: default_ConnectionRetryInterval,
 	}
 	return &roCoreFlag
 }
@@ -164,6 +173,15 @@
 	help = fmt.Sprintf("Time interval between liveness probes while in a not live state")
 	flag.DurationVar(&(cf.NotLiveProbeInterval), "not_live_probe_interval", default_NotLiveProbeInterval, help)
 
+	help = fmt.Sprintf("The maximum time the core will wait while attempting to connect to a dependent component duration")
+	flag.DurationVar(&(cf.CoreTimeout), "core_timeout", default_CoreTimeout, help)
+
+	help = fmt.Sprintf("The number of retries to connect to a dependent component")
+	flag.IntVar(&(cf.MaxConnectionRetries), "max_connection_retries", default_MaxConnectionRetries, help)
+
+	help = fmt.Sprintf("The duration between each connection retry attempt ")
+	flag.DurationVar(&(cf.ConnectionRetryInterval), "connection_retry_interval", default_ConnectionRetryInterval, help)
+
 	flag.Parse()
 
 	containerName := getContainerInfo()
diff --git a/ro_core/core/core.go b/ro_core/core/core.go
index d022266..797c05a 100644
--- a/ro_core/core/core.go
+++ b/ro_core/core/core.go
@@ -26,6 +26,8 @@
 	"github.com/opencord/voltha-lib-go/v2/pkg/probe"
 	"github.com/opencord/voltha-protos/v2/go/voltha"
 	"google.golang.org/grpc"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/status"
 	"time"
 )
 
@@ -80,8 +82,48 @@
 	return &core
 }
 
+// waitUntilKVStoreReachableOrMaxTries blocks until the KV store answers a connectivity check. It gives up
+// with codes.Unavailable once maxRetries retries have failed (-1 retries forever), or with ctx.Err() if ctx
+// is done while it waits retryInterval between attempts.
+func (core *Core) waitUntilKVStoreReachableOrMaxTries(ctx context.Context, maxRetries int, retryInterval time.Duration) error {
+	log.Infow("verifying-KV-store-connectivity", log.Fields{"host": core.config.KVStoreHost,
+		"port": core.config.KVStorePort, "retries": maxRetries, "retryInterval": retryInterval})
+
+	// IsConnectionUp is passed whole seconds; clamp to 1 so a sub-second CoreTimeout is not truncated to 0.
+	timeout := int(core.config.CoreTimeout.Seconds())
+	if timeout < 1 {
+		timeout = 1
+	}
+	count := 0
+	for !core.kvClient.IsConnectionUp(timeout) {
+		log.Info("KV-store-unreachable")
+		// maxRetries == -1 means retry forever; otherwise give up once the retry budget is spent.
+		if maxRetries != -1 && count >= maxRetries {
+			return status.Error(codes.Unavailable, "kv store unreachable")
+		}
+		count++
+		// Wait before retrying, but return immediately if the caller cancels the context.
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		case <-time.After(retryInterval):
+		}
+		log.Infow("retry-KV-store-connectivity", log.Fields{"retryCount": count, "maxRetries": maxRetries, "retryInterval": retryInterval})
+	}
+	log.Info("KV-store-reachable")
+	return nil
+}
+
 func (core *Core) Start(ctx context.Context) {
 	log.Info("starting-adaptercore", log.Fields{"coreId": core.instanceId})
+
+	// Wait until connection to KV Store is up
+	if err := core.waitUntilKVStoreReachableOrMaxTries(ctx, core.config.MaxConnectionRetries, core.config.ConnectionRetryInterval); err != nil {
+		log.Fatal("Unable-to-connect-to-KV-store")
+	}
+
+	probe.UpdateStatusFromContext(ctx, "kv-store", probe.ServiceStatusRunning)
+
 	core.genericMgr = newModelProxyManager(core.clusterDataProxy)
 	core.deviceMgr = newDeviceManager(core.clusterDataProxy, core.instanceId)
 	core.logicalDeviceMgr = newLogicalDeviceManager(core.deviceMgr, core.clusterDataProxy)
diff --git a/ro_core/core/core_test.go b/ro_core/core/core_test.go
index 948e63b..88bd561 100644
--- a/ro_core/core/core_test.go
+++ b/ro_core/core/core_test.go
@@ -18,14 +18,15 @@
 import (
 	"context"
 	"errors"
+	"fmt"
 	"github.com/opencord/voltha-go/ro_core/config"
 	"github.com/opencord/voltha-lib-go/v2/pkg/db/kvstore"
 	grpcserver "github.com/opencord/voltha-lib-go/v2/pkg/grpc"
 	"github.com/opencord/voltha-lib-go/v2/pkg/log"
+	"github.com/opencord/voltha-lib-go/v2/pkg/mocks"
 	ic "github.com/opencord/voltha-protos/v2/go/inter_container"
 	"github.com/phayes/freeport"
 	"github.com/stretchr/testify/assert"
-	"strconv"
 	"testing"
 )
 
@@ -63,19 +64,31 @@
 
 func MakeTestNewCore() (*config.ROCoreFlags, *roCore) {
 
-	freePort, errP := freeport.GetFreePort()
-	if errP == nil {
-		freePortStr := strconv.Itoa(freePort)
+	clientPort, err := freeport.GetFreePort()
+	if err == nil {
+		peerPort, err := freeport.GetFreePort()
+		if err != nil {
+			log.Fatal(err)
+		}
+		etcdServer := mocks.StartEtcdServer(mocks.MKConfig("voltha.mock.test", clientPort, peerPort, "voltha.lib.mocks.etcd", "error"))
+		if etcdServer == nil {
+			log.Fatal("Embedded server failed to start")
+		}
+		clientAddr := fmt.Sprintf("localhost:%d", clientPort)
 
 		roCoreFlgs := config.NewROCoreFlags()
 		roC := newROCore(roCoreFlgs)
 		if (roC != nil) && (roCoreFlgs != nil) {
-			addr := "127.0.0.1" + ":" + freePortStr
-			cli, err := newKVClient("etcd", addr, 5)
+			cli, err := newKVClient("etcd", clientAddr, 5)
 			if err == nil {
 				roC.kvClient = cli
 				return roCoreFlgs, roC
 			}
+			if err != nil {
+				etcdServer.Stop()
+				log.Fatal("Failed to create an Etcd client")
+			}
+
 		}
 	}
 	return nil, nil