VOL-2970 - Improved readability & traceability of startup code.
Changed the Start() function to implement the majority of the startup functionality, with fewer helpers. Start() also defines local variables for each component created, to avoid accidentally using a component that isn't ready.
Also merged the rwCore into the Core.
Also changed Core to cancel a local context on shutdown, and then wait for shutdown to complete.
Change-Id: I285e8486773476531e20ec352ff85a1b145432bf
diff --git a/rw_core/core/adapter/manager.go b/rw_core/core/adapter/manager.go
index 11752e1..b552d8f 100644
--- a/rw_core/core/adapter/manager.go
+++ b/rw_core/core/adapter/manager.go
@@ -40,14 +40,12 @@
clusterDataProxy *model.Proxy
onAdapterRestart adapterRestartedHandler
coreInstanceID string
- exitChannel chan int
lockAdaptersMap sync.RWMutex
lockdDeviceTypeToAdapterMap sync.RWMutex
}
func NewAdapterManager(cdProxy *model.Proxy, coreInstanceID string, kafkaClient kafka.Client) *Manager {
aMgr := &Manager{
- exitChannel: make(chan int, 1),
coreInstanceID: coreInstanceID,
clusterDataProxy: cdProxy,
deviceTypes: make(map[string]*voltha.DeviceType),
@@ -65,20 +63,19 @@
aMgr.onAdapterRestart = onAdapterRestart
}
-func (aMgr *Manager) Start(ctx context.Context) error {
+func (aMgr *Manager) Start(ctx context.Context) {
+ probe.UpdateStatusFromContext(ctx, "adapter-manager", probe.ServiceStatusPreparing)
logger.Info("starting-adapter-manager")
// Load the existing adapterAgents and device types - this will also ensure the correct paths have been
// created if there are no data in the dB to start
err := aMgr.loadAdaptersAndDevicetypesInMemory()
if err != nil {
- logger.Errorw("Failed-to-load-adapters-and-device-types-in-memeory", log.Fields{"error": err})
- return err
+ logger.Fatalf("failed-to-load-adapters-and-device-types-in-memory: %s", err)
}
probe.UpdateStatusFromContext(ctx, "adapter-manager", probe.ServiceStatusRunning)
logger.Info("adapter-manager-started")
- return nil
}
//loadAdaptersAndDevicetypesInMemory loads the existing set of adapters and device types in memory
diff --git a/rw_core/core/api/adapter_request_handler.go b/rw_core/core/api/adapter_request_handler.go
index 7c03618..4deca75 100644
--- a/rw_core/core/api/adapter_request_handler.go
+++ b/rw_core/core/api/adapter_request_handler.go
@@ -19,13 +19,10 @@
import (
"context"
"errors"
- "github.com/opencord/voltha-go/rw_core/core/adapter"
- "github.com/opencord/voltha-go/rw_core/core/device"
- "time"
-
"github.com/golang/protobuf/ptypes"
"github.com/golang/protobuf/ptypes/empty"
- "github.com/opencord/voltha-go/db/model"
+ "github.com/opencord/voltha-go/rw_core/core/adapter"
+ "github.com/opencord/voltha-go/rw_core/core/device"
"github.com/opencord/voltha-lib-go/v3/pkg/kafka"
"github.com/opencord/voltha-lib-go/v3/pkg/log"
ic "github.com/opencord/voltha-protos/v3/go/inter_container"
@@ -34,28 +31,16 @@
// AdapterRequestHandlerProxy represent adapter request handler proxy attributes
type AdapterRequestHandlerProxy struct {
- coreInstanceID string
- deviceMgr *device.Manager
- adapterMgr *adapter.Manager
- localDataProxy *model.Proxy
- clusterDataProxy *model.Proxy
- defaultRequestTimeout time.Duration
- longRunningRequestTimeout time.Duration
+ deviceMgr *device.Manager
+ adapterMgr *adapter.Manager
}
// NewAdapterRequestHandlerProxy assigns values for adapter request handler proxy attributes and returns the new instance
-func NewAdapterRequestHandlerProxy(coreInstanceID string, dMgr *device.Manager,
- aMgr *adapter.Manager, cdProxy *model.Proxy, ldProxy *model.Proxy, longRunningRequestTimeout time.Duration,
- defaultRequestTimeout time.Duration) *AdapterRequestHandlerProxy {
- var proxy AdapterRequestHandlerProxy
- proxy.coreInstanceID = coreInstanceID
- proxy.deviceMgr = dMgr
- proxy.clusterDataProxy = cdProxy
- proxy.localDataProxy = ldProxy
- proxy.adapterMgr = aMgr
- proxy.defaultRequestTimeout = defaultRequestTimeout
- proxy.longRunningRequestTimeout = longRunningRequestTimeout
- return &proxy
+func NewAdapterRequestHandlerProxy(dMgr *device.Manager, aMgr *adapter.Manager) *AdapterRequestHandlerProxy {
+ return &AdapterRequestHandlerProxy{
+ deviceMgr: dMgr,
+ adapterMgr: aMgr,
+ }
}
func (rhp *AdapterRequestHandlerProxy) Register(args []*ic.Argument) (*voltha.CoreInstance, error) {
@@ -86,7 +71,7 @@
}
}
}
- logger.Debugw("Register", log.Fields{"adapter": *adapter, "device-types": deviceTypes, "transaction-id": transactionID.Val, "core-id": rhp.coreInstanceID})
+ logger.Debugw("Register", log.Fields{"adapter": *adapter, "device-types": deviceTypes, "transaction-id": transactionID.Val})
return rhp.adapterMgr.RegisterAdapter(adapter, deviceTypes)
}
diff --git a/rw_core/core/api/grpc_nbi_handler_test.go b/rw_core/core/api/grpc_nbi_handler_test.go
index e8b651d..592ccea 100755
--- a/rw_core/core/api/grpc_nbi_handler_test.go
+++ b/rw_core/core/api/grpc_nbi_handler_test.go
@@ -127,16 +127,12 @@
proxy := model.NewProxy(backend, "/")
nb.adapterMgr = adapter.NewAdapterManager(proxy, nb.coreInstanceID, nb.kClient)
nb.deviceMgr, nb.logicalDeviceMgr = device.NewManagers(proxy, nb.adapterMgr, nb.kmp, endpointMgr, cfg.CorePairTopic, nb.coreInstanceID, cfg.DefaultCoreTimeout)
- if err = nb.adapterMgr.Start(ctx); err != nil {
- logger.Fatalf("Cannot start adapterMgr: %s", err)
- }
- nb.deviceMgr.Start(ctx)
- nb.logicalDeviceMgr.Start(ctx)
+ nb.adapterMgr.Start(ctx)
- if err = nb.kmp.Start(); err != nil {
+ if err := nb.kmp.Start(); err != nil {
logger.Fatalf("Cannot start InterContainerProxy: %s", err)
}
- requestProxy := NewAdapterRequestHandlerProxy(nb.coreInstanceID, nb.deviceMgr, nb.adapterMgr, proxy, proxy, cfg.LongRunningRequestTimeout, cfg.DefaultRequestTimeout)
+ requestProxy := NewAdapterRequestHandlerProxy(nb.deviceMgr, nb.adapterMgr)
if err := nb.kmp.SubscribeWithRequestHandlerInterface(kafka.Topic{Name: cfg.CoreTopic}, requestProxy); err != nil {
logger.Fatalf("Cannot add request handler: %s", err)
}
@@ -201,12 +197,6 @@
if nb.kClient != nil {
nb.kClient.Stop()
}
- if nb.logicalDeviceMgr != nil {
- nb.logicalDeviceMgr.Stop(context.Background())
- }
- if nb.deviceMgr != nil {
- nb.deviceMgr.Stop(context.Background())
- }
if nb.kmp != nil {
nb.kmp.Stop()
}
diff --git a/rw_core/core/core.go b/rw_core/core/core.go
index 7cf9f98..0dbecc8 100644
--- a/rw_core/core/core.go
+++ b/rw_core/core/core.go
@@ -18,7 +18,7 @@
import (
"context"
- "sync"
+ "strconv"
"time"
"github.com/opencord/voltha-go/db/model"
@@ -26,446 +26,150 @@
"github.com/opencord/voltha-go/rw_core/core/adapter"
"github.com/opencord/voltha-go/rw_core/core/api"
"github.com/opencord/voltha-go/rw_core/core/device"
+ conf "github.com/opencord/voltha-lib-go/v3/pkg/config"
"github.com/opencord/voltha-lib-go/v3/pkg/db"
- "github.com/opencord/voltha-lib-go/v3/pkg/db/kvstore"
grpcserver "github.com/opencord/voltha-lib-go/v3/pkg/grpc"
"github.com/opencord/voltha-lib-go/v3/pkg/kafka"
"github.com/opencord/voltha-lib-go/v3/pkg/log"
"github.com/opencord/voltha-lib-go/v3/pkg/probe"
"github.com/opencord/voltha-protos/v3/go/voltha"
"google.golang.org/grpc"
- "google.golang.org/grpc/codes"
- "google.golang.org/grpc/status"
)
// Core represent read,write core attributes
type Core struct {
- instanceID string
- deviceMgr *device.Manager
- logicalDeviceMgr *device.LogicalManager
- grpcServer *grpcserver.GrpcServer
- grpcNBIAPIHandler *api.NBIHandler
- adapterMgr *adapter.Manager
- config *config.RWCoreFlags
- kmp kafka.InterContainerProxy
- clusterDataProxy *model.Proxy
- localDataProxy *model.Proxy
- exitChannel chan struct{}
- stopOnce sync.Once
- kvClient kvstore.Client
- backend db.Backend
- kafkaClient kafka.Client
+ shutdown context.CancelFunc
+ stopped chan struct{}
}
// NewCore creates instance of rw core
-func NewCore(ctx context.Context, id string, cf *config.RWCoreFlags, kvClient kvstore.Client, kafkaClient kafka.Client) *Core {
- var core Core
- core.instanceID = id
- core.exitChannel = make(chan struct{})
- core.config = cf
- core.kvClient = kvClient
- core.kafkaClient = kafkaClient
-
- // Configure backend to push Liveness Status at least every (cf.LiveProbeInterval / 2) seconds
- // so as to avoid trigger of Liveness check (due to Liveness timeout) when backend is alive
- livenessChannelInterval := cf.LiveProbeInterval / 2
-
- // Setup the KV store
- core.backend = db.Backend{
- Client: kvClient,
- StoreType: cf.KVStoreType,
- Host: cf.KVStoreHost,
- Port: cf.KVStorePort,
- Timeout: cf.KVStoreTimeout,
- LivenessChannelInterval: livenessChannelInterval,
- PathPrefix: cf.KVStoreDataPrefix}
- return &core
-}
-
-// Start brings up core services
-func (core *Core) Start(ctx context.Context) error {
-
+func NewCore(ctx context.Context, id string, cf *config.RWCoreFlags) *Core {
// If the context has a probe then fetch it and register our services
- var p *probe.Probe
- if value := ctx.Value(probe.ProbeContextKey); value != nil {
- if _, ok := value.(*probe.Probe); ok {
- p = value.(*probe.Probe)
- p.RegisterService(
- "message-bus",
- "kv-store",
- "device-manager",
- "logical-device-manager",
- "adapter-manager",
- "grpc-service",
- )
- }
- }
-
- logger.Info("starting-core-services", log.Fields{"coreId": core.instanceID})
-
- // Wait until connection to KV Store is up
- if err := core.waitUntilKVStoreReachableOrMaxTries(ctx, core.config.MaxConnectionRetries, core.config.ConnectionRetryInterval); err != nil {
- logger.Fatal("Unable-to-connect-to-KV-store")
- }
- if p != nil {
- p.UpdateStatus("kv-store", probe.ServiceStatusRunning)
- }
-
- endpointMgr := kafka.NewEndpointManager(&core.backend)
-
- core.clusterDataProxy = model.NewProxy(&core.backend, "/")
- core.localDataProxy = model.NewProxy(&core.backend, "/")
-
- // core.kmp must be created before deviceMgr and adapterMgr, as they will make
- // private copies of the poiner to core.kmp.
- core.initKafkaManager(ctx)
-
- logger.Debugw("values", log.Fields{"kmp": core.kmp})
- core.adapterMgr = adapter.NewAdapterManager(core.clusterDataProxy, core.instanceID, core.kafkaClient)
- core.deviceMgr, core.logicalDeviceMgr = device.NewManagers(core.clusterDataProxy, core.adapterMgr, core.kmp, endpointMgr, core.config.CorePairTopic, core.instanceID, core.config.DefaultCoreTimeout)
-
- // Start the KafkaManager. This must be done after the deviceMgr, adapterMgr, and
- // logicalDeviceMgr have been created, as once the kmp is started, it will register
- // the above with the kmp.
-
- go core.startKafkaManager(ctx,
- core.config.ConnectionRetryInterval,
- core.config.LiveProbeInterval,
- core.config.NotLiveProbeInterval)
-
- go core.startDeviceManager(ctx)
- go core.startLogicalDeviceManager(ctx)
- go core.startGRPCService(ctx)
- go core.startAdapterManager(ctx)
- go core.monitorKvstoreLiveness(ctx)
-
- logger.Info("core-services-started")
- return nil
-}
-
-// Stop brings down core services
-func (core *Core) Stop(ctx context.Context) {
- core.stopOnce.Do(func() {
- logger.Info("stopping-adaptercore")
- // Signal to the KVStoreMonitor that we are stopping.
- close(core.exitChannel)
- // Stop all the started services
- if core.grpcServer != nil {
- core.grpcServer.Stop()
- }
- if core.logicalDeviceMgr != nil {
- core.logicalDeviceMgr.Stop(ctx)
- }
- if core.deviceMgr != nil {
- core.deviceMgr.Stop(ctx)
- }
- if core.kmp != nil {
- core.kmp.Stop()
- }
- logger.Info("adaptercore-stopped")
- })
-}
-
-//startGRPCService creates the grpc service handlers, registers it to the grpc server and starts the server
-func (core *Core) startGRPCService(ctx context.Context) {
- // create an insecure gserver server
- core.grpcServer = grpcserver.NewGrpcServer(core.config.GrpcHost, core.config.GrpcPort, nil, false, probe.GetProbeFromContext(ctx))
- logger.Info("grpc-server-created")
-
- core.grpcNBIAPIHandler = api.NewNBIHandler(core.deviceMgr, core.logicalDeviceMgr, core.adapterMgr)
- logger.Infow("grpc-handler", log.Fields{"core_binding_key": core.config.CoreBindingKey})
- // Create a function to register the core GRPC service with the GRPC server
- f := func(gs *grpc.Server) {
- voltha.RegisterVolthaServiceServer(
- gs,
- core.grpcNBIAPIHandler,
+ if p := probe.GetProbeFromContext(ctx); p != nil {
+ p.RegisterService(
+ "message-bus",
+ "kv-store",
+ "adapter-manager",
+ "grpc-service",
)
}
- core.grpcServer.AddService(f)
+ // new threads will be given a new cancelable context, so that they can be aborted later when Stop() is called
+ shutdownCtx, cancelCtx := context.WithCancel(ctx)
+
+ core := &Core{shutdown: cancelCtx, stopped: make(chan struct{})}
+ go core.start(shutdownCtx, id, cf)
+ return core
+}
+
+func (core *Core) start(ctx context.Context, id string, cf *config.RWCoreFlags) {
+ logger.Info("starting-core-services", log.Fields{"coreId": id})
+
+ // deferred functions are used to run cleanup
+ // failing partway will stop anything that's been started
+ defer close(core.stopped)
+ defer core.shutdown()
+
+ logger.Info("Starting RW Core components")
+
+ // setup kv client
+ logger.Debugw("create-kv-client", log.Fields{"kvstore": cf.KVStoreType})
+ kvClient, err := newKVClient(cf.KVStoreType, cf.KVStoreHost+":"+strconv.Itoa(cf.KVStorePort), cf.KVStoreTimeout)
+ if err != nil {
+ logger.Fatal(err)
+ }
+ defer stopKVClient(context.Background(), kvClient)
+
+ // sync logging config with kv store
+ cm := conf.NewConfigManager(kvClient, cf.KVStoreType, cf.KVStoreHost, cf.KVStorePort, cf.KVStoreTimeout)
+ go conf.StartLogLevelConfigProcessing(cm, ctx)
+
+ backend := &db.Backend{
+ Client: kvClient,
+ StoreType: cf.KVStoreType,
+ Host: cf.KVStoreHost,
+ Port: cf.KVStorePort,
+ Timeout: cf.KVStoreTimeout,
+ // Configure backend to push Liveness Status at least every (cf.LiveProbeInterval / 2) seconds
+ // so as to avoid trigger of Liveness check (due to Liveness timeout) when backend is alive
+ LivenessChannelInterval: cf.LiveProbeInterval / 2,
+ PathPrefix: cf.KVStoreDataPrefix,
+ }
+
+ // wait until connection to KV Store is up
+ if err := waitUntilKVStoreReachableOrMaxTries(ctx, kvClient, cf.MaxConnectionRetries, cf.ConnectionRetryInterval); err != nil {
+ logger.Fatal("Unable-to-connect-to-KV-store")
+ }
+ go monitorKVStoreLiveness(ctx, backend, cf.LiveProbeInterval, cf.NotLiveProbeInterval)
+
+ // create kafka client
+ kafkaClient := kafka.NewSaramaClient(
+ kafka.Host(cf.KafkaAdapterHost),
+ kafka.Port(cf.KafkaAdapterPort),
+ kafka.ConsumerType(kafka.GroupCustomer),
+ kafka.ProducerReturnOnErrors(true),
+ kafka.ProducerReturnOnSuccess(true),
+ kafka.ProducerMaxRetries(6),
+ kafka.NumPartitions(3),
+ kafka.ConsumerGroupName(id),
+ kafka.ConsumerGroupPrefix(id),
+ kafka.AutoCreateTopic(true),
+ kafka.ProducerFlushFrequency(5),
+ kafka.ProducerRetryBackoff(time.Millisecond*30),
+ kafka.LivenessChannelInterval(cf.LiveProbeInterval/2),
+ )
+ // defer kafkaClient.Stop()
+
+ // create kv proxy
+ proxy := model.NewProxy(backend, "/")
+
+ // load adapters & device types while other things are starting
+ adapterMgr := adapter.NewAdapterManager(proxy, id, kafkaClient)
+ go adapterMgr.Start(ctx)
+
+ // connect to kafka, then wait until reachable and publisher/consumer created
+ // kmp must be created before the device managers, which are constructed with it
+ kmp, err := startKafkInterContainerProxy(ctx, kafkaClient, cf.KafkaAdapterHost, cf.KafkaAdapterPort, cf.CoreTopic, cf.AffinityRouterTopic, cf.ConnectionRetryInterval)
+ if err != nil {
+ logger.Warn("Failed to setup kafka connection")
+ return
+ }
+ defer kmp.Stop()
+ go monitorKafkaLiveness(ctx, kmp, cf.LiveProbeInterval, cf.NotLiveProbeInterval)
+
+ // create the core of the system, the device managers
+ endpointMgr := kafka.NewEndpointManager(backend)
+ deviceMgr, logicalDeviceMgr := device.NewManagers(proxy, adapterMgr, kmp, endpointMgr, cf.CorePairTopic, id, cf.DefaultCoreTimeout)
+
+ // register kafka RPC handler
+ registerAdapterRequestHandlers(kmp, deviceMgr, adapterMgr, cf.CoreTopic, cf.CorePairTopic)
+
+ // start gRPC handler
+ grpcServer := grpcserver.NewGrpcServer(cf.GrpcHost, cf.GrpcPort, nil, false, probe.GetProbeFromContext(ctx))
+ go startGRPCService(ctx, grpcServer, api.NewNBIHandler(deviceMgr, logicalDeviceMgr, adapterMgr))
+ defer grpcServer.Stop()
+
+ // wait for core to be stopped, via Stop() or context cancellation, before running deferred functions
+ <-ctx.Done()
+}
+
+// Stop brings down core services
+func (core *Core) Stop() {
+ core.shutdown()
+ <-core.stopped
+}
+
+// startGRPCService creates the grpc service handlers, registers it to the grpc server and starts the server
+func startGRPCService(ctx context.Context, server *grpcserver.GrpcServer, handler voltha.VolthaServiceServer) {
+ logger.Info("grpc-server-created")
+
+ server.AddService(func(gs *grpc.Server) { voltha.RegisterVolthaServiceServer(gs, handler) })
logger.Info("grpc-service-added")
- /*
- * Start the GRPC server
- *
- * This is a bit sub-optimal here as the grpcServer.Start call does not return (blocks)
- * until something fails, but we want to send a "start" status update. As written this
- * means that we are actually sending the "start" status update before the server is
- * started, which means it is possible that the status is "running" before it actually is.
- *
- * This means that there is a small window in which the core could return its status as
- * ready, when it really isn't.
- */
probe.UpdateStatusFromContext(ctx, "grpc-service", probe.ServiceStatusRunning)
logger.Info("grpc-server-started")
- core.grpcServer.Start(ctx)
+ // Note that there is a small window here in which the core could return its status as ready,
+ // when it really isn't. This is unlikely to cause issues, as the delay is incredibly short.
+ server.Start(ctx)
probe.UpdateStatusFromContext(ctx, "grpc-service", probe.ServiceStatusStopped)
}
-
-// Initialize the kafka manager, but we will start it later
-func (core *Core) initKafkaManager(ctx context.Context) {
- logger.Infow("initialize-kafka-manager", log.Fields{"host": core.config.KafkaAdapterHost,
- "port": core.config.KafkaAdapterPort, "topic": core.config.CoreTopic})
-
- probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusPreparing)
-
- // create the proxy
- core.kmp = kafka.NewInterContainerProxy(
- kafka.InterContainerHost(core.config.KafkaAdapterHost),
- kafka.InterContainerPort(core.config.KafkaAdapterPort),
- kafka.MsgClient(core.kafkaClient),
- kafka.DefaultTopic(&kafka.Topic{Name: core.config.CoreTopic}),
- kafka.DeviceDiscoveryTopic(&kafka.Topic{Name: core.config.AffinityRouterTopic}))
-
- probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusPrepared)
-}
-
-/*
- * KafkaMonitorThread
- *
- * Responsible for starting the Kafka Interadapter Proxy and monitoring its liveness
- * state.
- *
- * Any producer that fails to send will cause KafkaInterContainerProxy to
- * post a false event on its liveness channel. Any producer that succeeds in sending
- * will cause KafkaInterContainerProxy to post a true event on its liveness
- * channel. Group receivers also update liveness state, and a receiver will typically
- * indicate a loss of liveness within 3-5 seconds of Kafka going down. Receivers
- * only indicate restoration of liveness if a message is received. During normal
- * operation, messages will be routinely produced and received, automatically
- * indicating liveness state. These routine liveness indications are rate-limited
- * inside sarama_client.
- *
- * This thread monitors the status of KafkaInterContainerProxy's liveness and pushes
- * that state to the core's readiness probes. If no liveness event has been seen
- * within a timeout, then the thread will make an attempt to produce a "liveness"
- * message, which will in turn trigger a liveness event on the liveness channel, true
- * or false depending on whether the attempt succeeded.
- *
- * The gRPC server in turn monitors the state of the readiness probe and will
- * start issuing UNAVAILABLE response while the probe is not ready.
- *
- * startupRetryInterval -- interval between attempts to start
- * liveProbeInterval -- interval between liveness checks when in a live state
- * notLiveProbeInterval -- interval between liveness checks when in a notLive state
- *
- * liveProbeInterval and notLiveProbeInterval can be configured separately,
- * though the current default is that both are set to 60 seconds.
- */
-
-func (core *Core) startKafkaManager(ctx context.Context, startupRetryInterval time.Duration, liveProbeInterval time.Duration, notLiveProbeInterval time.Duration) {
- logger.Infow("starting-kafka-manager-thread", log.Fields{"host": core.config.KafkaAdapterHost,
- "port": core.config.KafkaAdapterPort, "topic": core.config.CoreTopic})
-
- started := false
- for !started {
- // If we haven't started yet, then try to start
- logger.Infow("starting-kafka-proxy", log.Fields{})
- if err := core.kmp.Start(); err != nil {
- // We failed to start. Delay and then try again later.
- // Don't worry about liveness, as we can't be live until we've started.
- probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusNotReady)
- logger.Infow("error-starting-kafka-messaging-proxy", log.Fields{"error": err})
- time.Sleep(startupRetryInterval)
- } else {
- // We started. We only need to do this once.
- // Next we'll fall through and start checking liveness.
- logger.Infow("started-kafka-proxy", log.Fields{})
-
- // cannot do this until after the kmp is started
- if err := core.registerAdapterRequestHandlers(ctx, core.instanceID, core.deviceMgr, core.logicalDeviceMgr, core.adapterMgr, core.clusterDataProxy, core.localDataProxy); err != nil {
- logger.Fatal("Failure-registering-adapterRequestHandler")
- }
-
- started = true
- }
- }
-
- logger.Info("started-kafka-message-proxy")
-
- livenessChannel := core.kmp.EnableLivenessChannel(true)
-
- logger.Info("enabled-kafka-liveness-channel")
-
- timeout := liveProbeInterval
- for {
- timeoutTimer := time.NewTimer(timeout)
- select {
- case liveness := <-livenessChannel:
- logger.Infow("kafka-manager-thread-liveness-event", log.Fields{"liveness": liveness})
- // there was a state change in Kafka liveness
- if !liveness {
- probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusNotReady)
-
- if core.grpcServer != nil {
- logger.Info("kafka-manager-thread-set-server-notready")
- }
-
- // retry frequently while life is bad
- timeout = notLiveProbeInterval
- } else {
- probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusRunning)
-
- if core.grpcServer != nil {
- logger.Info("kafka-manager-thread-set-server-ready")
- }
-
- // retry infrequently while life is good
- timeout = liveProbeInterval
- }
- if !timeoutTimer.Stop() {
- <-timeoutTimer.C
- }
- case <-timeoutTimer.C:
- logger.Info("kafka-proxy-liveness-recheck")
- // send the liveness probe in a goroutine; we don't want to deadlock ourselves as
- // the liveness probe may wait (and block) writing to our channel.
- go func() {
- err := core.kmp.SendLiveness()
- if err != nil {
- // Catch possible error case if sending liveness after Sarama has been stopped.
- logger.Warnw("error-kafka-send-liveness", log.Fields{"error": err})
- }
- }()
- }
- }
-}
-
-// waitUntilKVStoreReachableOrMaxTries will wait until it can connect to a KV store or until maxtries has been reached
-func (core *Core) waitUntilKVStoreReachableOrMaxTries(ctx context.Context, maxRetries int, retryInterval time.Duration) error {
- logger.Infow("verifying-KV-store-connectivity", log.Fields{"host": core.config.KVStoreHost,
- "port": core.config.KVStorePort, "retries": maxRetries, "retryInterval": retryInterval})
- count := 0
- for {
- if !core.kvClient.IsConnectionUp(ctx) {
- logger.Info("KV-store-unreachable")
- if maxRetries != -1 {
- if count >= maxRetries {
- return status.Error(codes.Unavailable, "kv store unreachable")
- }
- }
- count++
- // Take a nap before retrying
- time.Sleep(retryInterval)
- logger.Infow("retry-KV-store-connectivity", log.Fields{"retryCount": count, "maxRetries": maxRetries, "retryInterval": retryInterval})
-
- } else {
- break
- }
- }
- logger.Info("KV-store-reachable")
- return nil
-}
-
-func (core *Core) registerAdapterRequestHandlers(ctx context.Context, coreInstanceID string, dMgr *device.Manager,
- ldMgr *device.LogicalManager, aMgr *adapter.Manager, cdProxy *model.Proxy, ldProxy *model.Proxy,
-) error {
- requestProxy := api.NewAdapterRequestHandlerProxy(coreInstanceID, dMgr, aMgr, cdProxy, ldProxy,
- core.config.LongRunningRequestTimeout, core.config.DefaultRequestTimeout)
-
- // Register the broadcast topic to handle any core-bound broadcast requests
- if err := core.kmp.SubscribeWithRequestHandlerInterface(kafka.Topic{Name: core.config.CoreTopic}, requestProxy); err != nil {
- logger.Fatalw("Failed-registering-broadcast-handler", log.Fields{"topic": core.config.CoreTopic})
- return err
- }
-
- // Register the core-pair topic to handle core-bound requests destined to the core pair
- if err := core.kmp.SubscribeWithDefaultRequestHandler(kafka.Topic{Name: core.config.CorePairTopic}, kafka.OffsetNewest); err != nil {
- logger.Fatalw("Failed-registering-pair-handler", log.Fields{"topic": core.config.CorePairTopic})
- return err
- }
-
- logger.Info("request-handler-registered")
- return nil
-}
-
-func (core *Core) startDeviceManager(ctx context.Context) {
- logger.Info("DeviceManager-Starting...")
- core.deviceMgr.Start(ctx)
- logger.Info("DeviceManager-Started")
-}
-
-func (core *Core) startLogicalDeviceManager(ctx context.Context) {
- logger.Info("Logical-DeviceManager-Starting...")
- core.logicalDeviceMgr.Start(ctx)
- logger.Info("Logical-DeviceManager-Started")
-}
-
-func (core *Core) startAdapterManager(ctx context.Context) {
- logger.Info("Adapter-Manager-Starting...")
- err := core.adapterMgr.Start(ctx)
- if err != nil {
- logger.Fatalf("failed-to-start-adapter-manager: error %v ", err)
- }
- logger.Info("Adapter-Manager-Started")
-}
-
-/*
-* Thread to monitor kvstore Liveness (connection status)
-*
-* This function constantly monitors Liveness State of kvstore as reported
-* periodically by backend and updates the Status of kv-store service registered
-* with rw_core probe.
-*
-* If no liveness event has been seen within a timeout, then the thread will
-* perform a "liveness" check attempt, which will in turn trigger a liveness event on
-* the liveness channel, true or false depending on whether the attempt succeeded.
-*
-* The gRPC server in turn monitors the state of the readiness probe and will
-* start issuing UNAVAILABLE response while the probe is not ready.
- */
-func (core *Core) monitorKvstoreLiveness(ctx context.Context) {
- logger.Info("start-monitoring-kvstore-liveness")
-
- // Instruct backend to create Liveness channel for transporting state updates
- livenessChannel := core.backend.EnableLivenessChannel()
-
- logger.Debug("enabled-kvstore-liveness-channel")
-
- // Default state for kvstore is alive for rw_core
- timeout := core.config.LiveProbeInterval
-loop:
- for {
- timeoutTimer := time.NewTimer(timeout)
- select {
-
- case liveness := <-livenessChannel:
- logger.Debugw("received-liveness-change-notification", log.Fields{"liveness": liveness})
-
- if !liveness {
- probe.UpdateStatusFromContext(ctx, "kv-store", probe.ServiceStatusNotReady)
-
- if core.grpcServer != nil {
- logger.Info("kvstore-set-server-notready")
- }
-
- timeout = core.config.NotLiveProbeInterval
-
- } else {
- probe.UpdateStatusFromContext(ctx, "kv-store", probe.ServiceStatusRunning)
-
- if core.grpcServer != nil {
- logger.Info("kvstore-set-server-ready")
- }
-
- timeout = core.config.LiveProbeInterval
- }
-
- if !timeoutTimer.Stop() {
- <-timeoutTimer.C
- }
-
- case <-core.exitChannel:
- break loop
-
- case <-timeoutTimer.C:
- logger.Info("kvstore-perform-liveness-check-on-timeout")
-
- // Trigger Liveness check if no liveness update received within the timeout period.
- // The Liveness check will push Live state to same channel which this routine is
- // reading and processing. This, do it asynchronously to avoid blocking for
- // backend response and avoid any possibility of deadlock
- go core.backend.PerformLivenessCheck(ctx)
- }
- }
-}
diff --git a/rw_core/core/device/agent_test.go b/rw_core/core/device/agent_test.go
index 8b003b4..2abfdeb 100755
--- a/rw_core/core/device/agent_test.go
+++ b/rw_core/core/device/agent_test.go
@@ -144,23 +144,13 @@
if err = dat.kmp.Start(); err != nil {
logger.Fatal("Cannot start InterContainerProxy")
}
- if err = adapterMgr.Start(context.Background()); err != nil {
- logger.Fatal("Cannot start adapterMgr")
- }
- dat.deviceMgr.Start(context.Background())
- dat.logicalDeviceMgr.Start(context.Background())
+ adapterMgr.Start(context.Background())
}
func (dat *DATest) stopAll() {
if dat.kClient != nil {
dat.kClient.Stop()
}
- if dat.logicalDeviceMgr != nil {
- dat.logicalDeviceMgr.Stop(context.Background())
- }
- if dat.deviceMgr != nil {
- dat.deviceMgr.Stop(context.Background())
- }
if dat.kmp != nil {
dat.kmp.Stop()
}
diff --git a/rw_core/core/device/logical_agent_test.go b/rw_core/core/device/logical_agent_test.go
index 64c42b5..e562400 100644
--- a/rw_core/core/device/logical_agent_test.go
+++ b/rw_core/core/device/logical_agent_test.go
@@ -487,23 +487,13 @@
if err = lda.kmp.Start(); err != nil {
logger.Fatal("Cannot start InterContainerProxy")
}
- if err = adapterMgr.Start(context.Background()); err != nil {
- logger.Fatal("Cannot start adapterMgr")
- }
- lda.deviceMgr.Start(context.Background())
- lda.logicalDeviceMgr.Start(context.Background())
+ adapterMgr.Start(context.Background())
}
func (lda *LDATest) stopAll() {
if lda.kClient != nil {
lda.kClient.Stop()
}
- if lda.logicalDeviceMgr != nil {
- lda.logicalDeviceMgr.Stop(context.Background())
- }
- if lda.deviceMgr != nil {
- lda.deviceMgr.Stop(context.Background())
- }
if lda.kmp != nil {
lda.kmp.Stop()
}
diff --git a/rw_core/core/device/logical_manager.go b/rw_core/core/device/logical_manager.go
index a5c47b9..53c3759 100644
--- a/rw_core/core/device/logical_manager.go
+++ b/rw_core/core/device/logical_manager.go
@@ -30,7 +30,6 @@
"github.com/opencord/voltha-go/db/model"
"github.com/opencord/voltha-lib-go/v3/pkg/kafka"
"github.com/opencord/voltha-lib-go/v3/pkg/log"
- "github.com/opencord/voltha-lib-go/v3/pkg/probe"
"github.com/opencord/voltha-protos/v3/go/openflow_13"
"github.com/opencord/voltha-protos/v3/go/voltha"
"google.golang.org/grpc/codes"
@@ -44,25 +43,11 @@
deviceMgr *Manager
kafkaICProxy kafka.InterContainerProxy
clusterDataProxy *model.Proxy
- exitChannel chan int
defaultTimeout time.Duration
logicalDevicesLoadingLock sync.RWMutex
logicalDeviceLoadingInProgress map[string][]chan int
}
-func (ldMgr *LogicalManager) Start(ctx context.Context) {
- logger.Info("starting-logical-device-manager")
- probe.UpdateStatusFromContext(ctx, "logical-device-manager", probe.ServiceStatusRunning)
- logger.Info("logical-device-manager-started")
-}
-
-func (ldMgr *LogicalManager) Stop(ctx context.Context) {
- logger.Info("stopping-logical-device-manager")
- ldMgr.exitChannel <- 1
- probe.UpdateStatusFromContext(ctx, "logical-device-manager", probe.ServiceStatusStopped)
- logger.Info("logical-device-manager-stopped")
-}
-
func (ldMgr *LogicalManager) addLogicalDeviceAgentToMap(agent *LogicalAgent) {
if _, exist := ldMgr.logicalDeviceAgents.Load(agent.logicalDeviceID); !exist {
ldMgr.logicalDeviceAgents.Store(agent.logicalDeviceID, agent)
diff --git a/rw_core/core/device/manager.go b/rw_core/core/device/manager.go
index b0128a5..9990104 100755
--- a/rw_core/core/device/manager.go
+++ b/rw_core/core/device/manager.go
@@ -32,7 +32,6 @@
"github.com/opencord/voltha-go/rw_core/utils"
"github.com/opencord/voltha-lib-go/v3/pkg/kafka"
"github.com/opencord/voltha-lib-go/v3/pkg/log"
- "github.com/opencord/voltha-lib-go/v3/pkg/probe"
"github.com/opencord/voltha-protos/v3/go/common"
ic "github.com/opencord/voltha-protos/v3/go/inter_container"
ofp "github.com/opencord/voltha-protos/v3/go/openflow_13"
@@ -53,7 +52,6 @@
stateTransitions *TransitionMap
clusterDataProxy *model.Proxy
coreInstanceID string
- exitChannel chan int
defaultTimeout time.Duration
devicesLoadingLock sync.RWMutex
deviceLoadingInProgress map[string][]chan int
@@ -61,7 +59,6 @@
func NewManagers(proxy *model.Proxy, adapterMgr *adapter.Manager, kmp kafka.InterContainerProxy, endpointMgr kafka.EndpointManager, corePairTopic, coreInstanceID string, defaultCoreTimeout time.Duration) (*Manager, *LogicalManager) {
deviceMgr := &Manager{
- exitChannel: make(chan int, 1),
rootDevices: make(map[string]bool),
kafkaICProxy: kmp,
adapterProxy: remote.NewAdapterProxy(kmp, corePairTopic, endpointMgr),
@@ -71,9 +68,10 @@
defaultTimeout: defaultCoreTimeout * time.Millisecond,
deviceLoadingInProgress: make(map[string][]chan int),
}
+ deviceMgr.stateTransitions = NewTransitionMap(deviceMgr)
+
logicalDeviceMgr := &LogicalManager{
Manager: event.NewManager(),
- exitChannel: make(chan int, 1),
deviceMgr: deviceMgr,
kafkaICProxy: kmp,
clusterDataProxy: proxy,
@@ -87,20 +85,6 @@
return deviceMgr, logicalDeviceMgr
}
-func (dMgr *Manager) Start(ctx context.Context) {
- logger.Info("starting-device-manager")
- dMgr.stateTransitions = NewTransitionMap(dMgr)
- probe.UpdateStatusFromContext(ctx, "device-manager", probe.ServiceStatusRunning)
- logger.Info("device-manager-started")
-}
-
-func (dMgr *Manager) Stop(ctx context.Context) {
- logger.Info("stopping-device-manager")
- dMgr.exitChannel <- 1
- probe.UpdateStatusFromContext(ctx, "device-manager", probe.ServiceStatusStopped)
- logger.Info("device-manager-stopped")
-}
-
func (dMgr *Manager) addDeviceAgentToMap(agent *Agent) {
if _, exist := dMgr.deviceAgents.Load(agent.deviceID); !exist {
dMgr.deviceAgents.Store(agent.deviceID, agent)
diff --git a/rw_core/core/kafka.go b/rw_core/core/kafka.go
new file mode 100644
index 0000000..fcdf340
--- /dev/null
+++ b/rw_core/core/kafka.go
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2018-present Open Networking Foundation
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+
+ * http://www.apache.org/licenses/LICENSE-2.0
+
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package core
+
+import (
+ "context"
+ "time"
+
+ "github.com/opencord/voltha-go/rw_core/core/adapter"
+ "github.com/opencord/voltha-go/rw_core/core/api"
+ "github.com/opencord/voltha-go/rw_core/core/device"
+ "github.com/opencord/voltha-lib-go/v3/pkg/kafka"
+ "github.com/opencord/voltha-lib-go/v3/pkg/log"
+ "github.com/opencord/voltha-lib-go/v3/pkg/probe"
+)
+
+// startKafkInterContainerProxy builds the Kafka inter-container proxy for the core and
+// keeps trying to start it until the start succeeds or ctx is done.
+//
+// The "message-bus" probe status is set to Preparing before the proxy is built, to
+// Prepared once it has been built, and to NotReady after every failed start attempt.
+// A failed attempt is retried after connectionRetryInterval. This function never sets
+// the status to Running.
+//
+// It returns the started proxy, or nil and ctx.Err() if ctx is done while waiting to retry.
+func startKafkInterContainerProxy(ctx context.Context, kafkaClient kafka.Client, host string, port int, coreTopic, affinityRouterTopic string, connectionRetryInterval time.Duration) (kafka.InterContainerProxy, error) {
+	logger.Infow("initialize-kafka-manager", log.Fields{"host": host, "port": port, "topic": coreTopic})
+
+	probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusPreparing)
+
+	// Assemble the kafka RPC proxy from its options.
+	proxy := kafka.NewInterContainerProxy(
+		kafka.InterContainerHost(host),
+		kafka.InterContainerPort(port),
+		kafka.MsgClient(kafkaClient),
+		kafka.DefaultTopic(&kafka.Topic{Name: coreTopic}),
+		kafka.DeviceDiscoveryTopic(&kafka.Topic{Name: affinityRouterTopic}))
+
+	probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusPrepared)
+
+	logger.Infow("starting-kafka-manager", log.Fields{"host": host,
+		"port": port, "topic": coreTopic})
+
+	// Retry until the proxy starts; only a done ctx ends the loop early.
+	for {
+		logger.Infow("starting-kafka-proxy", log.Fields{})
+		err := proxy.Start()
+		if err == nil {
+			// Started; this only has to happen once.
+			logger.Infow("started-kafka-proxy", log.Fields{})
+			return proxy, nil
+		}
+
+		// The start failed, so report not-ready and pause before the next attempt.
+		probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusNotReady)
+		logger.Infow("error-starting-kafka-messaging-proxy", log.Fields{"error": err})
+		select {
+		case <-time.After(connectionRetryInterval):
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		}
+	}
+}
+
+/*
+ * monitorKafkaLiveness tracks the connectivity state of the Kafka inter-container proxy
+ * and pushes it to the core's "message-bus" readiness probe. It runs until ctx is done.
+ *
+ * Background on where liveness events come from: any producer that fails to send causes
+ * KafkaInterContainerProxy to post a false event on its liveness channel, and any
+ * producer that succeeds in sending causes it to post a true event. Group receivers
+ * also update liveness state; a receiver will typically indicate a loss of liveness
+ * within 3-5 seconds of Kafka going down, and only indicates restoration of liveness
+ * when a message is received. During normal operation messages are routinely produced
+ * and received, so liveness is indicated automatically. These routine indications are
+ * rate-limited inside sarama_client.
+ *
+ * What this thread does: every event read from the liveness channel is translated into
+ * a probe status (true -> Running, false -> NotReady). If no event is seen within the
+ * current timeout, the thread attempts to produce a "liveness" message, which in turn
+ * triggers a liveness event on the channel, true or false depending on whether the
+ * attempt succeeded.
+ *
+ * The gRPC server in turn monitors the state of the readiness probe and will
+ * start issuing UNAVAILABLE response while the probe is not ready.
+ *
+ * liveProbeInterval    -- interval between liveness checks when in a live state
+ * notLiveProbeInterval -- interval between liveness checks when in a notLive state
+ *
+ * liveProbeInterval and notLiveProbeInterval can be configured separately,
+ * though the current default is that both are set to 60 seconds.
+ */
+func monitorKafkaLiveness(ctx context.Context, kmp kafka.InterContainerProxy, liveProbeInterval time.Duration, notLiveProbeInterval time.Duration) {
+	logger.Info("started-kafka-message-proxy")
+
+	events := kmp.EnableLivenessChannel(true)
+
+	logger.Info("enabled-kafka-liveness-channel")
+
+	// Begin with the cadence used while the bus is considered live.
+	wait := liveProbeInterval
+	for {
+		recheck := time.NewTimer(wait)
+		select {
+		case <-ctx.Done():
+			return // just exit
+		case <-recheck.C:
+			logger.Info("kafka-proxy-liveness-recheck")
+			// The probe is sent from its own goroutine: it may block writing to the
+			// liveness channel, which this loop must stay free to read.
+			go func() {
+				if err := kmp.SendLiveness(); err != nil {
+					// Possible when liveness is sent after Sarama has been stopped.
+					logger.Warnw("error-kafka-send-liveness", log.Fields{"error": err})
+				}
+			}()
+		case live := <-events:
+			logger.Infow("kafka-manager-thread-liveness-event", log.Fields{"liveness": live})
+			// Kafka liveness changed state.
+			if live {
+				probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusRunning)
+				logger.Info("kafka-manager-thread-set-server-ready")
+
+				// re-check infrequently while things are healthy
+				wait = liveProbeInterval
+			} else {
+				probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusNotReady)
+				logger.Info("kafka-manager-thread-set-server-notready")
+
+				// re-check frequently while things are unhealthy
+				wait = notLiveProbeInterval
+			}
+			// Release this iteration's timer, draining its channel if it already fired.
+			if !recheck.Stop() {
+				<-recheck.C
+			}
+		}
+	}
+}
+
+// registerAdapterRequestHandlers subscribes the core to the kafka topics on which it
+// receives adapter requests.
+//
+// A request handler proxy built from dMgr and aMgr is subscribed on coreTopic, and the
+// default request handler is subscribed on corePairTopic starting at kafka.OffsetNewest.
+// If either subscription fails it is reported through logger.Fatalw, and the underlying
+// error is included in the log fields so the cause of the failure is not lost.
+func registerAdapterRequestHandlers(kmp kafka.InterContainerProxy, dMgr *device.Manager, aMgr *adapter.Manager, coreTopic, corePairTopic string) {
+	requestProxy := api.NewAdapterRequestHandlerProxy(dMgr, aMgr)
+
+	// Register the broadcast topic to handle any core-bound broadcast requests
+	if err := kmp.SubscribeWithRequestHandlerInterface(kafka.Topic{Name: coreTopic}, requestProxy); err != nil {
+		logger.Fatalw("Failed-registering-broadcast-handler", log.Fields{"topic": coreTopic, "error": err})
+	}
+
+	// Register the core-pair topic to handle core-bound requests destined to the core pair
+	if err := kmp.SubscribeWithDefaultRequestHandler(kafka.Topic{Name: corePairTopic}, kafka.OffsetNewest); err != nil {
+		logger.Fatalw("Failed-registering-pair-handler", log.Fields{"topic": corePairTopic, "error": err})
+	}
+
+	logger.Info("request-handler-registered")
+}
diff --git a/rw_core/core/kv.go b/rw_core/core/kv.go
new file mode 100644
index 0000000..48b99e9
--- /dev/null
+++ b/rw_core/core/kv.go
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2018-present Open Networking Foundation
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+
+ * http://www.apache.org/licenses/LICENSE-2.0
+
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package core
+
+import (
+ "context"
+ "errors"
+ "time"
+
+ "github.com/opencord/voltha-lib-go/v3/pkg/db"
+ "github.com/opencord/voltha-lib-go/v3/pkg/db/kvstore"
+ "github.com/opencord/voltha-lib-go/v3/pkg/log"
+ "github.com/opencord/voltha-lib-go/v3/pkg/probe"
+ "google.golang.org/grpc/codes"
+ "google.golang.org/grpc/status"
+)
+
+// newKVClient creates a KV store client of the requested storeType for the given
+// address and timeout. "consul" and "etcd" are the supported store types; any other
+// value yields a nil client and an "unsupported-kv-store" error.
+func newKVClient(storeType string, address string, timeout int) (kvstore.Client, error) {
+	logger.Infow("kv-store-type", log.Fields{"store": storeType})
+	if storeType == "consul" {
+		return kvstore.NewConsulClient(address, timeout)
+	}
+	if storeType == "etcd" {
+		return kvstore.NewEtcdClient(address, timeout, log.FatalLevel)
+	}
+	return nil, errors.New("unsupported-kv-store")
+}
+
+// stopKVClient releases every reservation held through kvClient and then closes the
+// client. A failure to release reservations is only logged; the client is closed
+// regardless.
+func stopKVClient(ctx context.Context, kvClient kvstore.Client) {
+	// Give back all reservations first.
+	err := kvClient.ReleaseAllReservations(ctx)
+	if err != nil {
+		logger.Infow("fail-to-release-all-reservations", log.Fields{"error": err})
+	}
+
+	// Then close the DB connection.
+	kvClient.Close()
+}
+
+// waitUntilKVStoreReachableOrMaxTries blocks until kvClient reports its connection as
+// up, the retry budget is exhausted, or ctx is done.
+//
+// The connection is re-checked every retryInterval. When maxRetries is -1 the number
+// of retries is unlimited; otherwise a codes.Unavailable status error is returned once
+// maxRetries retries have been made and the store is still unreachable. If ctx is done
+// while waiting between checks, ctx.Err() is returned. On success the "kv-store" probe
+// status is set to Running and nil is returned.
+func waitUntilKVStoreReachableOrMaxTries(ctx context.Context, kvClient kvstore.Client, maxRetries int, retryInterval time.Duration) error {
+	logger.Infow("verifying-KV-store-connectivity", log.Fields{"retries": maxRetries, "retryInterval": retryInterval})
+
+	retries := 0
+	for !kvClient.IsConnectionUp(ctx) {
+		logger.Info("KV-store-unreachable")
+		if maxRetries != -1 && retries >= maxRetries {
+			return status.Error(codes.Unavailable, "kv store unreachable")
+		}
+		retries++
+
+		// Pause before the next check, unless ctx is done first.
+		select {
+		case <-time.After(retryInterval):
+		case <-ctx.Done():
+			return ctx.Err()
+		}
+		logger.Infow("retry-KV-store-connectivity", log.Fields{"retryCount": retries, "maxRetries": maxRetries, "retryInterval": retryInterval})
+	}
+
+	probe.UpdateStatusFromContext(ctx, "kv-store", probe.ServiceStatusRunning)
+	logger.Info("KV-store-reachable")
+	return nil
+}
+
+/*
+ * monitorKVStoreLiveness is the thread that monitors kvstore liveness (connection status).
+ *
+ * It reads the liveness state reported by the backend on its liveness channel and
+ * updates the status of the "kv-store" service registered with the rw_core probe:
+ * Running when the state is live, NotReady when it is not. It runs until ctx is done.
+ *
+ * If no liveness event has been seen within the current timeout, the thread triggers
+ * a liveness check on the backend, which will in turn produce a liveness event on the
+ * liveness channel, true or false depending on whether the attempt succeeded.
+ * liveProbeInterval is the timeout used while the store is live, notLiveProbeInterval
+ * the one used while it is not.
+ *
+ * The gRPC server in turn monitors the state of the readiness probe and will
+ * start issuing UNAVAILABLE response while the probe is not ready.
+ */
+func monitorKVStoreLiveness(ctx context.Context, backend *db.Backend, liveProbeInterval, notLiveProbeInterval time.Duration) {
+	logger.Info("start-monitoring-kvstore-liveness")
+
+	// Ask the backend for the channel on which it publishes state updates.
+	events := backend.EnableLivenessChannel()
+
+	logger.Debug("enabled-kvstore-liveness-channel")
+
+	// For rw_core the kvstore is assumed alive to begin with.
+	wait := liveProbeInterval
+	for {
+		recheck := time.NewTimer(wait)
+		select {
+		case <-ctx.Done():
+			return
+
+		case <-recheck.C:
+			logger.Info("kvstore-perform-liveness-check-on-timeout")
+
+			// No liveness update arrived within the timeout, so trigger a check. Its
+			// result is pushed to the very channel this routine reads, so the check is
+			// run asynchronously to avoid blocking on the backend response and to
+			// rule out any possibility of deadlock.
+			go backend.PerformLivenessCheck(ctx)
+
+		case live := <-events:
+			logger.Debugw("received-liveness-change-notification", log.Fields{"liveness": live})
+
+			if live {
+				probe.UpdateStatusFromContext(ctx, "kv-store", probe.ServiceStatusRunning)
+				logger.Info("kvstore-set-server-ready")
+
+				wait = liveProbeInterval
+			} else {
+				probe.UpdateStatusFromContext(ctx, "kv-store", probe.ServiceStatusNotReady)
+				logger.Info("kvstore-set-server-notready")
+
+				wait = notLiveProbeInterval
+			}
+
+			// Release this iteration's timer, draining its channel if it already fired.
+			if !recheck.Stop() {
+				<-recheck.C
+			}
+		}
+	}
+}