VOL-2970 - Improved readability & traceability of startup code.
Changed the Start() function to implement the majority of the startup functionality, with fewer helpers. Start() also defines local variables for each component created, to avoid accidentally using a component that isn't ready.
Also merged the rwCore into the Core.
Also changed Core to cancel a local context on shutdown, and then wait for shutdown to complete.
Change-Id: I285e8486773476531e20ec352ff85a1b145432bf
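For reference, a condensed sketch of the cancel-and-wait shutdown pattern introduced by this change, using the shutdown/stopped field names and NewCore/start/Stop functions from the core.go diff below; the component creation, probe wiring, and the main() driver shown here are illustrative placeholders, not the full rw_core implementation:

package main

import (
	"context"
	"fmt"
)

// Core holds only what is needed to coordinate shutdown.
type Core struct {
	shutdown context.CancelFunc // cancels the context handed to start()
	stopped  chan struct{}      // closed once start() has finished its deferred cleanup
}

// NewCore launches start() on a cancelable child context and returns immediately.
func NewCore(ctx context.Context) *Core {
	shutdownCtx, cancelCtx := context.WithCancel(ctx)
	core := &Core{shutdown: cancelCtx, stopped: make(chan struct{})}
	go core.start(shutdownCtx)
	return core
}

func (core *Core) start(ctx context.Context) {
	// deferred functions run cleanup in reverse order of creation, so failing
	// partway through startup still tears down whatever was already started
	defer close(core.stopped)
	defer core.shutdown()

	// ... create each component as a local variable here, deferring its Stop() ...

	// block until Stop() is called or the parent context is cancelled
	<-ctx.Done()
}

// Stop cancels the local context and waits for shutdown to complete.
func (core *Core) Stop() {
	core.shutdown()
	<-core.stopped
}

func main() {
	core := NewCore(context.Background())
	// ... normally run until a shutdown signal arrives ...
	core.Stop() // cancels the local context, then blocks until start() has cleaned up
	fmt.Println("core stopped")
}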
diff --git a/rw_core/core/adapter/manager.go b/rw_core/core/adapter/manager.go
index 11752e1..b552d8f 100644
--- a/rw_core/core/adapter/manager.go
+++ b/rw_core/core/adapter/manager.go
@@ -40,14 +40,12 @@
clusterDataProxy *model.Proxy
onAdapterRestart adapterRestartedHandler
coreInstanceID string
- exitChannel chan int
lockAdaptersMap sync.RWMutex
lockdDeviceTypeToAdapterMap sync.RWMutex
}
func NewAdapterManager(cdProxy *model.Proxy, coreInstanceID string, kafkaClient kafka.Client) *Manager {
aMgr := &Manager{
- exitChannel: make(chan int, 1),
coreInstanceID: coreInstanceID,
clusterDataProxy: cdProxy,
deviceTypes: make(map[string]*voltha.DeviceType),
@@ -65,20 +63,19 @@
aMgr.onAdapterRestart = onAdapterRestart
}
-func (aMgr *Manager) Start(ctx context.Context) error {
+func (aMgr *Manager) Start(ctx context.Context) {
+ probe.UpdateStatusFromContext(ctx, "adapter-manager", probe.ServiceStatusPreparing)
logger.Info("starting-adapter-manager")
// Load the existing adapterAgents and device types - this will also ensure the correct paths have been
// created if there are no data in the dB to start
err := aMgr.loadAdaptersAndDevicetypesInMemory()
if err != nil {
- logger.Errorw("Failed-to-load-adapters-and-device-types-in-memeory", log.Fields{"error": err})
- return err
+ logger.Fatalf("failed-to-load-adapters-and-device-types-in-memory: %s", err)
}
probe.UpdateStatusFromContext(ctx, "adapter-manager", probe.ServiceStatusRunning)
logger.Info("adapter-manager-started")
- return nil
}
//loadAdaptersAndDevicetypesInMemory loads the existing set of adapters and device types in memory
diff --git a/rw_core/core/api/adapter_request_handler.go b/rw_core/core/api/adapter_request_handler.go
index 7c03618..4deca75 100644
--- a/rw_core/core/api/adapter_request_handler.go
+++ b/rw_core/core/api/adapter_request_handler.go
@@ -19,13 +19,10 @@
import (
"context"
"errors"
- "github.com/opencord/voltha-go/rw_core/core/adapter"
- "github.com/opencord/voltha-go/rw_core/core/device"
- "time"
-
"github.com/golang/protobuf/ptypes"
"github.com/golang/protobuf/ptypes/empty"
- "github.com/opencord/voltha-go/db/model"
+ "github.com/opencord/voltha-go/rw_core/core/adapter"
+ "github.com/opencord/voltha-go/rw_core/core/device"
"github.com/opencord/voltha-lib-go/v3/pkg/kafka"
"github.com/opencord/voltha-lib-go/v3/pkg/log"
ic "github.com/opencord/voltha-protos/v3/go/inter_container"
@@ -34,28 +31,16 @@
// AdapterRequestHandlerProxy represent adapter request handler proxy attributes
type AdapterRequestHandlerProxy struct {
- coreInstanceID string
- deviceMgr *device.Manager
- adapterMgr *adapter.Manager
- localDataProxy *model.Proxy
- clusterDataProxy *model.Proxy
- defaultRequestTimeout time.Duration
- longRunningRequestTimeout time.Duration
+ deviceMgr *device.Manager
+ adapterMgr *adapter.Manager
}
// NewAdapterRequestHandlerProxy assigns values for adapter request handler proxy attributes and returns the new instance
-func NewAdapterRequestHandlerProxy(coreInstanceID string, dMgr *device.Manager,
- aMgr *adapter.Manager, cdProxy *model.Proxy, ldProxy *model.Proxy, longRunningRequestTimeout time.Duration,
- defaultRequestTimeout time.Duration) *AdapterRequestHandlerProxy {
- var proxy AdapterRequestHandlerProxy
- proxy.coreInstanceID = coreInstanceID
- proxy.deviceMgr = dMgr
- proxy.clusterDataProxy = cdProxy
- proxy.localDataProxy = ldProxy
- proxy.adapterMgr = aMgr
- proxy.defaultRequestTimeout = defaultRequestTimeout
- proxy.longRunningRequestTimeout = longRunningRequestTimeout
- return &proxy
+func NewAdapterRequestHandlerProxy(dMgr *device.Manager, aMgr *adapter.Manager) *AdapterRequestHandlerProxy {
+ return &AdapterRequestHandlerProxy{
+ deviceMgr: dMgr,
+ adapterMgr: aMgr,
+ }
}
func (rhp *AdapterRequestHandlerProxy) Register(args []*ic.Argument) (*voltha.CoreInstance, error) {
@@ -86,7 +71,7 @@
}
}
}
- logger.Debugw("Register", log.Fields{"adapter": *adapter, "device-types": deviceTypes, "transaction-id": transactionID.Val, "core-id": rhp.coreInstanceID})
+ logger.Debugw("Register", log.Fields{"adapter": *adapter, "device-types": deviceTypes, "transaction-id": transactionID.Val})
return rhp.adapterMgr.RegisterAdapter(adapter, deviceTypes)
}
diff --git a/rw_core/core/api/grpc_nbi_handler_test.go b/rw_core/core/api/grpc_nbi_handler_test.go
index e8b651d..592ccea 100755
--- a/rw_core/core/api/grpc_nbi_handler_test.go
+++ b/rw_core/core/api/grpc_nbi_handler_test.go
@@ -127,16 +127,12 @@
proxy := model.NewProxy(backend, "/")
nb.adapterMgr = adapter.NewAdapterManager(proxy, nb.coreInstanceID, nb.kClient)
nb.deviceMgr, nb.logicalDeviceMgr = device.NewManagers(proxy, nb.adapterMgr, nb.kmp, endpointMgr, cfg.CorePairTopic, nb.coreInstanceID, cfg.DefaultCoreTimeout)
- if err = nb.adapterMgr.Start(ctx); err != nil {
- logger.Fatalf("Cannot start adapterMgr: %s", err)
- }
- nb.deviceMgr.Start(ctx)
- nb.logicalDeviceMgr.Start(ctx)
+ nb.adapterMgr.Start(ctx)
- if err = nb.kmp.Start(); err != nil {
+ if err := nb.kmp.Start(); err != nil {
logger.Fatalf("Cannot start InterContainerProxy: %s", err)
}
- requestProxy := NewAdapterRequestHandlerProxy(nb.coreInstanceID, nb.deviceMgr, nb.adapterMgr, proxy, proxy, cfg.LongRunningRequestTimeout, cfg.DefaultRequestTimeout)
+ requestProxy := NewAdapterRequestHandlerProxy(nb.deviceMgr, nb.adapterMgr)
if err := nb.kmp.SubscribeWithRequestHandlerInterface(kafka.Topic{Name: cfg.CoreTopic}, requestProxy); err != nil {
logger.Fatalf("Cannot add request handler: %s", err)
}
@@ -201,12 +197,6 @@
if nb.kClient != nil {
nb.kClient.Stop()
}
- if nb.logicalDeviceMgr != nil {
- nb.logicalDeviceMgr.Stop(context.Background())
- }
- if nb.deviceMgr != nil {
- nb.deviceMgr.Stop(context.Background())
- }
if nb.kmp != nil {
nb.kmp.Stop()
}
diff --git a/rw_core/core/core.go b/rw_core/core/core.go
index 7cf9f98..0dbecc8 100644
--- a/rw_core/core/core.go
+++ b/rw_core/core/core.go
@@ -18,7 +18,7 @@
import (
"context"
- "sync"
+ "strconv"
"time"
"github.com/opencord/voltha-go/db/model"
@@ -26,446 +26,150 @@
"github.com/opencord/voltha-go/rw_core/core/adapter"
"github.com/opencord/voltha-go/rw_core/core/api"
"github.com/opencord/voltha-go/rw_core/core/device"
+ conf "github.com/opencord/voltha-lib-go/v3/pkg/config"
"github.com/opencord/voltha-lib-go/v3/pkg/db"
- "github.com/opencord/voltha-lib-go/v3/pkg/db/kvstore"
grpcserver "github.com/opencord/voltha-lib-go/v3/pkg/grpc"
"github.com/opencord/voltha-lib-go/v3/pkg/kafka"
"github.com/opencord/voltha-lib-go/v3/pkg/log"
"github.com/opencord/voltha-lib-go/v3/pkg/probe"
"github.com/opencord/voltha-protos/v3/go/voltha"
"google.golang.org/grpc"
- "google.golang.org/grpc/codes"
- "google.golang.org/grpc/status"
)
// Core represent read,write core attributes
type Core struct {
- instanceID string
- deviceMgr *device.Manager
- logicalDeviceMgr *device.LogicalManager
- grpcServer *grpcserver.GrpcServer
- grpcNBIAPIHandler *api.NBIHandler
- adapterMgr *adapter.Manager
- config *config.RWCoreFlags
- kmp kafka.InterContainerProxy
- clusterDataProxy *model.Proxy
- localDataProxy *model.Proxy
- exitChannel chan struct{}
- stopOnce sync.Once
- kvClient kvstore.Client
- backend db.Backend
- kafkaClient kafka.Client
+ shutdown context.CancelFunc
+ stopped chan struct{}
}
// NewCore creates instance of rw core
-func NewCore(ctx context.Context, id string, cf *config.RWCoreFlags, kvClient kvstore.Client, kafkaClient kafka.Client) *Core {
- var core Core
- core.instanceID = id
- core.exitChannel = make(chan struct{})
- core.config = cf
- core.kvClient = kvClient
- core.kafkaClient = kafkaClient
-
- // Configure backend to push Liveness Status at least every (cf.LiveProbeInterval / 2) seconds
- // so as to avoid trigger of Liveness check (due to Liveness timeout) when backend is alive
- livenessChannelInterval := cf.LiveProbeInterval / 2
-
- // Setup the KV store
- core.backend = db.Backend{
- Client: kvClient,
- StoreType: cf.KVStoreType,
- Host: cf.KVStoreHost,
- Port: cf.KVStorePort,
- Timeout: cf.KVStoreTimeout,
- LivenessChannelInterval: livenessChannelInterval,
- PathPrefix: cf.KVStoreDataPrefix}
- return &core
-}
-
-// Start brings up core services
-func (core *Core) Start(ctx context.Context) error {
-
+func NewCore(ctx context.Context, id string, cf *config.RWCoreFlags) *Core {
// If the context has a probe then fetch it and register our services
- var p *probe.Probe
- if value := ctx.Value(probe.ProbeContextKey); value != nil {
- if _, ok := value.(*probe.Probe); ok {
- p = value.(*probe.Probe)
- p.RegisterService(
- "message-bus",
- "kv-store",
- "device-manager",
- "logical-device-manager",
- "adapter-manager",
- "grpc-service",
- )
- }
- }
-
- logger.Info("starting-core-services", log.Fields{"coreId": core.instanceID})
-
- // Wait until connection to KV Store is up
- if err := core.waitUntilKVStoreReachableOrMaxTries(ctx, core.config.MaxConnectionRetries, core.config.ConnectionRetryInterval); err != nil {
- logger.Fatal("Unable-to-connect-to-KV-store")
- }
- if p != nil {
- p.UpdateStatus("kv-store", probe.ServiceStatusRunning)
- }
-
- endpointMgr := kafka.NewEndpointManager(&core.backend)
-
- core.clusterDataProxy = model.NewProxy(&core.backend, "/")
- core.localDataProxy = model.NewProxy(&core.backend, "/")
-
- // core.kmp must be created before deviceMgr and adapterMgr, as they will make
- // private copies of the poiner to core.kmp.
- core.initKafkaManager(ctx)
-
- logger.Debugw("values", log.Fields{"kmp": core.kmp})
- core.adapterMgr = adapter.NewAdapterManager(core.clusterDataProxy, core.instanceID, core.kafkaClient)
- core.deviceMgr, core.logicalDeviceMgr = device.NewManagers(core.clusterDataProxy, core.adapterMgr, core.kmp, endpointMgr, core.config.CorePairTopic, core.instanceID, core.config.DefaultCoreTimeout)
-
- // Start the KafkaManager. This must be done after the deviceMgr, adapterMgr, and
- // logicalDeviceMgr have been created, as once the kmp is started, it will register
- // the above with the kmp.
-
- go core.startKafkaManager(ctx,
- core.config.ConnectionRetryInterval,
- core.config.LiveProbeInterval,
- core.config.NotLiveProbeInterval)
-
- go core.startDeviceManager(ctx)
- go core.startLogicalDeviceManager(ctx)
- go core.startGRPCService(ctx)
- go core.startAdapterManager(ctx)
- go core.monitorKvstoreLiveness(ctx)
-
- logger.Info("core-services-started")
- return nil
-}
-
-// Stop brings down core services
-func (core *Core) Stop(ctx context.Context) {
- core.stopOnce.Do(func() {
- logger.Info("stopping-adaptercore")
- // Signal to the KVStoreMonitor that we are stopping.
- close(core.exitChannel)
- // Stop all the started services
- if core.grpcServer != nil {
- core.grpcServer.Stop()
- }
- if core.logicalDeviceMgr != nil {
- core.logicalDeviceMgr.Stop(ctx)
- }
- if core.deviceMgr != nil {
- core.deviceMgr.Stop(ctx)
- }
- if core.kmp != nil {
- core.kmp.Stop()
- }
- logger.Info("adaptercore-stopped")
- })
-}
-
-//startGRPCService creates the grpc service handlers, registers it to the grpc server and starts the server
-func (core *Core) startGRPCService(ctx context.Context) {
- // create an insecure gserver server
- core.grpcServer = grpcserver.NewGrpcServer(core.config.GrpcHost, core.config.GrpcPort, nil, false, probe.GetProbeFromContext(ctx))
- logger.Info("grpc-server-created")
-
- core.grpcNBIAPIHandler = api.NewNBIHandler(core.deviceMgr, core.logicalDeviceMgr, core.adapterMgr)
- logger.Infow("grpc-handler", log.Fields{"core_binding_key": core.config.CoreBindingKey})
- // Create a function to register the core GRPC service with the GRPC server
- f := func(gs *grpc.Server) {
- voltha.RegisterVolthaServiceServer(
- gs,
- core.grpcNBIAPIHandler,
+ if p := probe.GetProbeFromContext(ctx); p != nil {
+ p.RegisterService(
+ "message-bus",
+ "kv-store",
+ "adapter-manager",
+ "grpc-service",
)
}
- core.grpcServer.AddService(f)
+ // new threads will be given a new cancelable context, so that they can be aborted later when Stop() is called
+ shutdownCtx, cancelCtx := context.WithCancel(ctx)
+
+ core := &Core{shutdown: cancelCtx, stopped: make(chan struct{})}
+ go core.start(shutdownCtx, id, cf)
+ return core
+}
+
+func (core *Core) start(ctx context.Context, id string, cf *config.RWCoreFlags) {
+ logger.Info("starting-core-services", log.Fields{"coreId": id})
+
+ // deferred functions are used to run cleanup
+ // failing partway will stop anything that's been started
+ defer close(core.stopped)
+ defer core.shutdown()
+
+ logger.Info("Starting RW Core components")
+
+ // setup kv client
+ logger.Debugw("create-kv-client", log.Fields{"kvstore": cf.KVStoreType})
+ kvClient, err := newKVClient(cf.KVStoreType, cf.KVStoreHost+":"+strconv.Itoa(cf.KVStorePort), cf.KVStoreTimeout)
+ if err != nil {
+ logger.Fatal(err)
+ }
+ defer stopKVClient(context.Background(), kvClient)
+
+ // sync logging config with kv store
+ cm := conf.NewConfigManager(kvClient, cf.KVStoreType, cf.KVStoreHost, cf.KVStorePort, cf.KVStoreTimeout)
+ go conf.StartLogLevelConfigProcessing(cm, ctx)
+
+ backend := &db.Backend{
+ Client: kvClient,
+ StoreType: cf.KVStoreType,
+ Host: cf.KVStoreHost,
+ Port: cf.KVStorePort,
+ Timeout: cf.KVStoreTimeout,
+ // Configure backend to push Liveness Status at least every (cf.LiveProbeInterval / 2) seconds
+ // so as to avoid trigger of Liveness check (due to Liveness timeout) when backend is alive
+ LivenessChannelInterval: cf.LiveProbeInterval / 2,
+ PathPrefix: cf.KVStoreDataPrefix,
+ }
+
+ // wait until connection to KV Store is up
+ if err := waitUntilKVStoreReachableOrMaxTries(ctx, kvClient, cf.MaxConnectionRetries, cf.ConnectionRetryInterval); err != nil {
+ logger.Fatal("Unable-to-connect-to-KV-store")
+ }
+ go monitorKVStoreLiveness(ctx, backend, cf.LiveProbeInterval, cf.NotLiveProbeInterval)
+
+ // create kafka client
+ kafkaClient := kafka.NewSaramaClient(
+ kafka.Host(cf.KafkaAdapterHost),
+ kafka.Port(cf.KafkaAdapterPort),
+ kafka.ConsumerType(kafka.GroupCustomer),
+ kafka.ProducerReturnOnErrors(true),
+ kafka.ProducerReturnOnSuccess(true),
+ kafka.ProducerMaxRetries(6),
+ kafka.NumPartitions(3),
+ kafka.ConsumerGroupName(id),
+ kafka.ConsumerGroupPrefix(id),
+ kafka.AutoCreateTopic(true),
+ kafka.ProducerFlushFrequency(5),
+ kafka.ProducerRetryBackoff(time.Millisecond*30),
+ kafka.LivenessChannelInterval(cf.LiveProbeInterval/2),
+ )
+ // defer kafkaClient.Stop()
+
+ // create kv proxy
+ proxy := model.NewProxy(backend, "/")
+
+ // load adapters & device types while other things are starting
+ adapterMgr := adapter.NewAdapterManager(proxy, id, kafkaClient)
+ go adapterMgr.Start(ctx)
+
+ // connect to kafka, then wait until reachable and publisher/consumer created
+ // core.kmp must be created before deviceMgr and adapterMgr
+ kmp, err := startKafkInterContainerProxy(ctx, kafkaClient, cf.KafkaAdapterHost, cf.KafkaAdapterPort, cf.CoreTopic, cf.AffinityRouterTopic, cf.ConnectionRetryInterval)
+ if err != nil {
+ logger.Warn("Failed to setup kafka connection")
+ return
+ }
+ defer kmp.Stop()
+ go monitorKafkaLiveness(ctx, kmp, cf.LiveProbeInterval, cf.NotLiveProbeInterval)
+
+ // create the core of the system, the device managers
+ endpointMgr := kafka.NewEndpointManager(backend)
+ deviceMgr, logicalDeviceMgr := device.NewManagers(proxy, adapterMgr, kmp, endpointMgr, cf.CorePairTopic, id, cf.DefaultCoreTimeout)
+
+ // register kafka RPC handler
+ registerAdapterRequestHandlers(kmp, deviceMgr, adapterMgr, cf.CoreTopic, cf.CorePairTopic)
+
+ // start gRPC handler
+ grpcServer := grpcserver.NewGrpcServer(cf.GrpcHost, cf.GrpcPort, nil, false, probe.GetProbeFromContext(ctx))
+ go startGRPCService(ctx, grpcServer, api.NewNBIHandler(deviceMgr, logicalDeviceMgr, adapterMgr))
+ defer grpcServer.Stop()
+
+ // wait for core to be stopped, via Stop() or context cancellation, before running deferred functions
+ <-ctx.Done()
+}
+
+// Stop brings down core services
+func (core *Core) Stop() {
+ core.shutdown()
+ <-core.stopped
+}
+
+// startGRPCService creates the grpc service handlers, registers it to the grpc server and starts the server
+func startGRPCService(ctx context.Context, server *grpcserver.GrpcServer, handler voltha.VolthaServiceServer) {
+ logger.Info("grpc-server-created")
+
+ server.AddService(func(gs *grpc.Server) { voltha.RegisterVolthaServiceServer(gs, handler) })
logger.Info("grpc-service-added")
- /*
- * Start the GRPC server
- *
- * This is a bit sub-optimal here as the grpcServer.Start call does not return (blocks)
- * until something fails, but we want to send a "start" status update. As written this
- * means that we are actually sending the "start" status update before the server is
- * started, which means it is possible that the status is "running" before it actually is.
- *
- * This means that there is a small window in which the core could return its status as
- * ready, when it really isn't.
- */
probe.UpdateStatusFromContext(ctx, "grpc-service", probe.ServiceStatusRunning)
logger.Info("grpc-server-started")
- core.grpcServer.Start(ctx)
+ // Note that there is a small window here in which the core could return its status as ready,
+ // when it really isn't. This is unlikely to cause issues, as the delay is incredibly short.
+ server.Start(ctx)
probe.UpdateStatusFromContext(ctx, "grpc-service", probe.ServiceStatusStopped)
}
-
-// Initialize the kafka manager, but we will start it later
-func (core *Core) initKafkaManager(ctx context.Context) {
- logger.Infow("initialize-kafka-manager", log.Fields{"host": core.config.KafkaAdapterHost,
- "port": core.config.KafkaAdapterPort, "topic": core.config.CoreTopic})
-
- probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusPreparing)
-
- // create the proxy
- core.kmp = kafka.NewInterContainerProxy(
- kafka.InterContainerHost(core.config.KafkaAdapterHost),
- kafka.InterContainerPort(core.config.KafkaAdapterPort),
- kafka.MsgClient(core.kafkaClient),
- kafka.DefaultTopic(&kafka.Topic{Name: core.config.CoreTopic}),
- kafka.DeviceDiscoveryTopic(&kafka.Topic{Name: core.config.AffinityRouterTopic}))
-
- probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusPrepared)
-}
-
-/*
- * KafkaMonitorThread
- *
- * Responsible for starting the Kafka Interadapter Proxy and monitoring its liveness
- * state.
- *
- * Any producer that fails to send will cause KafkaInterContainerProxy to
- * post a false event on its liveness channel. Any producer that succeeds in sending
- * will cause KafkaInterContainerProxy to post a true event on its liveness
- * channel. Group receivers also update liveness state, and a receiver will typically
- * indicate a loss of liveness within 3-5 seconds of Kafka going down. Receivers
- * only indicate restoration of liveness if a message is received. During normal
- * operation, messages will be routinely produced and received, automatically
- * indicating liveness state. These routine liveness indications are rate-limited
- * inside sarama_client.
- *
- * This thread monitors the status of KafkaInterContainerProxy's liveness and pushes
- * that state to the core's readiness probes. If no liveness event has been seen
- * within a timeout, then the thread will make an attempt to produce a "liveness"
- * message, which will in turn trigger a liveness event on the liveness channel, true
- * or false depending on whether the attempt succeeded.
- *
- * The gRPC server in turn monitors the state of the readiness probe and will
- * start issuing UNAVAILABLE response while the probe is not ready.
- *
- * startupRetryInterval -- interval between attempts to start
- * liveProbeInterval -- interval between liveness checks when in a live state
- * notLiveProbeInterval -- interval between liveness checks when in a notLive state
- *
- * liveProbeInterval and notLiveProbeInterval can be configured separately,
- * though the current default is that both are set to 60 seconds.
- */
-
-func (core *Core) startKafkaManager(ctx context.Context, startupRetryInterval time.Duration, liveProbeInterval time.Duration, notLiveProbeInterval time.Duration) {
- logger.Infow("starting-kafka-manager-thread", log.Fields{"host": core.config.KafkaAdapterHost,
- "port": core.config.KafkaAdapterPort, "topic": core.config.CoreTopic})
-
- started := false
- for !started {
- // If we haven't started yet, then try to start
- logger.Infow("starting-kafka-proxy", log.Fields{})
- if err := core.kmp.Start(); err != nil {
- // We failed to start. Delay and then try again later.
- // Don't worry about liveness, as we can't be live until we've started.
- probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusNotReady)
- logger.Infow("error-starting-kafka-messaging-proxy", log.Fields{"error": err})
- time.Sleep(startupRetryInterval)
- } else {
- // We started. We only need to do this once.
- // Next we'll fall through and start checking liveness.
- logger.Infow("started-kafka-proxy", log.Fields{})
-
- // cannot do this until after the kmp is started
- if err := core.registerAdapterRequestHandlers(ctx, core.instanceID, core.deviceMgr, core.logicalDeviceMgr, core.adapterMgr, core.clusterDataProxy, core.localDataProxy); err != nil {
- logger.Fatal("Failure-registering-adapterRequestHandler")
- }
-
- started = true
- }
- }
-
- logger.Info("started-kafka-message-proxy")
-
- livenessChannel := core.kmp.EnableLivenessChannel(true)
-
- logger.Info("enabled-kafka-liveness-channel")
-
- timeout := liveProbeInterval
- for {
- timeoutTimer := time.NewTimer(timeout)
- select {
- case liveness := <-livenessChannel:
- logger.Infow("kafka-manager-thread-liveness-event", log.Fields{"liveness": liveness})
- // there was a state change in Kafka liveness
- if !liveness {
- probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusNotReady)
-
- if core.grpcServer != nil {
- logger.Info("kafka-manager-thread-set-server-notready")
- }
-
- // retry frequently while life is bad
- timeout = notLiveProbeInterval
- } else {
- probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusRunning)
-
- if core.grpcServer != nil {
- logger.Info("kafka-manager-thread-set-server-ready")
- }
-
- // retry infrequently while life is good
- timeout = liveProbeInterval
- }
- if !timeoutTimer.Stop() {
- <-timeoutTimer.C
- }
- case <-timeoutTimer.C:
- logger.Info("kafka-proxy-liveness-recheck")
- // send the liveness probe in a goroutine; we don't want to deadlock ourselves as
- // the liveness probe may wait (and block) writing to our channel.
- go func() {
- err := core.kmp.SendLiveness()
- if err != nil {
- // Catch possible error case if sending liveness after Sarama has been stopped.
- logger.Warnw("error-kafka-send-liveness", log.Fields{"error": err})
- }
- }()
- }
- }
-}
-
-// waitUntilKVStoreReachableOrMaxTries will wait until it can connect to a KV store or until maxtries has been reached
-func (core *Core) waitUntilKVStoreReachableOrMaxTries(ctx context.Context, maxRetries int, retryInterval time.Duration) error {
- logger.Infow("verifying-KV-store-connectivity", log.Fields{"host": core.config.KVStoreHost,
- "port": core.config.KVStorePort, "retries": maxRetries, "retryInterval": retryInterval})
- count := 0
- for {
- if !core.kvClient.IsConnectionUp(ctx) {
- logger.Info("KV-store-unreachable")
- if maxRetries != -1 {
- if count >= maxRetries {
- return status.Error(codes.Unavailable, "kv store unreachable")
- }
- }
- count++
- // Take a nap before retrying
- time.Sleep(retryInterval)
- logger.Infow("retry-KV-store-connectivity", log.Fields{"retryCount": count, "maxRetries": maxRetries, "retryInterval": retryInterval})
-
- } else {
- break
- }
- }
- logger.Info("KV-store-reachable")
- return nil
-}
-
-func (core *Core) registerAdapterRequestHandlers(ctx context.Context, coreInstanceID string, dMgr *device.Manager,
- ldMgr *device.LogicalManager, aMgr *adapter.Manager, cdProxy *model.Proxy, ldProxy *model.Proxy,
-) error {
- requestProxy := api.NewAdapterRequestHandlerProxy(coreInstanceID, dMgr, aMgr, cdProxy, ldProxy,
- core.config.LongRunningRequestTimeout, core.config.DefaultRequestTimeout)
-
- // Register the broadcast topic to handle any core-bound broadcast requests
- if err := core.kmp.SubscribeWithRequestHandlerInterface(kafka.Topic{Name: core.config.CoreTopic}, requestProxy); err != nil {
- logger.Fatalw("Failed-registering-broadcast-handler", log.Fields{"topic": core.config.CoreTopic})
- return err
- }
-
- // Register the core-pair topic to handle core-bound requests destined to the core pair
- if err := core.kmp.SubscribeWithDefaultRequestHandler(kafka.Topic{Name: core.config.CorePairTopic}, kafka.OffsetNewest); err != nil {
- logger.Fatalw("Failed-registering-pair-handler", log.Fields{"topic": core.config.CorePairTopic})
- return err
- }
-
- logger.Info("request-handler-registered")
- return nil
-}
-
-func (core *Core) startDeviceManager(ctx context.Context) {
- logger.Info("DeviceManager-Starting...")
- core.deviceMgr.Start(ctx)
- logger.Info("DeviceManager-Started")
-}
-
-func (core *Core) startLogicalDeviceManager(ctx context.Context) {
- logger.Info("Logical-DeviceManager-Starting...")
- core.logicalDeviceMgr.Start(ctx)
- logger.Info("Logical-DeviceManager-Started")
-}
-
-func (core *Core) startAdapterManager(ctx context.Context) {
- logger.Info("Adapter-Manager-Starting...")
- err := core.adapterMgr.Start(ctx)
- if err != nil {
- logger.Fatalf("failed-to-start-adapter-manager: error %v ", err)
- }
- logger.Info("Adapter-Manager-Started")
-}
-
-/*
-* Thread to monitor kvstore Liveness (connection status)
-*
-* This function constantly monitors Liveness State of kvstore as reported
-* periodically by backend and updates the Status of kv-store service registered
-* with rw_core probe.
-*
-* If no liveness event has been seen within a timeout, then the thread will
-* perform a "liveness" check attempt, which will in turn trigger a liveness event on
-* the liveness channel, true or false depending on whether the attempt succeeded.
-*
-* The gRPC server in turn monitors the state of the readiness probe and will
-* start issuing UNAVAILABLE response while the probe is not ready.
- */
-func (core *Core) monitorKvstoreLiveness(ctx context.Context) {
- logger.Info("start-monitoring-kvstore-liveness")
-
- // Instruct backend to create Liveness channel for transporting state updates
- livenessChannel := core.backend.EnableLivenessChannel()
-
- logger.Debug("enabled-kvstore-liveness-channel")
-
- // Default state for kvstore is alive for rw_core
- timeout := core.config.LiveProbeInterval
-loop:
- for {
- timeoutTimer := time.NewTimer(timeout)
- select {
-
- case liveness := <-livenessChannel:
- logger.Debugw("received-liveness-change-notification", log.Fields{"liveness": liveness})
-
- if !liveness {
- probe.UpdateStatusFromContext(ctx, "kv-store", probe.ServiceStatusNotReady)
-
- if core.grpcServer != nil {
- logger.Info("kvstore-set-server-notready")
- }
-
- timeout = core.config.NotLiveProbeInterval
-
- } else {
- probe.UpdateStatusFromContext(ctx, "kv-store", probe.ServiceStatusRunning)
-
- if core.grpcServer != nil {
- logger.Info("kvstore-set-server-ready")
- }
-
- timeout = core.config.LiveProbeInterval
- }
-
- if !timeoutTimer.Stop() {
- <-timeoutTimer.C
- }
-
- case <-core.exitChannel:
- break loop
-
- case <-timeoutTimer.C:
- logger.Info("kvstore-perform-liveness-check-on-timeout")
-
- // Trigger Liveness check if no liveness update received within the timeout period.
- // The Liveness check will push Live state to same channel which this routine is
- // reading and processing. This, do it asynchronously to avoid blocking for
- // backend response and avoid any possibility of deadlock
- go core.backend.PerformLivenessCheck(ctx)
- }
- }
-}
diff --git a/rw_core/core/device/agent_test.go b/rw_core/core/device/agent_test.go
index 8b003b4..2abfdeb 100755
--- a/rw_core/core/device/agent_test.go
+++ b/rw_core/core/device/agent_test.go
@@ -144,23 +144,13 @@
if err = dat.kmp.Start(); err != nil {
logger.Fatal("Cannot start InterContainerProxy")
}
- if err = adapterMgr.Start(context.Background()); err != nil {
- logger.Fatal("Cannot start adapterMgr")
- }
- dat.deviceMgr.Start(context.Background())
- dat.logicalDeviceMgr.Start(context.Background())
+ adapterMgr.Start(context.Background())
}
func (dat *DATest) stopAll() {
if dat.kClient != nil {
dat.kClient.Stop()
}
- if dat.logicalDeviceMgr != nil {
- dat.logicalDeviceMgr.Stop(context.Background())
- }
- if dat.deviceMgr != nil {
- dat.deviceMgr.Stop(context.Background())
- }
if dat.kmp != nil {
dat.kmp.Stop()
}
diff --git a/rw_core/core/device/logical_agent_test.go b/rw_core/core/device/logical_agent_test.go
index 64c42b5..e562400 100644
--- a/rw_core/core/device/logical_agent_test.go
+++ b/rw_core/core/device/logical_agent_test.go
@@ -487,23 +487,13 @@
if err = lda.kmp.Start(); err != nil {
logger.Fatal("Cannot start InterContainerProxy")
}
- if err = adapterMgr.Start(context.Background()); err != nil {
- logger.Fatal("Cannot start adapterMgr")
- }
- lda.deviceMgr.Start(context.Background())
- lda.logicalDeviceMgr.Start(context.Background())
+ adapterMgr.Start(context.Background())
}
func (lda *LDATest) stopAll() {
if lda.kClient != nil {
lda.kClient.Stop()
}
- if lda.logicalDeviceMgr != nil {
- lda.logicalDeviceMgr.Stop(context.Background())
- }
- if lda.deviceMgr != nil {
- lda.deviceMgr.Stop(context.Background())
- }
if lda.kmp != nil {
lda.kmp.Stop()
}
diff --git a/rw_core/core/device/logical_manager.go b/rw_core/core/device/logical_manager.go
index a5c47b9..53c3759 100644
--- a/rw_core/core/device/logical_manager.go
+++ b/rw_core/core/device/logical_manager.go
@@ -30,7 +30,6 @@
"github.com/opencord/voltha-go/db/model"
"github.com/opencord/voltha-lib-go/v3/pkg/kafka"
"github.com/opencord/voltha-lib-go/v3/pkg/log"
- "github.com/opencord/voltha-lib-go/v3/pkg/probe"
"github.com/opencord/voltha-protos/v3/go/openflow_13"
"github.com/opencord/voltha-protos/v3/go/voltha"
"google.golang.org/grpc/codes"
@@ -44,25 +43,11 @@
deviceMgr *Manager
kafkaICProxy kafka.InterContainerProxy
clusterDataProxy *model.Proxy
- exitChannel chan int
defaultTimeout time.Duration
logicalDevicesLoadingLock sync.RWMutex
logicalDeviceLoadingInProgress map[string][]chan int
}
-func (ldMgr *LogicalManager) Start(ctx context.Context) {
- logger.Info("starting-logical-device-manager")
- probe.UpdateStatusFromContext(ctx, "logical-device-manager", probe.ServiceStatusRunning)
- logger.Info("logical-device-manager-started")
-}
-
-func (ldMgr *LogicalManager) Stop(ctx context.Context) {
- logger.Info("stopping-logical-device-manager")
- ldMgr.exitChannel <- 1
- probe.UpdateStatusFromContext(ctx, "logical-device-manager", probe.ServiceStatusStopped)
- logger.Info("logical-device-manager-stopped")
-}
-
func (ldMgr *LogicalManager) addLogicalDeviceAgentToMap(agent *LogicalAgent) {
if _, exist := ldMgr.logicalDeviceAgents.Load(agent.logicalDeviceID); !exist {
ldMgr.logicalDeviceAgents.Store(agent.logicalDeviceID, agent)
diff --git a/rw_core/core/device/manager.go b/rw_core/core/device/manager.go
index b0128a5..9990104 100755
--- a/rw_core/core/device/manager.go
+++ b/rw_core/core/device/manager.go
@@ -32,7 +32,6 @@
"github.com/opencord/voltha-go/rw_core/utils"
"github.com/opencord/voltha-lib-go/v3/pkg/kafka"
"github.com/opencord/voltha-lib-go/v3/pkg/log"
- "github.com/opencord/voltha-lib-go/v3/pkg/probe"
"github.com/opencord/voltha-protos/v3/go/common"
ic "github.com/opencord/voltha-protos/v3/go/inter_container"
ofp "github.com/opencord/voltha-protos/v3/go/openflow_13"
@@ -53,7 +52,6 @@
stateTransitions *TransitionMap
clusterDataProxy *model.Proxy
coreInstanceID string
- exitChannel chan int
defaultTimeout time.Duration
devicesLoadingLock sync.RWMutex
deviceLoadingInProgress map[string][]chan int
@@ -61,7 +59,6 @@
func NewManagers(proxy *model.Proxy, adapterMgr *adapter.Manager, kmp kafka.InterContainerProxy, endpointMgr kafka.EndpointManager, corePairTopic, coreInstanceID string, defaultCoreTimeout time.Duration) (*Manager, *LogicalManager) {
deviceMgr := &Manager{
- exitChannel: make(chan int, 1),
rootDevices: make(map[string]bool),
kafkaICProxy: kmp,
adapterProxy: remote.NewAdapterProxy(kmp, corePairTopic, endpointMgr),
@@ -71,9 +68,10 @@
defaultTimeout: defaultCoreTimeout * time.Millisecond,
deviceLoadingInProgress: make(map[string][]chan int),
}
+ deviceMgr.stateTransitions = NewTransitionMap(deviceMgr)
+
logicalDeviceMgr := &LogicalManager{
Manager: event.NewManager(),
- exitChannel: make(chan int, 1),
deviceMgr: deviceMgr,
kafkaICProxy: kmp,
clusterDataProxy: proxy,
@@ -87,20 +85,6 @@
return deviceMgr, logicalDeviceMgr
}
-func (dMgr *Manager) Start(ctx context.Context) {
- logger.Info("starting-device-manager")
- dMgr.stateTransitions = NewTransitionMap(dMgr)
- probe.UpdateStatusFromContext(ctx, "device-manager", probe.ServiceStatusRunning)
- logger.Info("device-manager-started")
-}
-
-func (dMgr *Manager) Stop(ctx context.Context) {
- logger.Info("stopping-device-manager")
- dMgr.exitChannel <- 1
- probe.UpdateStatusFromContext(ctx, "device-manager", probe.ServiceStatusStopped)
- logger.Info("device-manager-stopped")
-}
-
func (dMgr *Manager) addDeviceAgentToMap(agent *Agent) {
if _, exist := dMgr.deviceAgents.Load(agent.deviceID); !exist {
dMgr.deviceAgents.Store(agent.deviceID, agent)
diff --git a/rw_core/core/kafka.go b/rw_core/core/kafka.go
new file mode 100644
index 0000000..fcdf340
--- /dev/null
+++ b/rw_core/core/kafka.go
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2018-present Open Networking Foundation
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+
+ * http://www.apache.org/licenses/LICENSE-2.0
+
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package core
+
+import (
+ "context"
+ "time"
+
+ "github.com/opencord/voltha-go/rw_core/core/adapter"
+ "github.com/opencord/voltha-go/rw_core/core/api"
+ "github.com/opencord/voltha-go/rw_core/core/device"
+ "github.com/opencord/voltha-lib-go/v3/pkg/kafka"
+ "github.com/opencord/voltha-lib-go/v3/pkg/log"
+ "github.com/opencord/voltha-lib-go/v3/pkg/probe"
+)
+
+// startKafkInterContainerProxy is responsible for starting the Kafka Interadapter Proxy
+func startKafkInterContainerProxy(ctx context.Context, kafkaClient kafka.Client, host string, port int, coreTopic, affinityRouterTopic string, connectionRetryInterval time.Duration) (kafka.InterContainerProxy, error) {
+ logger.Infow("initialize-kafka-manager", log.Fields{"host": host, "port": port, "topic": coreTopic})
+
+ probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusPreparing)
+
+ // create the kafka RPC proxy
+ kmp := kafka.NewInterContainerProxy(
+ kafka.InterContainerHost(host),
+ kafka.InterContainerPort(port),
+ kafka.MsgClient(kafkaClient),
+ kafka.DefaultTopic(&kafka.Topic{Name: coreTopic}),
+ kafka.DeviceDiscoveryTopic(&kafka.Topic{Name: affinityRouterTopic}))
+
+ probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusPrepared)
+
+ // wait for connectivity
+ logger.Infow("starting-kafka-manager", log.Fields{"host": host,
+ "port": port, "topic": coreTopic})
+
+ for {
+ // If we haven't started yet, then try to start
+ logger.Infow("starting-kafka-proxy", log.Fields{})
+ if err := kmp.Start(); err != nil {
+ // We failed to start. Delay and then try again later.
+ // Don't worry about liveness, as we can't be live until we've started.
+ probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusNotReady)
+ logger.Infow("error-starting-kafka-messaging-proxy", log.Fields{"error": err})
+ select {
+ case <-time.After(connectionRetryInterval):
+ case <-ctx.Done():
+ return nil, ctx.Err()
+ }
+ continue
+ }
+ // We started. We only need to do this once.
+ // Next we'll fall through and start checking liveness.
+ logger.Infow("started-kafka-proxy", log.Fields{})
+ break
+ }
+ return kmp, nil
+}
+
+/*
+ * monitorKafkaLiveness is responsible for monitoring the Kafka Interadapter Proxy connectivity state
+ *
+ * Any producer that fails to send will cause KafkaInterContainerProxy to
+ * post a false event on its liveness channel. Any producer that succeeds in sending
+ * will cause KafkaInterContainerProxy to post a true event on its liveness
+ * channel. Group receivers also update liveness state, and a receiver will typically
+ * indicate a loss of liveness within 3-5 seconds of Kafka going down. Receivers
+ * only indicate restoration of liveness if a message is received. During normal
+ * operation, messages will be routinely produced and received, automatically
+ * indicating liveness state. These routine liveness indications are rate-limited
+ * inside sarama_client.
+ *
+ * This thread monitors the status of KafkaInterContainerProxy's liveness and pushes
+ * that state to the core's readiness probes. If no liveness event has been seen
+ * within a timeout, then the thread will make an attempt to produce a "liveness"
+ * message, which will in turn trigger a liveness event on the liveness channel, true
+ * or false depending on whether the attempt succeeded.
+ *
+ * The gRPC server in turn monitors the state of the readiness probe and will
+ * start issuing UNAVAILABLE response while the probe is not ready.
+ *
+ * startupRetryInterval -- interval between attempts to start
+ * liveProbeInterval -- interval between liveness checks when in a live state
+ * notLiveProbeInterval -- interval between liveness checks when in a notLive state
+ *
+ * liveProbeInterval and notLiveProbeInterval can be configured separately,
+ * though the current default is that both are set to 60 seconds.
+ */
+func monitorKafkaLiveness(ctx context.Context, kmp kafka.InterContainerProxy, liveProbeInterval time.Duration, notLiveProbeInterval time.Duration) {
+ logger.Info("started-kafka-message-proxy")
+
+ livenessChannel := kmp.EnableLivenessChannel(true)
+
+ logger.Info("enabled-kafka-liveness-channel")
+
+ timeout := liveProbeInterval
+ for {
+ timeoutTimer := time.NewTimer(timeout)
+ select {
+ case liveness := <-livenessChannel:
+ logger.Infow("kafka-manager-thread-liveness-event", log.Fields{"liveness": liveness})
+ // there was a state change in Kafka liveness
+ if !liveness {
+ probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusNotReady)
+ logger.Info("kafka-manager-thread-set-server-notready")
+
+ // retry frequently while life is bad
+ timeout = notLiveProbeInterval
+ } else {
+ probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusRunning)
+ logger.Info("kafka-manager-thread-set-server-ready")
+
+ // retry infrequently while life is good
+ timeout = liveProbeInterval
+ }
+ if !timeoutTimer.Stop() {
+ <-timeoutTimer.C
+ }
+ case <-timeoutTimer.C:
+ logger.Info("kafka-proxy-liveness-recheck")
+ // send the liveness probe in a goroutine; we don't want to deadlock ourselves as
+ // the liveness probe may wait (and block) writing to our channel.
+ go func() {
+ err := kmp.SendLiveness()
+ if err != nil {
+ // Catch possible error case if sending liveness after Sarama has been stopped.
+ logger.Warnw("error-kafka-send-liveness", log.Fields{"error": err})
+ }
+ }()
+ case <-ctx.Done():
+ return // just exit
+ }
+ }
+}
+
+func registerAdapterRequestHandlers(kmp kafka.InterContainerProxy, dMgr *device.Manager, aMgr *adapter.Manager, coreTopic, corePairTopic string) {
+ requestProxy := api.NewAdapterRequestHandlerProxy(dMgr, aMgr)
+
+ // Register the broadcast topic to handle any core-bound broadcast requests
+ if err := kmp.SubscribeWithRequestHandlerInterface(kafka.Topic{Name: coreTopic}, requestProxy); err != nil {
+ logger.Fatalw("Failed-registering-broadcast-handler", log.Fields{"topic": coreTopic})
+ }
+
+ // Register the core-pair topic to handle core-bound requests destined to the core pair
+ if err := kmp.SubscribeWithDefaultRequestHandler(kafka.Topic{Name: corePairTopic}, kafka.OffsetNewest); err != nil {
+ logger.Fatalw("Failed-registering-pair-handler", log.Fields{"topic": corePairTopic})
+ }
+
+ logger.Info("request-handler-registered")
+}
diff --git a/rw_core/core/kv.go b/rw_core/core/kv.go
new file mode 100644
index 0000000..48b99e9
--- /dev/null
+++ b/rw_core/core/kv.go
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2018-present Open Networking Foundation
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+
+ * http://www.apache.org/licenses/LICENSE-2.0
+
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package core
+
+import (
+ "context"
+ "errors"
+ "time"
+
+ "github.com/opencord/voltha-lib-go/v3/pkg/db"
+ "github.com/opencord/voltha-lib-go/v3/pkg/db/kvstore"
+ "github.com/opencord/voltha-lib-go/v3/pkg/log"
+ "github.com/opencord/voltha-lib-go/v3/pkg/probe"
+ "google.golang.org/grpc/codes"
+ "google.golang.org/grpc/status"
+)
+
+func newKVClient(storeType string, address string, timeout int) (kvstore.Client, error) {
+ logger.Infow("kv-store-type", log.Fields{"store": storeType})
+ switch storeType {
+ case "consul":
+ return kvstore.NewConsulClient(address, timeout)
+ case "etcd":
+ return kvstore.NewEtcdClient(address, timeout, log.FatalLevel)
+ }
+ return nil, errors.New("unsupported-kv-store")
+}
+
+func stopKVClient(ctx context.Context, kvClient kvstore.Client) {
+ // Release all reservations
+ if err := kvClient.ReleaseAllReservations(ctx); err != nil {
+ logger.Infow("fail-to-release-all-reservations", log.Fields{"error": err})
+ }
+ // Close the DB connection
+ kvClient.Close()
+}
+
+// waitUntilKVStoreReachableOrMaxTries will wait until it can connect to a KV store or until maxtries has been reached
+func waitUntilKVStoreReachableOrMaxTries(ctx context.Context, kvClient kvstore.Client, maxRetries int, retryInterval time.Duration) error {
+ logger.Infow("verifying-KV-store-connectivity", log.Fields{"retries": maxRetries, "retryInterval": retryInterval})
+ count := 0
+ for {
+ if !kvClient.IsConnectionUp(ctx) {
+ logger.Info("KV-store-unreachable")
+ if maxRetries != -1 {
+ if count >= maxRetries {
+ return status.Error(codes.Unavailable, "kv store unreachable")
+ }
+ }
+ count++
+
+ // Take a nap before retrying
+ select {
+ case <-ctx.Done():
+ //ctx canceled
+ return ctx.Err()
+ case <-time.After(retryInterval):
+ }
+ logger.Infow("retry-KV-store-connectivity", log.Fields{"retryCount": count, "maxRetries": maxRetries, "retryInterval": retryInterval})
+ } else {
+ break
+ }
+ }
+ probe.UpdateStatusFromContext(ctx, "kv-store", probe.ServiceStatusRunning)
+ logger.Info("KV-store-reachable")
+ return nil
+}
+
+/*
+ * Thread to monitor kvstore Liveness (connection status)
+ *
+ * This function constantly monitors Liveness State of kvstore as reported
+ * periodically by backend and updates the Status of kv-store service registered
+ * with rw_core probe.
+ *
+ * If no liveness event has been seen within a timeout, then the thread will
+ * perform a "liveness" check attempt, which will in turn trigger a liveness event on
+ * the liveness channel, true or false depending on whether the attempt succeeded.
+ *
+ * The gRPC server in turn monitors the state of the readiness probe and will
+ * start issuing UNAVAILABLE response while the probe is not ready.
+ */
+func monitorKVStoreLiveness(ctx context.Context, backend *db.Backend, liveProbeInterval, notLiveProbeInterval time.Duration) {
+ logger.Info("start-monitoring-kvstore-liveness")
+
+ // Instruct backend to create Liveness channel for transporting state updates
+ livenessChannel := backend.EnableLivenessChannel()
+
+ logger.Debug("enabled-kvstore-liveness-channel")
+
+ // Default state for kvstore is alive for rw_core
+ timeout := liveProbeInterval
+loop:
+ for {
+ timeoutTimer := time.NewTimer(timeout)
+ select {
+
+ case liveness := <-livenessChannel:
+ logger.Debugw("received-liveness-change-notification", log.Fields{"liveness": liveness})
+
+ if !liveness {
+ probe.UpdateStatusFromContext(ctx, "kv-store", probe.ServiceStatusNotReady)
+ logger.Info("kvstore-set-server-notready")
+
+ timeout = notLiveProbeInterval
+
+ } else {
+ probe.UpdateStatusFromContext(ctx, "kv-store", probe.ServiceStatusRunning)
+ logger.Info("kvstore-set-server-ready")
+
+ timeout = liveProbeInterval
+ }
+
+ if !timeoutTimer.Stop() {
+ <-timeoutTimer.C
+ }
+
+ case <-ctx.Done():
+ break loop
+
+ case <-timeoutTimer.C:
+ logger.Info("kvstore-perform-liveness-check-on-timeout")
+
+ // Trigger Liveness check if no liveness update received within the timeout period.
+ // The Liveness check will push Live state to same channel which this routine is
+ // reading and processing. Thus, do it asynchronously to avoid blocking for
+ // backend response and avoid any possibility of deadlock
+ go backend.PerformLivenessCheck(ctx)
+ }
+ }
+}
diff --git a/rw_core/main.go b/rw_core/main.go
index 4d99fbb..6884993 100644
--- a/rw_core/main.go
+++ b/rw_core/main.go
@@ -18,141 +18,20 @@
import (
"context"
- "errors"
"fmt"
"os"
"os/signal"
- "strconv"
"syscall"
"time"
"github.com/opencord/voltha-go/rw_core/config"
c "github.com/opencord/voltha-go/rw_core/core"
"github.com/opencord/voltha-go/rw_core/utils"
- conf "github.com/opencord/voltha-lib-go/v3/pkg/config"
- "github.com/opencord/voltha-lib-go/v3/pkg/db/kvstore"
- "github.com/opencord/voltha-lib-go/v3/pkg/kafka"
"github.com/opencord/voltha-lib-go/v3/pkg/log"
"github.com/opencord/voltha-lib-go/v3/pkg/probe"
"github.com/opencord/voltha-lib-go/v3/pkg/version"
- ic "github.com/opencord/voltha-protos/v3/go/inter_container"
)
-type rwCore struct {
- kvClient kvstore.Client
- config *config.RWCoreFlags
- halted bool
- exitChannel chan int
- //kmp *kafka.KafkaMessagingProxy
- kafkaClient kafka.Client
- core *c.Core
- //For test
- receiverChannels []<-chan *ic.InterContainerMessage
-}
-
-func newKVClient(storeType string, address string, timeout int) (kvstore.Client, error) {
-
- logger.Infow("kv-store-type", log.Fields{"store": storeType})
- switch storeType {
- case "consul":
- return kvstore.NewConsulClient(address, timeout)
- case "etcd":
- return kvstore.NewEtcdClient(address, timeout, log.FatalLevel)
- }
- return nil, errors.New("unsupported-kv-store")
-}
-
-func newKafkaClient(clientType string, host string, port int, instanceID string, livenessChannelInterval time.Duration) (kafka.Client, error) {
-
- logger.Infow("kafka-client-type", log.Fields{"client": clientType})
- switch clientType {
- case "sarama":
- return kafka.NewSaramaClient(
- kafka.Host(host),
- kafka.Port(port),
- kafka.ConsumerType(kafka.GroupCustomer),
- kafka.ProducerReturnOnErrors(true),
- kafka.ProducerReturnOnSuccess(true),
- kafka.ProducerMaxRetries(6),
- kafka.NumPartitions(3),
- kafka.ConsumerGroupName(instanceID),
- kafka.ConsumerGroupPrefix(instanceID),
- kafka.AutoCreateTopic(true),
- kafka.ProducerFlushFrequency(5),
- kafka.ProducerRetryBackoff(time.Millisecond*30),
- kafka.LivenessChannelInterval(livenessChannelInterval),
- ), nil
- }
- return nil, errors.New("unsupported-client-type")
-}
-
-func newRWCore(cf *config.RWCoreFlags) *rwCore {
- var rwCore rwCore
- rwCore.config = cf
- rwCore.halted = false
- rwCore.exitChannel = make(chan int, 1)
- rwCore.receiverChannels = make([]<-chan *ic.InterContainerMessage, 0)
- return &rwCore
-}
-
-func (rw *rwCore) start(ctx context.Context, instanceID string) {
- logger.Info("Starting RW Core components")
-
- // Setup KV Client
- logger.Debugw("create-kv-client", log.Fields{"kvstore": rw.config.KVStoreType})
- var err error
- if rw.kvClient, err = newKVClient(
- rw.config.KVStoreType,
- rw.config.KVStoreHost+":"+strconv.Itoa(rw.config.KVStorePort),
- rw.config.KVStoreTimeout); err != nil {
- logger.Fatal(err)
- }
- cm := conf.NewConfigManager(rw.kvClient, rw.config.KVStoreType, rw.config.KVStoreHost, rw.config.KVStorePort, rw.config.KVStoreTimeout)
- go conf.StartLogLevelConfigProcessing(cm, ctx)
-
- // Setup Kafka Client
- if rw.kafkaClient, err = newKafkaClient("sarama",
- rw.config.KafkaAdapterHost,
- rw.config.KafkaAdapterPort,
- instanceID,
- rw.config.LiveProbeInterval/2); err != nil {
- logger.Fatal("Unsupported-kafka-client")
- }
-
- // Create the core service
- rw.core = c.NewCore(ctx, instanceID, rw.config, rw.kvClient, rw.kafkaClient)
-
- // start the core
- err = rw.core.Start(ctx)
- if err != nil {
- logger.Fatalf("failed-to-start-rwcore", log.Fields{"error": err})
- }
-}
-
-func (rw *rwCore) stop(ctx context.Context) {
- // Stop leadership tracking
- rw.halted = true
-
- // send exit signal
- rw.exitChannel <- 0
-
- // Cleanup - applies only if we had a kvClient
- if rw.kvClient != nil {
- // Release all reservations
- if err := rw.kvClient.ReleaseAllReservations(ctx); err != nil {
- logger.Infow("fail-to-release-all-reservations", log.Fields{"error": err})
- }
- // Close the DB connection
- rw.kvClient.Close()
- }
-
- rw.core.Stop(ctx)
-
- //if rw.kafkaClient != nil {
- // rw.kafkaClient.Stop()
- //}
-}
-
func waitForExit() int {
signalChannel := make(chan os.Signal, 1)
signal.Notify(signalChannel,
@@ -161,35 +40,28 @@
syscall.SIGTERM,
syscall.SIGQUIT)
- exitChannel := make(chan int)
-
- go func() {
- s := <-signalChannel
- switch s {
- case syscall.SIGHUP,
- syscall.SIGINT,
- syscall.SIGTERM,
- syscall.SIGQUIT:
- logger.Infow("closing-signal-received", log.Fields{"signal": s})
- exitChannel <- 0
- default:
- logger.Infow("unexpected-signal-received", log.Fields{"signal": s})
- exitChannel <- 1
- }
- }()
-
- code := <-exitChannel
- return code
+ s := <-signalChannel
+ switch s {
+ case syscall.SIGHUP,
+ syscall.SIGINT,
+ syscall.SIGTERM,
+ syscall.SIGQUIT:
+ logger.Infow("closing-signal-received", log.Fields{"signal": s})
+ return 0
+ default:
+ logger.Infow("unexpected-signal-received", log.Fields{"signal": s})
+ return 1
+ }
}
func printBanner() {
- fmt.Println(" ")
- fmt.Println(" ______ ______ ")
- fmt.Println("| _ \\ \\ / / ___|___ _ __ ___ ")
- fmt.Println("| |_) \\ \\ /\\ / / | / _ \\| '__/ _ \\ ")
- fmt.Println("| _ < \\ V V /| |__| (_) | | | __/ ")
- fmt.Println("|_| \\_\\ \\_/\\_/ \\____\\___/|_| \\___| ")
- fmt.Println(" ")
+ fmt.Println(` `)
+ fmt.Println(` ______ ______ `)
+ fmt.Println(`| _ \ \ / / ___|___ _ __ ___ `)
+ fmt.Println(`| |_) \ \ /\ / / | / _ \| '__/ _ \`)
+ fmt.Println(`| _ < \ V V /| |__| (_) | | | __/`)
+ fmt.Println(`|_| \_\ \_/\_/ \____\___/|_| \___|`)
+ fmt.Println(` `)
}
func printVersion() {
@@ -254,9 +126,6 @@
logger.Infow("rw-core-config", log.Fields{"config": *cf})
- // Create the core
- rw := newRWCore(cf)
-
// Create a context adding the status update channel
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
@@ -267,19 +136,19 @@
* objects there can be a single probe end point for the process.
*/
p := &probe.Probe{}
- go p.ListenAndServe(fmt.Sprintf("%s:%d", rw.config.ProbeHost, rw.config.ProbePort))
+ go p.ListenAndServe(fmt.Sprintf("%s:%d", cf.ProbeHost, cf.ProbePort))
// Add the probe to the context to pass to all the services started
probeCtx := context.WithValue(ctx, probe.ProbeContextKey, p)
- // Start the core
- go rw.start(probeCtx, instanceID)
+ // create and start the core
+ core := c.NewCore(probeCtx, instanceID, cf)
code := waitForExit()
logger.Infow("received-a-closing-signal", log.Fields{"code": code})
// Cleanup before leaving
- rw.stop(probeCtx)
+ core.Stop()
elapsed := time.Since(start)
logger.Infow("rw-core-run-time", log.Fields{"core": instanceID, "time": elapsed / time.Second})