blob: de126a2cdda45fdc3097a3a7937784f69bdf4bee [file] [log] [blame]
khenaidoob9203542018-09-17 22:56:37 -04001/*
2 * Copyright 2018-present Open Networking Foundation
3
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7
8 * http://www.apache.org/licenses/LICENSE-2.0
9
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
npujar1d86a522019-11-14 17:11:16 +053016
khenaidoob9203542018-09-17 22:56:37 -040017package core
18
19import (
20 "context"
Scott Baker2d87ee32020-03-03 13:04:01 -080021 "sync"
npujar1d86a522019-11-14 17:11:16 +053022 "time"
23
sbarbari17d7e222019-11-05 10:02:29 -050024 "github.com/opencord/voltha-go/db/model"
khenaidoob9203542018-09-17 22:56:37 -040025 "github.com/opencord/voltha-go/rw_core/config"
Kent Hagerman2b216042020-04-03 18:28:56 -040026 "github.com/opencord/voltha-go/rw_core/core/adapter"
27 "github.com/opencord/voltha-go/rw_core/core/api"
28 "github.com/opencord/voltha-go/rw_core/core/device"
serkant.uluderya2ae470f2020-01-21 11:13:09 -080029 "github.com/opencord/voltha-lib-go/v3/pkg/db"
30 "github.com/opencord/voltha-lib-go/v3/pkg/db/kvstore"
31 grpcserver "github.com/opencord/voltha-lib-go/v3/pkg/grpc"
32 "github.com/opencord/voltha-lib-go/v3/pkg/kafka"
33 "github.com/opencord/voltha-lib-go/v3/pkg/log"
34 "github.com/opencord/voltha-lib-go/v3/pkg/probe"
35 "github.com/opencord/voltha-protos/v3/go/voltha"
khenaidoob9203542018-09-17 22:56:37 -040036 "google.golang.org/grpc"
khenaidoob3244212019-08-27 14:32:27 -040037 "google.golang.org/grpc/codes"
38 "google.golang.org/grpc/status"
khenaidoob9203542018-09-17 22:56:37 -040039)
40
npujar1d86a522019-11-14 17:11:16 +053041// Core represent read,write core attributes
khenaidoob9203542018-09-17 22:56:37 -040042type Core struct {
npujar1d86a522019-11-14 17:11:16 +053043 instanceID string
Kent Hagerman2b216042020-04-03 18:28:56 -040044 deviceMgr *device.Manager
45 logicalDeviceMgr *device.LogicalManager
khenaidoob9203542018-09-17 22:56:37 -040046 grpcServer *grpcserver.GrpcServer
Kent Hagerman2b216042020-04-03 18:28:56 -040047 grpcNBIAPIHandler *api.NBIHandler
48 adapterMgr *adapter.Manager
khenaidoob9203542018-09-17 22:56:37 -040049 config *config.RWCoreFlags
npujar467fe752020-01-16 20:17:45 +053050 kmp kafka.InterContainerProxy
khenaidoob9203542018-09-17 22:56:37 -040051 clusterDataProxy *model.Proxy
52 localDataProxy *model.Proxy
Scott Baker2d87ee32020-03-03 13:04:01 -080053 exitChannel chan struct{}
54 stopOnce sync.Once
Richard Jankowskie4d77662018-10-17 13:53:21 -040055 kvClient kvstore.Client
Girish Kumar4d3887d2019-11-22 14:22:05 +000056 backend db.Backend
khenaidoo43c82122018-11-22 18:38:28 -050057 kafkaClient kafka.Client
khenaidoob9203542018-09-17 22:56:37 -040058}
59
npujar1d86a522019-11-14 17:11:16 +053060// NewCore creates instance of rw core
Thomas Lee Se5a44012019-11-07 20:32:24 +053061func NewCore(ctx context.Context, id string, cf *config.RWCoreFlags, kvClient kvstore.Client, kafkaClient kafka.Client) *Core {
khenaidoob9203542018-09-17 22:56:37 -040062 var core Core
npujar1d86a522019-11-14 17:11:16 +053063 core.instanceID = id
Scott Baker2d87ee32020-03-03 13:04:01 -080064 core.exitChannel = make(chan struct{})
khenaidoob9203542018-09-17 22:56:37 -040065 core.config = cf
Richard Jankowskie4d77662018-10-17 13:53:21 -040066 core.kvClient = kvClient
khenaidoo43c82122018-11-22 18:38:28 -050067 core.kafkaClient = kafkaClient
Richard Jankowskie4d77662018-10-17 13:53:21 -040068
Girish Kumar4d3887d2019-11-22 14:22:05 +000069 // Configure backend to push Liveness Status at least every (cf.LiveProbeInterval / 2) seconds
70 // so as to avoid trigger of Liveness check (due to Liveness timeout) when backend is alive
71 livenessChannelInterval := cf.LiveProbeInterval / 2
72
Richard Jankowskie4d77662018-10-17 13:53:21 -040073 // Setup the KV store
Girish Kumar4d3887d2019-11-22 14:22:05 +000074 core.backend = db.Backend{
75 Client: kvClient,
76 StoreType: cf.KVStoreType,
77 Host: cf.KVStoreHost,
78 Port: cf.KVStorePort,
79 Timeout: cf.KVStoreTimeout,
80 LivenessChannelInterval: livenessChannelInterval,
81 PathPrefix: cf.KVStoreDataPrefix}
khenaidoob9203542018-09-17 22:56:37 -040082 return &core
83}
84
npujar1d86a522019-11-14 17:11:16 +053085// Start brings up core services
Thomas Lee Se5a44012019-11-07 20:32:24 +053086func (core *Core) Start(ctx context.Context) error {
David K. Bainbridgeb4a9ab02019-09-20 15:12:16 -070087
88 // If the context has a probe then fetch it and register our services
89 var p *probe.Probe
90 if value := ctx.Value(probe.ProbeContextKey); value != nil {
91 if _, ok := value.(*probe.Probe); ok {
92 p = value.(*probe.Probe)
93 p.RegisterService(
94 "message-bus",
95 "kv-store",
96 "device-manager",
97 "logical-device-manager",
98 "adapter-manager",
99 "grpc-service",
100 )
101 }
102 }
103
Girish Kumarf56a4682020-03-20 20:07:46 +0000104 logger.Info("starting-core-services", log.Fields{"coreId": core.instanceID})
khenaidoob3244212019-08-27 14:32:27 -0400105
106 // Wait until connection to KV Store is up
107 if err := core.waitUntilKVStoreReachableOrMaxTries(ctx, core.config.MaxConnectionRetries, core.config.ConnectionRetryInterval); err != nil {
Girish Kumarf56a4682020-03-20 20:07:46 +0000108 logger.Fatal("Unable-to-connect-to-KV-store")
khenaidoob3244212019-08-27 14:32:27 -0400109 }
David K. Bainbridgeb4a9ab02019-09-20 15:12:16 -0700110 if p != nil {
111 p.UpdateStatus("kv-store", probe.ServiceStatusRunning)
112 }
Thomas Lee Se5a44012019-11-07 20:32:24 +0530113
Kent Hagerman2b216042020-04-03 18:28:56 -0400114 endpointMgr := kafka.NewEndpointManager(&core.backend)
115
Kent Hagerman4f355f52020-03-30 16:01:33 -0400116 core.clusterDataProxy = model.NewProxy(&core.backend, "/")
117 core.localDataProxy = model.NewProxy(&core.backend, "/")
khenaidoob3244212019-08-27 14:32:27 -0400118
Scott Bakeree6a0872019-10-29 15:59:52 -0700119 // core.kmp must be created before deviceMgr and adapterMgr, as they will make
120 // private copies of the poiner to core.kmp.
npujar467fe752020-01-16 20:17:45 +0530121 core.initKafkaManager(ctx)
khenaidoob3244212019-08-27 14:32:27 -0400122
Girish Kumarf56a4682020-03-20 20:07:46 +0000123 logger.Debugw("values", log.Fields{"kmp": core.kmp})
Kent Hagerman2b216042020-04-03 18:28:56 -0400124 core.adapterMgr = adapter.NewAdapterManager(core.clusterDataProxy, core.instanceID, core.kafkaClient)
125 core.deviceMgr, core.logicalDeviceMgr = device.NewDeviceManagers(core.clusterDataProxy, core.adapterMgr, core.kmp, endpointMgr, core.config.CorePairTopic, core.instanceID, core.config.DefaultCoreTimeout)
khenaidoo54e0ddf2019-02-27 16:21:33 -0500126
Scott Bakeree6a0872019-10-29 15:59:52 -0700127 // Start the KafkaManager. This must be done after the deviceMgr, adapterMgr, and
128 // logicalDeviceMgr have been created, as once the kmp is started, it will register
129 // the above with the kmp.
130
131 go core.startKafkaManager(ctx,
132 core.config.ConnectionRetryInterval,
133 core.config.LiveProbeInterval,
134 core.config.NotLiveProbeInterval)
khenaidoob3244212019-08-27 14:32:27 -0400135
khenaidoob9203542018-09-17 22:56:37 -0400136 go core.startDeviceManager(ctx)
137 go core.startLogicalDeviceManager(ctx)
138 go core.startGRPCService(ctx)
khenaidoo21d51152019-02-01 13:48:37 -0500139 go core.startAdapterManager(ctx)
Girish Kumar4d3887d2019-11-22 14:22:05 +0000140 go core.monitorKvstoreLiveness(ctx)
khenaidoob9203542018-09-17 22:56:37 -0400141
Girish Kumarf56a4682020-03-20 20:07:46 +0000142 logger.Info("core-services-started")
Thomas Lee Se5a44012019-11-07 20:32:24 +0530143 return nil
khenaidoob9203542018-09-17 22:56:37 -0400144}
145
npujar1d86a522019-11-14 17:11:16 +0530146// Stop brings down core services
khenaidoob9203542018-09-17 22:56:37 -0400147func (core *Core) Stop(ctx context.Context) {
Scott Baker2d87ee32020-03-03 13:04:01 -0800148 core.stopOnce.Do(func() {
Girish Kumarf56a4682020-03-20 20:07:46 +0000149 logger.Info("stopping-adaptercore")
Scott Baker2d87ee32020-03-03 13:04:01 -0800150 // Signal to the KVStoreMonitor that we are stopping.
151 close(core.exitChannel)
152 // Stop all the started services
153 if core.grpcServer != nil {
154 core.grpcServer.Stop()
155 }
156 if core.logicalDeviceMgr != nil {
Kent Hagerman2b216042020-04-03 18:28:56 -0400157 core.logicalDeviceMgr.Stop(ctx)
Scott Baker2d87ee32020-03-03 13:04:01 -0800158 }
159 if core.deviceMgr != nil {
Kent Hagerman2b216042020-04-03 18:28:56 -0400160 core.deviceMgr.Stop(ctx)
Scott Baker2d87ee32020-03-03 13:04:01 -0800161 }
162 if core.kmp != nil {
163 core.kmp.Stop()
164 }
Girish Kumarf56a4682020-03-20 20:07:46 +0000165 logger.Info("adaptercore-stopped")
Scott Baker2d87ee32020-03-03 13:04:01 -0800166 })
khenaidoob9203542018-09-17 22:56:37 -0400167}
168
khenaidoo631fe542019-05-31 15:44:43 -0400169//startGRPCService creates the grpc service handlers, registers it to the grpc server and starts the server
khenaidoob9203542018-09-17 22:56:37 -0400170func (core *Core) startGRPCService(ctx context.Context) {
171 // create an insecure gserver server
Scott Bakeree6a0872019-10-29 15:59:52 -0700172 core.grpcServer = grpcserver.NewGrpcServer(core.config.GrpcHost, core.config.GrpcPort, nil, false, probe.GetProbeFromContext(ctx))
Girish Kumarf56a4682020-03-20 20:07:46 +0000173 logger.Info("grpc-server-created")
khenaidoob9203542018-09-17 22:56:37 -0400174
Kent Hagerman2b216042020-04-03 18:28:56 -0400175 core.grpcNBIAPIHandler = api.NewAPIHandler(core.deviceMgr, core.logicalDeviceMgr, core.adapterMgr)
Girish Kumarf56a4682020-03-20 20:07:46 +0000176 logger.Infow("grpc-handler", log.Fields{"core_binding_key": core.config.CoreBindingKey})
Kent Hagerman2b216042020-04-03 18:28:56 -0400177 core.logicalDeviceMgr.SetEventCallbacks(core.grpcNBIAPIHandler)
khenaidoob9203542018-09-17 22:56:37 -0400178 // Create a function to register the core GRPC service with the GRPC server
179 f := func(gs *grpc.Server) {
180 voltha.RegisterVolthaServiceServer(
181 gs,
Richard Jankowskidbab94a2018-12-06 16:20:25 -0500182 core.grpcNBIAPIHandler,
khenaidoob9203542018-09-17 22:56:37 -0400183 )
184 }
185
186 core.grpcServer.AddService(f)
Girish Kumarf56a4682020-03-20 20:07:46 +0000187 logger.Info("grpc-service-added")
khenaidoob9203542018-09-17 22:56:37 -0400188
David K. Bainbridgeb4a9ab02019-09-20 15:12:16 -0700189 /*
190 * Start the GRPC server
191 *
192 * This is a bit sub-optimal here as the grpcServer.Start call does not return (blocks)
193 * until something fails, but we want to send a "start" status update. As written this
194 * means that we are actually sending the "start" status update before the server is
195 * started, which means it is possible that the status is "running" before it actually is.
196 *
197 * This means that there is a small window in which the core could return its status as
198 * ready, when it really isn't.
199 */
200 probe.UpdateStatusFromContext(ctx, "grpc-service", probe.ServiceStatusRunning)
Girish Kumarf56a4682020-03-20 20:07:46 +0000201 logger.Info("grpc-server-started")
npujar467fe752020-01-16 20:17:45 +0530202 core.grpcServer.Start(ctx)
David K. Bainbridgeb4a9ab02019-09-20 15:12:16 -0700203 probe.UpdateStatusFromContext(ctx, "grpc-service", probe.ServiceStatusStopped)
khenaidoob9203542018-09-17 22:56:37 -0400204}
205
Scott Bakeree6a0872019-10-29 15:59:52 -0700206// Initialize the kafka manager, but we will start it later
npujar467fe752020-01-16 20:17:45 +0530207func (core *Core) initKafkaManager(ctx context.Context) {
Girish Kumarf56a4682020-03-20 20:07:46 +0000208 logger.Infow("initialize-kafka-manager", log.Fields{"host": core.config.KafkaAdapterHost,
khenaidoob9203542018-09-17 22:56:37 -0400209 "port": core.config.KafkaAdapterPort, "topic": core.config.CoreTopic})
Scott Bakeree6a0872019-10-29 15:59:52 -0700210
211 probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusPreparing)
212
213 // create the proxy
npujar467fe752020-01-16 20:17:45 +0530214 core.kmp = kafka.NewInterContainerProxy(
khenaidoo43c82122018-11-22 18:38:28 -0500215 kafka.InterContainerHost(core.config.KafkaAdapterHost),
216 kafka.InterContainerPort(core.config.KafkaAdapterPort),
217 kafka.MsgClient(core.kafkaClient),
khenaidoo79232702018-12-04 11:00:41 -0500218 kafka.DefaultTopic(&kafka.Topic{Name: core.config.CoreTopic}),
npujar467fe752020-01-16 20:17:45 +0530219 kafka.DeviceDiscoveryTopic(&kafka.Topic{Name: core.config.AffinityRouterTopic}))
Scott Bakeree6a0872019-10-29 15:59:52 -0700220
221 probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusPrepared)
Scott Bakeree6a0872019-10-29 15:59:52 -0700222}
223
224/*
225 * KafkaMonitorThread
226 *
npujar1d86a522019-11-14 17:11:16 +0530227 * Responsible for starting the Kafka Interadapter Proxy and monitoring its liveness
Scott Bakeree6a0872019-10-29 15:59:52 -0700228 * state.
229 *
230 * Any producer that fails to send will cause KafkaInterContainerProxy to
231 * post a false event on its liveness channel. Any producer that succeeds in sending
232 * will cause KafkaInterContainerProxy to post a true event on its liveness
npujar1d86a522019-11-14 17:11:16 +0530233 * channel. Group receivers also update liveness state, and a receiver will typically
Scott Bakeree6a0872019-10-29 15:59:52 -0700234 * indicate a loss of liveness within 3-5 seconds of Kafka going down. Receivers
235 * only indicate restoration of liveness if a message is received. During normal
236 * operation, messages will be routinely produced and received, automatically
237 * indicating liveness state. These routine liveness indications are rate-limited
238 * inside sarama_client.
239 *
240 * This thread monitors the status of KafkaInterContainerProxy's liveness and pushes
241 * that state to the core's readiness probes. If no liveness event has been seen
242 * within a timeout, then the thread will make an attempt to produce a "liveness"
243 * message, which will in turn trigger a liveness event on the liveness channel, true
244 * or false depending on whether the attempt succeeded.
245 *
246 * The gRPC server in turn monitors the state of the readiness probe and will
247 * start issuing UNAVAILABLE response while the probe is not ready.
248 *
249 * startupRetryInterval -- interval between attempts to start
250 * liveProbeInterval -- interval between liveness checks when in a live state
251 * notLiveProbeInterval -- interval between liveness checks when in a notLive state
252 *
253 * liveProbeInterval and notLiveProbeInterval can be configured separately,
254 * though the current default is that both are set to 60 seconds.
255 */
256
Girish Kumar4d3887d2019-11-22 14:22:05 +0000257func (core *Core) startKafkaManager(ctx context.Context, startupRetryInterval time.Duration, liveProbeInterval time.Duration, notLiveProbeInterval time.Duration) {
Girish Kumarf56a4682020-03-20 20:07:46 +0000258 logger.Infow("starting-kafka-manager-thread", log.Fields{"host": core.config.KafkaAdapterHost,
Scott Bakeree6a0872019-10-29 15:59:52 -0700259 "port": core.config.KafkaAdapterPort, "topic": core.config.CoreTopic})
260
261 started := false
262 for !started {
263 // If we haven't started yet, then try to start
Girish Kumarf56a4682020-03-20 20:07:46 +0000264 logger.Infow("starting-kafka-proxy", log.Fields{})
Scott Bakeree6a0872019-10-29 15:59:52 -0700265 if err := core.kmp.Start(); err != nil {
266 // We failed to start. Delay and then try again later.
267 // Don't worry about liveness, as we can't be live until we've started.
268 probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusNotReady)
Girish Kumarf56a4682020-03-20 20:07:46 +0000269 logger.Infow("error-starting-kafka-messaging-proxy", log.Fields{"error": err})
Girish Kumar4d3887d2019-11-22 14:22:05 +0000270 time.Sleep(startupRetryInterval)
khenaidoob3244212019-08-27 14:32:27 -0400271 } else {
Scott Bakeree6a0872019-10-29 15:59:52 -0700272 // We started. We only need to do this once.
273 // Next we'll fall through and start checking liveness.
Girish Kumarf56a4682020-03-20 20:07:46 +0000274 logger.Infow("started-kafka-proxy", log.Fields{})
Scott Bakeree6a0872019-10-29 15:59:52 -0700275
276 // cannot do this until after the kmp is started
npujar1d86a522019-11-14 17:11:16 +0530277 if err := core.registerAdapterRequestHandlers(ctx, core.instanceID, core.deviceMgr, core.logicalDeviceMgr, core.adapterMgr, core.clusterDataProxy, core.localDataProxy); err != nil {
Girish Kumarf56a4682020-03-20 20:07:46 +0000278 logger.Fatal("Failure-registering-adapterRequestHandler")
Scott Bakeree6a0872019-10-29 15:59:52 -0700279 }
280
281 started = true
khenaidoob3244212019-08-27 14:32:27 -0400282 }
khenaidoob9203542018-09-17 22:56:37 -0400283 }
Scott Bakeree6a0872019-10-29 15:59:52 -0700284
Girish Kumarf56a4682020-03-20 20:07:46 +0000285 logger.Info("started-kafka-message-proxy")
Scott Bakeree6a0872019-10-29 15:59:52 -0700286
287 livenessChannel := core.kmp.EnableLivenessChannel(true)
288
Girish Kumarf56a4682020-03-20 20:07:46 +0000289 logger.Info("enabled-kafka-liveness-channel")
Scott Bakeree6a0872019-10-29 15:59:52 -0700290
Girish Kumar4d3887d2019-11-22 14:22:05 +0000291 timeout := liveProbeInterval
Scott Bakeree6a0872019-10-29 15:59:52 -0700292 for {
293 timeoutTimer := time.NewTimer(timeout)
294 select {
295 case liveness := <-livenessChannel:
Girish Kumarf56a4682020-03-20 20:07:46 +0000296 logger.Infow("kafka-manager-thread-liveness-event", log.Fields{"liveness": liveness})
Scott Bakeree6a0872019-10-29 15:59:52 -0700297 // there was a state change in Kafka liveness
298 if !liveness {
299 probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusNotReady)
300
301 if core.grpcServer != nil {
Girish Kumarf56a4682020-03-20 20:07:46 +0000302 logger.Info("kafka-manager-thread-set-server-notready")
Scott Bakeree6a0872019-10-29 15:59:52 -0700303 }
304
305 // retry frequently while life is bad
Girish Kumar4d3887d2019-11-22 14:22:05 +0000306 timeout = notLiveProbeInterval
Scott Bakeree6a0872019-10-29 15:59:52 -0700307 } else {
308 probe.UpdateStatusFromContext(ctx, "message-bus", probe.ServiceStatusRunning)
309
310 if core.grpcServer != nil {
Girish Kumarf56a4682020-03-20 20:07:46 +0000311 logger.Info("kafka-manager-thread-set-server-ready")
Scott Bakeree6a0872019-10-29 15:59:52 -0700312 }
313
314 // retry infrequently while life is good
Girish Kumar4d3887d2019-11-22 14:22:05 +0000315 timeout = liveProbeInterval
Scott Bakeree6a0872019-10-29 15:59:52 -0700316 }
317 if !timeoutTimer.Stop() {
318 <-timeoutTimer.C
319 }
320 case <-timeoutTimer.C:
Girish Kumarf56a4682020-03-20 20:07:46 +0000321 logger.Info("kafka-proxy-liveness-recheck")
Scott Bakeree6a0872019-10-29 15:59:52 -0700322 // send the liveness probe in a goroutine; we don't want to deadlock ourselves as
323 // the liveness probe may wait (and block) writing to our channel.
324 go func() {
325 err := core.kmp.SendLiveness()
326 if err != nil {
327 // Catch possible error case if sending liveness after Sarama has been stopped.
Girish Kumarf56a4682020-03-20 20:07:46 +0000328 logger.Warnw("error-kafka-send-liveness", log.Fields{"error": err})
Scott Bakeree6a0872019-10-29 15:59:52 -0700329 }
330 }()
331 }
332 }
khenaidoob9203542018-09-17 22:56:37 -0400333}
334
khenaidoob3244212019-08-27 14:32:27 -0400335// waitUntilKVStoreReachableOrMaxTries will wait until it can connect to a KV store or until maxtries has been reached
Girish Kumar4d3887d2019-11-22 14:22:05 +0000336func (core *Core) waitUntilKVStoreReachableOrMaxTries(ctx context.Context, maxRetries int, retryInterval time.Duration) error {
Girish Kumarf56a4682020-03-20 20:07:46 +0000337 logger.Infow("verifying-KV-store-connectivity", log.Fields{"host": core.config.KVStoreHost,
khenaidoob3244212019-08-27 14:32:27 -0400338 "port": core.config.KVStorePort, "retries": maxRetries, "retryInterval": retryInterval})
khenaidoob3244212019-08-27 14:32:27 -0400339 count := 0
340 for {
npujar467fe752020-01-16 20:17:45 +0530341 if !core.kvClient.IsConnectionUp(ctx) {
Girish Kumarf56a4682020-03-20 20:07:46 +0000342 logger.Info("KV-store-unreachable")
khenaidoob3244212019-08-27 14:32:27 -0400343 if maxRetries != -1 {
344 if count >= maxRetries {
345 return status.Error(codes.Unavailable, "kv store unreachable")
346 }
347 }
npujar1d86a522019-11-14 17:11:16 +0530348 count++
khenaidoob3244212019-08-27 14:32:27 -0400349 // Take a nap before retrying
Girish Kumar4d3887d2019-11-22 14:22:05 +0000350 time.Sleep(retryInterval)
Girish Kumarf56a4682020-03-20 20:07:46 +0000351 logger.Infow("retry-KV-store-connectivity", log.Fields{"retryCount": count, "maxRetries": maxRetries, "retryInterval": retryInterval})
khenaidoob3244212019-08-27 14:32:27 -0400352
353 } else {
354 break
355 }
356 }
Girish Kumarf56a4682020-03-20 20:07:46 +0000357 logger.Info("KV-store-reachable")
khenaidoob3244212019-08-27 14:32:27 -0400358 return nil
359}
360
Kent Hagerman2b216042020-04-03 18:28:56 -0400361func (core *Core) registerAdapterRequestHandlers(ctx context.Context, coreInstanceID string, dMgr *device.Manager,
362 ldMgr *device.LogicalManager, aMgr *adapter.Manager, cdProxy *model.Proxy, ldProxy *model.Proxy,
khenaidoo54e0ddf2019-02-27 16:21:33 -0500363) error {
Kent Hagerman2b216042020-04-03 18:28:56 -0400364 requestProxy := api.NewAdapterRequestHandlerProxy(coreInstanceID, dMgr, aMgr, cdProxy, ldProxy,
David Bainbridged1afd662020-03-26 18:27:41 -0700365 core.config.LongRunningRequestTimeout, core.config.DefaultRequestTimeout)
khenaidoob9203542018-09-17 22:56:37 -0400366
khenaidoo54e0ddf2019-02-27 16:21:33 -0500367 // Register the broadcast topic to handle any core-bound broadcast requests
368 if err := core.kmp.SubscribeWithRequestHandlerInterface(kafka.Topic{Name: core.config.CoreTopic}, requestProxy); err != nil {
Girish Kumarf56a4682020-03-20 20:07:46 +0000369 logger.Fatalw("Failed-registering-broadcast-handler", log.Fields{"topic": core.config.CoreTopic})
khenaidoo54e0ddf2019-02-27 16:21:33 -0500370 return err
371 }
372
Kent Hagermana6d0c362019-07-30 12:50:21 -0400373 // Register the core-pair topic to handle core-bound requests destined to the core pair
374 if err := core.kmp.SubscribeWithDefaultRequestHandler(kafka.Topic{Name: core.config.CorePairTopic}, kafka.OffsetNewest); err != nil {
Girish Kumarf56a4682020-03-20 20:07:46 +0000375 logger.Fatalw("Failed-registering-pair-handler", log.Fields{"topic": core.config.CorePairTopic})
Kent Hagermana6d0c362019-07-30 12:50:21 -0400376 return err
377 }
378
Girish Kumarf56a4682020-03-20 20:07:46 +0000379 logger.Info("request-handler-registered")
khenaidoob9203542018-09-17 22:56:37 -0400380 return nil
381}
382
383func (core *Core) startDeviceManager(ctx context.Context) {
Girish Kumarf56a4682020-03-20 20:07:46 +0000384 logger.Info("DeviceManager-Starting...")
Kent Hagerman2b216042020-04-03 18:28:56 -0400385 core.deviceMgr.Start(ctx)
Girish Kumarf56a4682020-03-20 20:07:46 +0000386 logger.Info("DeviceManager-Started")
khenaidoob9203542018-09-17 22:56:37 -0400387}
388
389func (core *Core) startLogicalDeviceManager(ctx context.Context) {
Girish Kumarf56a4682020-03-20 20:07:46 +0000390 logger.Info("Logical-DeviceManager-Starting...")
Kent Hagerman2b216042020-04-03 18:28:56 -0400391 core.logicalDeviceMgr.Start(ctx)
Girish Kumarf56a4682020-03-20 20:07:46 +0000392 logger.Info("Logical-DeviceManager-Started")
khenaidoob9203542018-09-17 22:56:37 -0400393}
khenaidoo21d51152019-02-01 13:48:37 -0500394
395func (core *Core) startAdapterManager(ctx context.Context) {
Girish Kumarf56a4682020-03-20 20:07:46 +0000396 logger.Info("Adapter-Manager-Starting...")
Kent Hagerman2b216042020-04-03 18:28:56 -0400397 err := core.adapterMgr.Start(ctx)
Thomas Lee Se5a44012019-11-07 20:32:24 +0530398 if err != nil {
Girish Kumarf56a4682020-03-20 20:07:46 +0000399 logger.Fatalf("failed-to-start-adapter-manager: error %v ", err)
Thomas Lee Se5a44012019-11-07 20:32:24 +0530400 }
Girish Kumarf56a4682020-03-20 20:07:46 +0000401 logger.Info("Adapter-Manager-Started")
William Kurkiandaa6bb22019-03-07 12:26:28 -0500402}
Girish Kumar4d3887d2019-11-22 14:22:05 +0000403
404/*
405* Thread to monitor kvstore Liveness (connection status)
406*
407* This function constantly monitors Liveness State of kvstore as reported
408* periodically by backend and updates the Status of kv-store service registered
409* with rw_core probe.
410*
411* If no liveness event has been seen within a timeout, then the thread will
412* perform a "liveness" check attempt, which will in turn trigger a liveness event on
413* the liveness channel, true or false depending on whether the attempt succeeded.
414*
415* The gRPC server in turn monitors the state of the readiness probe and will
416* start issuing UNAVAILABLE response while the probe is not ready.
417 */
418func (core *Core) monitorKvstoreLiveness(ctx context.Context) {
Girish Kumarf56a4682020-03-20 20:07:46 +0000419 logger.Info("start-monitoring-kvstore-liveness")
Girish Kumar4d3887d2019-11-22 14:22:05 +0000420
421 // Instruct backend to create Liveness channel for transporting state updates
422 livenessChannel := core.backend.EnableLivenessChannel()
423
Girish Kumarf56a4682020-03-20 20:07:46 +0000424 logger.Debug("enabled-kvstore-liveness-channel")
Girish Kumar4d3887d2019-11-22 14:22:05 +0000425
426 // Default state for kvstore is alive for rw_core
427 timeout := core.config.LiveProbeInterval
Scott Baker2d87ee32020-03-03 13:04:01 -0800428loop:
Girish Kumar4d3887d2019-11-22 14:22:05 +0000429 for {
430 timeoutTimer := time.NewTimer(timeout)
431 select {
432
433 case liveness := <-livenessChannel:
Girish Kumarf56a4682020-03-20 20:07:46 +0000434 logger.Debugw("received-liveness-change-notification", log.Fields{"liveness": liveness})
Girish Kumar4d3887d2019-11-22 14:22:05 +0000435
436 if !liveness {
437 probe.UpdateStatusFromContext(ctx, "kv-store", probe.ServiceStatusNotReady)
438
439 if core.grpcServer != nil {
Girish Kumarf56a4682020-03-20 20:07:46 +0000440 logger.Info("kvstore-set-server-notready")
Girish Kumar4d3887d2019-11-22 14:22:05 +0000441 }
442
443 timeout = core.config.NotLiveProbeInterval
444
445 } else {
446 probe.UpdateStatusFromContext(ctx, "kv-store", probe.ServiceStatusRunning)
447
448 if core.grpcServer != nil {
Girish Kumarf56a4682020-03-20 20:07:46 +0000449 logger.Info("kvstore-set-server-ready")
Girish Kumar4d3887d2019-11-22 14:22:05 +0000450 }
451
452 timeout = core.config.LiveProbeInterval
453 }
454
455 if !timeoutTimer.Stop() {
456 <-timeoutTimer.C
457 }
458
Scott Baker2d87ee32020-03-03 13:04:01 -0800459 case <-core.exitChannel:
460 break loop
461
Girish Kumar4d3887d2019-11-22 14:22:05 +0000462 case <-timeoutTimer.C:
Girish Kumarf56a4682020-03-20 20:07:46 +0000463 logger.Info("kvstore-perform-liveness-check-on-timeout")
Girish Kumar4d3887d2019-11-22 14:22:05 +0000464
465 // Trigger Liveness check if no liveness update received within the timeout period.
466 // The Liveness check will push Live state to same channel which this routine is
467 // reading and processing. This, do it asynchronously to avoid blocking for
468 // backend response and avoid any possibility of deadlock
npujar467fe752020-01-16 20:17:45 +0530469 go core.backend.PerformLivenessCheck(ctx)
Girish Kumar4d3887d2019-11-22 14:22:05 +0000470 }
471 }
472}