blob: 879142c071b8018db0ae9c63c3437b68cafd194a [file] [log] [blame]
khenaidoo26721882021-08-11 17:42:52 -04001/*
Joey Armstrong7f8436c2023-07-09 20:23:27 -04002 * Copyright 2021-2023 Open Networking Foundation (ONF) and the ONF Contributors
khenaidoo26721882021-08-11 17:42:52 -04003 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package grpc
17
18import (
19 "context"
20 "fmt"
21 "reflect"
22 "strings"
23 "sync"
24 "time"
25
26 grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware"
27 grpc_opentracing "github.com/grpc-ecosystem/go-grpc-middleware/tracing/opentracing"
khenaidoo0927c722021-12-15 16:49:32 -050028 "github.com/jhump/protoreflect/dynamic/grpcdynamic"
29 "github.com/jhump/protoreflect/grpcreflect"
khenaidoo26721882021-08-11 17:42:52 -040030 "github.com/opencord/voltha-lib-go/v7/pkg/log"
31 "github.com/opencord/voltha-lib-go/v7/pkg/probe"
khenaidoo0927c722021-12-15 16:49:32 -050032 "github.com/opencord/voltha-protos/v5/go/adapter_service"
khenaidoob9503212021-12-08 14:22:21 -050033 "github.com/opencord/voltha-protos/v5/go/common"
khenaidooa5feb8e2021-10-19 17:29:22 -040034 "github.com/opencord/voltha-protos/v5/go/core_service"
35 "github.com/opencord/voltha-protos/v5/go/olt_inter_adapter_service"
36 "github.com/opencord/voltha-protos/v5/go/onu_inter_adapter_service"
khenaidoo26721882021-08-11 17:42:52 -040037 "google.golang.org/grpc"
khenaidoo0927c722021-12-15 16:49:32 -050038 "google.golang.org/grpc/codes"
39 rpb "google.golang.org/grpc/reflection/grpc_reflection_v1alpha"
40 "google.golang.org/grpc/status"
khenaidoo26721882021-08-11 17:42:52 -040041)
42
43type event byte
44type state byte
khenaidoo0927c722021-12-15 16:49:32 -050045type GetServiceClient func(context.Context, *grpc.ClientConn) interface{}
khenaidoo26721882021-08-11 17:42:52 -040046type RestartedHandler func(ctx context.Context, endPoint string) error
47
// Names of the environment variables that NewClient passes to
// SetFromEnvVariable to override the corresponding default durations.
const (
	grpcBackoffInitialInterval = "GRPC_BACKOFF_INITIAL_INTERVAL"
	grpcBackoffMaxInterval     = "GRPC_BACKOFF_MAX_INTERVAL"
	grpcBackoffMaxElapsedTime  = "GRPC_BACKOFF_MAX_ELAPSED_TIME"
	grpcMonitorInterval        = "GRPC_MONITOR_INTERVAL"
)
54
// Default durations that NewClient assigns to a Client before applying any
// environment-variable override.
const (
	// DefaultBackoffInitialInterval is the default initial retry delay.
	DefaultBackoffInitialInterval = 100 * time.Millisecond
	// DefaultBackoffMaxInterval is the default maximum retry delay.
	DefaultBackoffMaxInterval = 5 * time.Second
	// DefaultBackoffMaxElapsedTime is the default cap on total retry time.
	DefaultBackoffMaxElapsedTime = 0 * time.Second // No time limit
	// DefaultGRPCMonitorInterval is the default keep-alive interval used by
	// the connection monitor.
	DefaultGRPCMonitorInterval = 5 * time.Second
)
61
62const (
khenaidoo26721882021-08-11 17:42:52 -040063 eventConnecting = event(iota)
khenaidoo0927c722021-12-15 16:49:32 -050064 eventValidatingConnection
khenaidoo26721882021-08-11 17:42:52 -040065 eventConnected
66 eventDisconnected
67 eventStopped
68 eventError
69
70 stateConnected = state(iota)
khenaidoo0927c722021-12-15 16:49:32 -050071 stateValidatingConnection
khenaidoo26721882021-08-11 17:42:52 -040072 stateConnecting
73 stateDisconnected
74)
75
// Client is a gRPC client that maintains a connection to a single server
// endpoint. Start drives a small event/state machine that connects, validates
// the connection through a keep-alive stream, and reconnects after failures.
type Client struct {
	// clientEndpoint identifies this client; it is sent to the server in the
	// keep-alive message and included in log entries.
	clientEndpoint string
	// clientContextData is optional data sent to the server in the keep-alive
	// message (see ClientContextData).
	clientContextData string
	// serverEndPoint is the address dialed by connectToEndpoint.
	serverEndPoint string
	// remoteServiceName is the service looked up through server reflection by
	// monitorConnection.
	remoteServiceName string
	// connection is the current gRPC connection, or nil when there is none.
	connection *grpc.ClientConn
	// connectionLock is held by the accessors, connectToEndpoint,
	// closeConnection and Stop while they touch connection, service and done.
	connectionLock sync.RWMutex
	// stateLock is held while state is read or changed.
	stateLock sync.RWMutex
	state     state
	// service is the stub produced by the GetServiceClient handler, or nil
	// while disconnected.
	service interface{}
	// events carries state-machine events to the Start loop.
	events chan event
	// onRestart is called by Start after a reconnection.
	onRestart RestartedHandler
	// Backoff parameters handed to NewBackoff by Start.
	backoffInitialInterval time.Duration
	backoffMaxInterval     time.Duration
	backoffMaxElapsedTime  time.Duration
	// monitorInterval is both the keep-alive period and the per-exchange
	// timeout used by monitorConnection.
	monitorInterval time.Duration
	// done is set once the client has been asked to stop.
	done bool
	// livenessLock guards livenessCallback.
	livenessLock sync.RWMutex
	// livenessCallback, when set, is invoked with the current time after each
	// successful keep-alive exchange.
	livenessCallback func(timestamp time.Time)
}
96
97type ClientOption func(*Client)
98
khenaidoo0927c722021-12-15 16:49:32 -050099func ClientContextData(data string) ClientOption {
100 return func(args *Client) {
101 args.clientContextData = data
102 }
103}
104
105func NewClient(clientEndpoint, serverEndpoint, remoteServiceName string, onRestart RestartedHandler,
106 opts ...ClientOption) (*Client, error) {
khenaidoo26721882021-08-11 17:42:52 -0400107 c := &Client{
khenaidoob9503212021-12-08 14:22:21 -0500108 clientEndpoint: clientEndpoint,
109 serverEndPoint: serverEndpoint,
khenaidoo0927c722021-12-15 16:49:32 -0500110 remoteServiceName: remoteServiceName,
khenaidoo26721882021-08-11 17:42:52 -0400111 onRestart: onRestart,
khenaidoo0927c722021-12-15 16:49:32 -0500112 events: make(chan event, 5),
khenaidoo26721882021-08-11 17:42:52 -0400113 state: stateDisconnected,
114 backoffInitialInterval: DefaultBackoffInitialInterval,
115 backoffMaxInterval: DefaultBackoffMaxInterval,
116 backoffMaxElapsedTime: DefaultBackoffMaxElapsedTime,
117 monitorInterval: DefaultGRPCMonitorInterval,
118 }
119 for _, option := range opts {
120 option(c)
121 }
122
123 // Check for environment variables
124 if err := SetFromEnvVariable(grpcBackoffInitialInterval, &c.backoffInitialInterval); err != nil {
125 logger.Warnw(context.Background(), "failure-reading-env-variable", log.Fields{"error": err, "variable": grpcBackoffInitialInterval})
126 }
127
128 if err := SetFromEnvVariable(grpcBackoffMaxInterval, &c.backoffMaxInterval); err != nil {
129 logger.Warnw(context.Background(), "failure-reading-env-variable", log.Fields{"error": err, "variable": grpcBackoffMaxInterval})
130 }
131
132 if err := SetFromEnvVariable(grpcBackoffMaxElapsedTime, &c.backoffMaxElapsedTime); err != nil {
133 logger.Warnw(context.Background(), "failure-reading-env-variable", log.Fields{"error": err, "variable": grpcBackoffMaxElapsedTime})
134 }
135
136 if err := SetFromEnvVariable(grpcMonitorInterval, &c.monitorInterval); err != nil {
137 logger.Warnw(context.Background(), "failure-reading-env-variable", log.Fields{"error": err, "variable": grpcMonitorInterval})
138 }
139
140 logger.Infow(context.Background(), "initialized-client", log.Fields{"client": c})
141
142 // Sanity check
143 if c.backoffInitialInterval > c.backoffMaxInterval {
144 return nil, fmt.Errorf("initial retry delay %v is greater than maximum retry delay %v", c.backoffInitialInterval, c.backoffMaxInterval)
145 }
146
khenaidoo0927c722021-12-15 16:49:32 -0500147 grpc.EnableTracing = true
148
khenaidoo26721882021-08-11 17:42:52 -0400149 return c, nil
150}
151
152func (c *Client) GetClient() (interface{}, error) {
153 c.connectionLock.RLock()
154 defer c.connectionLock.RUnlock()
155 if c.service == nil {
khenaidoob9503212021-12-08 14:22:21 -0500156 return nil, fmt.Errorf("no connection to %s", c.serverEndPoint)
khenaidoo26721882021-08-11 17:42:52 -0400157 }
158 return c.service, nil
159}
160
161// GetCoreServiceClient is a helper function that returns a concrete service instead of the GetClient() API
162// which returns an interface
khenaidooa5feb8e2021-10-19 17:29:22 -0400163func (c *Client) GetCoreServiceClient() (core_service.CoreServiceClient, error) {
khenaidoo26721882021-08-11 17:42:52 -0400164 c.connectionLock.RLock()
165 defer c.connectionLock.RUnlock()
166 if c.service == nil {
khenaidoob9503212021-12-08 14:22:21 -0500167 return nil, fmt.Errorf("no core connection to %s", c.serverEndPoint)
khenaidoo26721882021-08-11 17:42:52 -0400168 }
khenaidooa5feb8e2021-10-19 17:29:22 -0400169 client, ok := c.service.(core_service.CoreServiceClient)
khenaidoo26721882021-08-11 17:42:52 -0400170 if ok {
171 return client, nil
172 }
173 return nil, fmt.Errorf("invalid-service-%s", reflect.TypeOf(c.service))
174}
175
176// GetOnuAdapterServiceClient is a helper function that returns a concrete service instead of the GetClient() API
177// which returns an interface
khenaidooa5feb8e2021-10-19 17:29:22 -0400178func (c *Client) GetOnuInterAdapterServiceClient() (onu_inter_adapter_service.OnuInterAdapterServiceClient, error) {
khenaidoo26721882021-08-11 17:42:52 -0400179 c.connectionLock.RLock()
180 defer c.connectionLock.RUnlock()
181 if c.service == nil {
khenaidoob9503212021-12-08 14:22:21 -0500182 return nil, fmt.Errorf("no child adapter connection to %s", c.serverEndPoint)
khenaidoo26721882021-08-11 17:42:52 -0400183 }
khenaidooa5feb8e2021-10-19 17:29:22 -0400184 client, ok := c.service.(onu_inter_adapter_service.OnuInterAdapterServiceClient)
khenaidoo26721882021-08-11 17:42:52 -0400185 if ok {
186 return client, nil
187 }
188 return nil, fmt.Errorf("invalid-service-%s", reflect.TypeOf(c.service))
189}
190
191// GetOltAdapterServiceClient is a helper function that returns a concrete service instead of the GetClient() API
192// which returns an interface
khenaidooa5feb8e2021-10-19 17:29:22 -0400193func (c *Client) GetOltInterAdapterServiceClient() (olt_inter_adapter_service.OltInterAdapterServiceClient, error) {
khenaidoo26721882021-08-11 17:42:52 -0400194 c.connectionLock.RLock()
195 defer c.connectionLock.RUnlock()
196 if c.service == nil {
khenaidoob9503212021-12-08 14:22:21 -0500197 return nil, fmt.Errorf("no parent adapter connection to %s", c.serverEndPoint)
khenaidoo26721882021-08-11 17:42:52 -0400198 }
khenaidooa5feb8e2021-10-19 17:29:22 -0400199 client, ok := c.service.(olt_inter_adapter_service.OltInterAdapterServiceClient)
khenaidoo26721882021-08-11 17:42:52 -0400200 if ok {
201 return client, nil
202 }
203 return nil, fmt.Errorf("invalid-service-%s", reflect.TypeOf(c.service))
204}
205
khenaidoo0927c722021-12-15 16:49:32 -0500206// GetAdapterServiceClient is a helper function that returns a concrete service instead of the GetClient() API
207// which returns an interface
208func (c *Client) GetAdapterServiceClient() (adapter_service.AdapterServiceClient, error) {
209 c.connectionLock.RLock()
210 defer c.connectionLock.RUnlock()
211 if c.service == nil {
212 return nil, fmt.Errorf("no adapter service connection to %s", c.serverEndPoint)
213 }
214 client, ok := c.service.(adapter_service.AdapterServiceClient)
215 if ok {
216 return client, nil
217 }
218 return nil, fmt.Errorf("invalid-service-%s", reflect.TypeOf(c.service))
219}
220
khenaidoo26721882021-08-11 17:42:52 -0400221func (c *Client) Reset(ctx context.Context) {
khenaidoo0927c722021-12-15 16:49:32 -0500222 logger.Debugw(ctx, "resetting-client-connection", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400223 c.stateLock.Lock()
224 defer c.stateLock.Unlock()
225 if c.state == stateConnected {
226 c.state = stateDisconnected
227 c.events <- eventDisconnected
228 }
229}
230
khenaidoo0927c722021-12-15 16:49:32 -0500231// executeWithTimeout runs a sending function (sf) along with a receiving one(rf) and returns an error, if any.
nikesh.krishnanb547c1a2023-03-11 03:05:16 +0530232// If the deadline elapses first, it returns a grpc DeadlineExceeded error instead.
khenaidoo0927c722021-12-15 16:49:32 -0500233func (c *Client) executeWithTimeout(sf func(*common.Connection) error, rf func() (interface{}, error), conn *common.Connection, d time.Duration) error {
234 errChan := make(chan error, 1)
235 go func() {
236 err := sf(conn)
237 logger.Debugw(context.Background(), "message-sent", log.Fields{"error": err, "qpi-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
238 if err == nil {
239 response, err := rf()
240 logger.Debugw(context.Background(), "message-received", log.Fields{"error": err, "qpi-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "health": response})
241 }
242 errChan <- err
243 close(errChan)
244 }()
245 t := time.NewTimer(d)
246 select {
247 case <-t.C:
248 return status.Errorf(codes.DeadlineExceeded, "timeout-on-sending-message")
249 case err := <-errChan:
250 if !t.Stop() {
251 <-t.C
khenaidoo26721882021-08-11 17:42:52 -0400252 }
253 return err
254 }
khenaidoo26721882021-08-11 17:42:52 -0400255}
256
khenaidoo0927c722021-12-15 16:49:32 -0500257func (c *Client) monitorConnection(ctx context.Context) {
258 logger.Debugw(ctx, "monitor-connection-started", log.Fields{"qpi-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400259
khenaidoo0927c722021-12-15 16:49:32 -0500260 // If we exit, assume disconnected
261 defer func() {
262 c.stateLock.Lock()
263 if !c.done && (c.state == stateConnected || c.state == stateValidatingConnection) {
264 // Handle only connected state here. We need the validating state to know if we need to backoff before a retry
khenaidoo0927c722021-12-15 16:49:32 -0500265 logger.Warnw(ctx, "sending-disconnect-event", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "curr-state": stateConnected, "new-state": c.state})
266 c.events <- eventDisconnected
267 } else {
268 logger.Debugw(ctx, "no-state-change-needed", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "state": c.state, "client-done": c.done})
khenaidoo26721882021-08-11 17:42:52 -0400269 }
khenaidoo0927c722021-12-15 16:49:32 -0500270 c.stateLock.Unlock()
271 logger.Debugw(ctx, "monitor-connection-ended", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
272 }()
273
274 c.connectionLock.RLock()
275 conn := c.connection
276 c.connectionLock.RUnlock()
277 if conn == nil {
278 logger.Errorw(ctx, "connection-nil", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
279 return
khenaidoo26721882021-08-11 17:42:52 -0400280 }
khenaidoo26721882021-08-11 17:42:52 -0400281
khenaidoo0927c722021-12-15 16:49:32 -0500282 // Get a new client using reflection. The server can implement any grpc service, but it
283 // needs to also implement the "StartKeepAliveStream" API
284 grpcReflectClient := grpcreflect.NewClient(ctx, rpb.NewServerReflectionClient(conn))
285 if grpcReflectClient == nil {
286 logger.Errorw(ctx, "grpc-reflect-client-nil", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
287 return
khenaidoo26721882021-08-11 17:42:52 -0400288 }
khenaidoo26721882021-08-11 17:42:52 -0400289
khenaidoo0927c722021-12-15 16:49:32 -0500290 // Get the list of services - there should be 2 services: a server reflection and the voltha service we are interested in
291 services, err := grpcReflectClient.ListServices()
292 if err != nil {
293 logger.Errorw(ctx, "list-services-error", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "error": err})
294 return
295 }
khenaidoo26721882021-08-11 17:42:52 -0400296
khenaidoo0927c722021-12-15 16:49:32 -0500297 // Filter out the service
298 logger.Debugw(ctx, "services", log.Fields{"services": services})
299 serviceOfInterest := ""
300 for _, service := range services {
301 if strings.EqualFold(service, c.remoteServiceName) {
302 serviceOfInterest = service
303 break
304 }
305 }
306 if serviceOfInterest == "" {
307 logger.Errorw(ctx, "no-service-found", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "services": services, "expected-remote-service": c.remoteServiceName})
308 return
309 }
khenaidooaa290962021-10-22 18:14:33 -0400310
khenaidoo0927c722021-12-15 16:49:32 -0500311 // Resolve the service
312 resolvedService, err := grpcReflectClient.ResolveService(serviceOfInterest)
313 if err != nil {
314 logger.Errorw(ctx, "service-error", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "service": resolvedService, "error": err})
315 return
316 }
317
318 // Find the method of interest
319 method := resolvedService.FindMethodByName("GetHealthStatus")
320 if method == nil {
321 logger.Errorw(ctx, "nil-method", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "service": resolvedService})
322 return
323 }
324 logger.Debugw(ctx, "resolved-to-method", log.Fields{"service": resolvedService.GetName(), "method": method.GetName()})
325
326 // Get a dynamic connection
327 dynamicConn := grpcdynamic.NewStub(conn)
328
329 // Get the stream and send this client information
330 streamCtx, streamDone := context.WithCancel(log.WithSpanFromContext(context.Background(), ctx))
331 defer streamDone()
332 stream, err := dynamicConn.InvokeRpcBidiStream(streamCtx, method)
333 if err != nil {
334 logger.Errorw(ctx, "stream-error", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "service": resolvedService, "error": err})
335 return
336 }
337
338 clientInfo := &common.Connection{
339 Endpoint: c.clientEndpoint,
340 ContextInfo: c.clientContextData,
341 KeepAliveInterval: int64(c.monitorInterval),
342 }
343
344 initialConnection := true
khenaidoo26721882021-08-11 17:42:52 -0400345loop:
346 for {
khenaidoo0927c722021-12-15 16:49:32 -0500347 // Let's send a keep alive message with our info
348 err := c.executeWithTimeout(
349 func(conn *common.Connection) error { return stream.SendMsg(conn) },
350 func() (interface{}, error) { return stream.RecvMsg() },
351 clientInfo,
352 c.monitorInterval)
khenaidoo26721882021-08-11 17:42:52 -0400353
khenaidoo0927c722021-12-15 16:49:32 -0500354 if err != nil {
355 // Any error means the far end is gone
356 logger.Errorw(ctx, "sending-stream-error", log.Fields{"error": err, "api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "context": stream.Context().Err()})
khenaidoo26721882021-08-11 17:42:52 -0400357 break loop
khenaidoo0927c722021-12-15 16:49:32 -0500358 }
359 // Send a connect event
360 if initialConnection {
361 logger.Debugw(ctx, "first-stream-data-sent", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
362 c.events <- eventConnected
363 initialConnection = false
364 }
365 logger.Debugw(ctx, "stream-data-sent", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
366 // Update liveness, if configured
367 c.livenessLock.RLock()
368 if c.livenessCallback != nil {
369 go c.livenessCallback(time.Now())
370 }
371 c.livenessLock.RUnlock()
khenaidoo26721882021-08-11 17:42:52 -0400372
khenaidoo0927c722021-12-15 16:49:32 -0500373 // Wait to send the next keep alive
374 keepAliveTimer := time.NewTimer(time.Duration(clientInfo.KeepAliveInterval))
375 select {
376 case <-ctx.Done():
377 logger.Warnw(ctx, "context-done", log.Fields{"api-endpont": c.serverEndPoint, "client": c.clientEndpoint})
378 break loop
379 case <-stream.Context().Done():
380 logger.Debugw(ctx, "stream-context-done", log.Fields{"api-endpoint": c.serverEndPoint, "stream-info": stream.Context(), "client": c.clientEndpoint})
381 break loop
382 case <-keepAliveTimer.C:
383 continue
khenaidoo26721882021-08-11 17:42:52 -0400384 }
385 }
khenaidoo0927c722021-12-15 16:49:32 -0500386 if stream != nil {
387 if err := stream.CloseSend(); err != nil {
388 logger.Warnw(ctx, "closing-stream-error", log.Fields{"error": err, "api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
389 }
390 }
khenaidoo26721882021-08-11 17:42:52 -0400391}
392
393// Start kicks off the adapter agent by trying to connect to the adapter
nikesh.krishnanb547c1a2023-03-11 03:05:16 +0530394func (c *Client) Start(ctx context.Context, handler GetServiceClient, retry_interceptor ...grpc.UnaryClientInterceptor) {
khenaidoob9503212021-12-08 14:22:21 -0500395 logger.Debugw(ctx, "Starting GRPC - Client", log.Fields{"api-endpoint": c.serverEndPoint})
khenaidoo26721882021-08-11 17:42:52 -0400396
397 // If the context contains a k8s probe then register services
398 p := probe.GetProbeFromContext(ctx)
399 if p != nil {
khenaidoob9503212021-12-08 14:22:21 -0500400 p.RegisterService(ctx, c.serverEndPoint)
khenaidoo26721882021-08-11 17:42:52 -0400401 }
402
khenaidoo0927c722021-12-15 16:49:32 -0500403 var monitorConnectionCtx context.Context
404 var monitorConnectionDone func()
khenaidoo26721882021-08-11 17:42:52 -0400405
406 initialConnection := true
407 c.events <- eventConnecting
408 backoff := NewBackoff(c.backoffInitialInterval, c.backoffMaxInterval, c.backoffMaxElapsedTime)
409 attempt := 1
410loop:
411 for {
412 select {
413 case <-ctx.Done():
khenaidoo0927c722021-12-15 16:49:32 -0500414 logger.Warnw(ctx, "context-closing", log.Fields{"api_endpoint": c.serverEndPoint, "client": c.clientEndpoint, "context": ctx})
415 c.connectionLock.Lock()
416 if !c.done {
417 c.done = true
418 c.events <- eventStopped
419 close(c.events)
420 }
421 c.connectionLock.Unlock()
422 // break loop
khenaidoo26721882021-08-11 17:42:52 -0400423 case event := <-c.events:
khenaidoo0927c722021-12-15 16:49:32 -0500424 logger.Debugw(ctx, "received-event", log.Fields{"event": event, "api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoofe90ac32021-11-08 18:17:32 -0500425 c.connectionLock.RLock()
426 // On a client stopped, just allow the stop event to go through
427 if c.done && event != eventStopped {
428 c.connectionLock.RUnlock()
khenaidoo0927c722021-12-15 16:49:32 -0500429 logger.Debugw(ctx, "ignoring-event-on-client-stop", log.Fields{"event": event, "api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoofe90ac32021-11-08 18:17:32 -0500430 continue
431 }
432 c.connectionLock.RUnlock()
khenaidoo26721882021-08-11 17:42:52 -0400433 switch event {
434 case eventConnecting:
khenaidoo26721882021-08-11 17:42:52 -0400435 c.stateLock.Lock()
khenaidoo0927c722021-12-15 16:49:32 -0500436 logger.Debugw(ctx, "connection-start", log.Fields{"api-endpoint": c.serverEndPoint, "attempts": attempt, "curr-state": c.state, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400437 if c.state == stateConnected {
438 c.state = stateDisconnected
439 }
440 if c.state != stateConnecting {
441 c.state = stateConnecting
442 go func() {
nikesh.krishnanb547c1a2023-03-11 03:05:16 +0530443 var err error
444 if len(retry_interceptor) > 0 {
445 err = c.connectToEndpoint(ctx, p, retry_interceptor...)
446 } else {
447 err = c.connectToEndpoint(ctx, p)
448 }
449
450 if err != nil {
khenaidoo26721882021-08-11 17:42:52 -0400451 c.stateLock.Lock()
452 c.state = stateDisconnected
453 c.stateLock.Unlock()
khenaidoo0927c722021-12-15 16:49:32 -0500454 logger.Errorw(ctx, "connection-failed", log.Fields{"api-endpoint": c.serverEndPoint, "attempt": attempt, "client": c.clientEndpoint, "error": err})
khenaidoo26721882021-08-11 17:42:52 -0400455
456 // Retry connection after a delay
457 if err = backoff.Backoff(ctx); err != nil {
458 // Context has closed or reached maximum elapsed time, if set
khenaidoo0927c722021-12-15 16:49:32 -0500459 logger.Errorw(ctx, "retry-aborted", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "error": err})
khenaidoo26721882021-08-11 17:42:52 -0400460 return
461 }
462 attempt += 1
khenaidoofe90ac32021-11-08 18:17:32 -0500463 c.connectionLock.RLock()
464 if !c.done {
465 c.events <- eventConnecting
466 }
467 c.connectionLock.RUnlock()
khenaidoo26721882021-08-11 17:42:52 -0400468 }
469 }()
470 }
471 c.stateLock.Unlock()
472
khenaidoo0927c722021-12-15 16:49:32 -0500473 case eventValidatingConnection:
474 logger.Debugw(ctx, "connection-validation", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
475 c.stateLock.Lock()
476 if c.state != stateConnected {
477 c.state = stateValidatingConnection
478 }
479 c.stateLock.Unlock()
480 monitorConnectionCtx, monitorConnectionDone = context.WithCancel(context.Background())
481 go c.monitorConnection(monitorConnectionCtx)
482
khenaidoo26721882021-08-11 17:42:52 -0400483 case eventConnected:
khenaidoo26721882021-08-11 17:42:52 -0400484 attempt = 1
khenaidoo0927c722021-12-15 16:49:32 -0500485 backoff.Reset()
khenaidoo26721882021-08-11 17:42:52 -0400486 c.stateLock.Lock()
khenaidoo0927c722021-12-15 16:49:32 -0500487 logger.Debugw(ctx, "endpoint-connected", log.Fields{"api-endpoint": c.serverEndPoint, "curr-state": c.state, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400488 if c.state != stateConnected {
khenaidoo0927c722021-12-15 16:49:32 -0500489 // Setup the service
490 c.connectionLock.RLock()
491 conn := c.connection
492 c.connectionLock.RUnlock()
493
494 subCtx, cancel := context.WithTimeout(ctx, c.backoffMaxInterval)
495 svc := handler(subCtx, conn)
496 if svc != nil {
497 c.service = svc
498 if p != nil {
499 p.UpdateStatus(ctx, c.serverEndPoint, probe.ServiceStatusRunning)
500 }
501 logger.Infow(ctx, "connected-to-endpoint", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
502 } else {
503 // Should never happen, but just in case
504 logger.Warnw(ctx, "service-is-nil", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
505 c.events <- eventDisconnected
506 }
507 cancel()
khenaidoo26721882021-08-11 17:42:52 -0400508 c.state = stateConnected
509 if initialConnection {
khenaidoo0927c722021-12-15 16:49:32 -0500510 logger.Debugw(ctx, "initial-endpoint-connection", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400511 initialConnection = false
512 } else {
khenaidoo0927c722021-12-15 16:49:32 -0500513 logger.Debugw(ctx, "endpoint-reconnection", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400514 // Trigger any callback on a restart
515 go func() {
khenaidoob9503212021-12-08 14:22:21 -0500516 err := c.onRestart(log.WithSpanFromContext(context.Background(), ctx), c.serverEndPoint)
khenaidoo26721882021-08-11 17:42:52 -0400517 if err != nil {
khenaidoo0927c722021-12-15 16:49:32 -0500518 logger.Errorw(ctx, "unable-to-restart-endpoint", log.Fields{"error": err, "api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400519 }
520 }()
521 }
522 }
523 c.stateLock.Unlock()
524
525 case eventDisconnected:
526 if p != nil {
khenaidoob9503212021-12-08 14:22:21 -0500527 p.UpdateStatus(ctx, c.serverEndPoint, probe.ServiceStatusNotReady)
khenaidoo26721882021-08-11 17:42:52 -0400528 }
khenaidoo0927c722021-12-15 16:49:32 -0500529 connectionValidationFail := false
530 c.stateLock.Lock()
531 logger.Debugw(ctx, "endpoint-disconnected", log.Fields{"api-endpoint": c.serverEndPoint, "curr-state": c.state, "client": c.clientEndpoint})
nikesh.krishnan6228a3d2023-06-10 06:37:05 +0530532 if c.state == stateConnected || c.state == stateValidatingConnection {
khenaidoo0927c722021-12-15 16:49:32 -0500533 connectionValidationFail = true
534 c.state = stateDisconnected
535 }
536 c.stateLock.Unlock()
khenaidoo26721882021-08-11 17:42:52 -0400537
khenaidoo0927c722021-12-15 16:49:32 -0500538 // Stop the streaming connection
539 if monitorConnectionDone != nil {
540 monitorConnectionDone()
541 monitorConnectionDone = nil
542 }
543
544 if connectionValidationFail {
545 // Retry connection after a delay
546 if err := backoff.Backoff(ctx); err != nil {
547 // Context has closed or reached maximum elapsed time, if set
548 logger.Errorw(ctx, "retry-aborted", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "error": err})
549 return
550 }
551 }
552 c.connectionLock.RLock()
553 if !c.done {
nikesh.krishnan6228a3d2023-06-10 06:37:05 +0530554 c.events <- eventValidatingConnection
khenaidoo0927c722021-12-15 16:49:32 -0500555 }
556 c.connectionLock.RUnlock()
khenaidoo26721882021-08-11 17:42:52 -0400557
558 case eventStopped:
khenaidoo0927c722021-12-15 16:49:32 -0500559 logger.Debugw(ctx, "endpoint-stopped", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
560
561 if monitorConnectionDone != nil {
562 monitorConnectionDone()
563 monitorConnectionDone = nil
564 }
565 if err := c.closeConnection(ctx, p); err != nil {
566 logger.Errorw(ctx, "endpoint-closing-connection-failed", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "error": err})
567 }
khenaidoo26721882021-08-11 17:42:52 -0400568 break loop
569 case eventError:
khenaidoo0927c722021-12-15 16:49:32 -0500570 logger.Errorw(ctx, "endpoint-error-event", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400571 default:
khenaidoo0927c722021-12-15 16:49:32 -0500572 logger.Errorw(ctx, "endpoint-unknown-event", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "error": event})
khenaidoo26721882021-08-11 17:42:52 -0400573 }
574 }
575 }
khenaidoo0927c722021-12-15 16:49:32 -0500576
577 // Stop the streaming connection
578 if monitorConnectionDone != nil {
579 logger.Debugw(ctx, "closing-connection-monitoring", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
580 monitorConnectionDone()
581 }
582
583 logger.Infow(ctx, "client-stopped", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400584}
585
nikesh.krishnanb547c1a2023-03-11 03:05:16 +0530586func (c *Client) connectToEndpoint(ctx context.Context, p *probe.Probe, retry_interceptor ...grpc.UnaryClientInterceptor) error {
khenaidoo26721882021-08-11 17:42:52 -0400587 if p != nil {
khenaidoob9503212021-12-08 14:22:21 -0500588 p.UpdateStatus(ctx, c.serverEndPoint, probe.ServiceStatusPreparing)
khenaidoo26721882021-08-11 17:42:52 -0400589 }
590
591 c.connectionLock.Lock()
592 defer c.connectionLock.Unlock()
593
594 if c.connection != nil {
595 _ = c.connection.Close()
596 c.connection = nil
597 }
598
599 c.service = nil
600
601 // Use Interceptors to:
602 // 1. automatically inject
603 // 2. publish Open Tracing Spans by this GRPC Client
604 // 3. detect connection failure on client calls such that the reconnection process can begin
nikesh.krishnanb547c1a2023-03-11 03:05:16 +0530605 interceptor_opts := []grpc.UnaryClientInterceptor{grpc_opentracing.UnaryClientInterceptor(grpc_opentracing.WithTracer(log.ActiveTracerProxy{}))}
606
607 if len(retry_interceptor) > 0 {
608 interceptor_opts = append(interceptor_opts, retry_interceptor...)
609 }
khenaidoob9503212021-12-08 14:22:21 -0500610 conn, err := grpc.Dial(c.serverEndPoint,
khenaidoo26721882021-08-11 17:42:52 -0400611 grpc.WithInsecure(),
612 grpc.WithStreamInterceptor(grpc_middleware.ChainStreamClient(
613 grpc_opentracing.StreamClientInterceptor(grpc_opentracing.WithTracer(log.ActiveTracerProxy{})),
614 )),
nikesh.krishnanb547c1a2023-03-11 03:05:16 +0530615 grpc.WithUnaryInterceptor(grpc_middleware.ChainUnaryClient(interceptor_opts...)),
khenaidoo26721882021-08-11 17:42:52 -0400616 )
617
618 if err == nil {
khenaidoo0927c722021-12-15 16:49:32 -0500619 c.connection = conn
620 c.events <- eventValidatingConnection
621 return nil
622 } else {
623 logger.Warnw(ctx, "no-connection-to-endpoint", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "error": err})
khenaidoo26721882021-08-11 17:42:52 -0400624 }
khenaidoo26721882021-08-11 17:42:52 -0400625
626 if p != nil {
khenaidoob9503212021-12-08 14:22:21 -0500627 p.UpdateStatus(ctx, c.serverEndPoint, probe.ServiceStatusFailed)
khenaidoo26721882021-08-11 17:42:52 -0400628 }
khenaidoo0927c722021-12-15 16:49:32 -0500629 return fmt.Errorf("no connection to api endpoint %s", c.serverEndPoint)
khenaidoo26721882021-08-11 17:42:52 -0400630}
631
632func (c *Client) closeConnection(ctx context.Context, p *probe.Probe) error {
633 if p != nil {
khenaidoob9503212021-12-08 14:22:21 -0500634 p.UpdateStatus(ctx, c.serverEndPoint, probe.ServiceStatusStopped)
khenaidoo26721882021-08-11 17:42:52 -0400635 }
khenaidoo0927c722021-12-15 16:49:32 -0500636 logger.Infow(ctx, "client-closing-connection", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400637
638 c.connectionLock.Lock()
639 defer c.connectionLock.Unlock()
640
641 if c.connection != nil {
642 err := c.connection.Close()
khenaidoo0927c722021-12-15 16:49:32 -0500643 c.service = nil
khenaidoo26721882021-08-11 17:42:52 -0400644 c.connection = nil
645 return err
646 }
647
648 return nil
649}
650
651func (c *Client) Stop(ctx context.Context) {
khenaidoo0927c722021-12-15 16:49:32 -0500652 logger.Infow(ctx, "client-stop-request-event-received", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoofe90ac32021-11-08 18:17:32 -0500653 c.connectionLock.Lock()
654 defer c.connectionLock.Unlock()
khenaidoo26721882021-08-11 17:42:52 -0400655 if !c.done {
khenaidoofe90ac32021-11-08 18:17:32 -0500656 c.done = true
khenaidoo26721882021-08-11 17:42:52 -0400657 c.events <- eventStopped
658 close(c.events)
khenaidoo26721882021-08-11 17:42:52 -0400659 }
khenaidoo0927c722021-12-15 16:49:32 -0500660 logger.Infow(ctx, "client-stop-request-event-sent", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400661}
662
663// SetService is used for testing only
664func (c *Client) SetService(srv interface{}) {
665 c.connectionLock.Lock()
666 defer c.connectionLock.Unlock()
667 c.service = srv
668}
669
670func (c *Client) SubscribeForLiveness(callback func(timestamp time.Time)) {
khenaidoo0927c722021-12-15 16:49:32 -0500671 c.livenessLock.Lock()
672 defer c.livenessLock.Unlock()
khenaidoo26721882021-08-11 17:42:52 -0400673 c.livenessCallback = callback
674}