blob: 2cd54a0708a153b53f58b628102d02cf0e96f2f9 [file] [log] [blame]
khenaidoo26721882021-08-11 17:42:52 -04001/*
Joey Armstrong9cdee9f2024-01-03 04:56:14 -05002 * Copyright 2021-2024 Open Networking Foundation (ONF) and the ONF Contributors
khenaidoo26721882021-08-11 17:42:52 -04003 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package grpc
17
18import (
19 "context"
20 "fmt"
21 "reflect"
22 "strings"
23 "sync"
24 "time"
25
26 grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware"
27 grpc_opentracing "github.com/grpc-ecosystem/go-grpc-middleware/tracing/opentracing"
khenaidoo0927c722021-12-15 16:49:32 -050028 "github.com/jhump/protoreflect/dynamic/grpcdynamic"
29 "github.com/jhump/protoreflect/grpcreflect"
khenaidoo26721882021-08-11 17:42:52 -040030 "github.com/opencord/voltha-lib-go/v7/pkg/log"
31 "github.com/opencord/voltha-lib-go/v7/pkg/probe"
khenaidoo0927c722021-12-15 16:49:32 -050032 "github.com/opencord/voltha-protos/v5/go/adapter_service"
khenaidoob9503212021-12-08 14:22:21 -050033 "github.com/opencord/voltha-protos/v5/go/common"
khenaidooa5feb8e2021-10-19 17:29:22 -040034 "github.com/opencord/voltha-protos/v5/go/core_service"
35 "github.com/opencord/voltha-protos/v5/go/olt_inter_adapter_service"
36 "github.com/opencord/voltha-protos/v5/go/onu_inter_adapter_service"
khenaidoo26721882021-08-11 17:42:52 -040037 "google.golang.org/grpc"
khenaidoo0927c722021-12-15 16:49:32 -050038 "google.golang.org/grpc/codes"
39 rpb "google.golang.org/grpc/reflection/grpc_reflection_v1alpha"
40 "google.golang.org/grpc/status"
khenaidoo26721882021-08-11 17:42:52 -040041)
42
43type event byte
44type state byte
khenaidoo0927c722021-12-15 16:49:32 -050045type GetServiceClient func(context.Context, *grpc.ClientConn) interface{}
khenaidoo26721882021-08-11 17:42:52 -040046type RestartedHandler func(ctx context.Context, endPoint string) error
47
khenaidoo26721882021-08-11 17:42:52 -040048const (
49 grpcBackoffInitialInterval = "GRPC_BACKOFF_INITIAL_INTERVAL"
50 grpcBackoffMaxInterval = "GRPC_BACKOFF_MAX_INTERVAL"
51 grpcBackoffMaxElapsedTime = "GRPC_BACKOFF_MAX_ELAPSED_TIME"
52 grpcMonitorInterval = "GRPC_MONITOR_INTERVAL"
53)
54
55const (
56 DefaultBackoffInitialInterval = 100 * time.Millisecond
57 DefaultBackoffMaxInterval = 5 * time.Second
58 DefaultBackoffMaxElapsedTime = 0 * time.Second // No time limit
59 DefaultGRPCMonitorInterval = 5 * time.Second
60)
61
62const (
abhay116c4d42025-03-21 00:35:07 +053063 // [VOL-5434] Setting max receive message size to 20 MB,
64 // Default value of 'defaultServerMaxReceiveMessageSize' is 4 MB
65 grpcRecvMsgSizeLimit = 20
66)
67
68const (
khenaidoo26721882021-08-11 17:42:52 -040069 eventConnecting = event(iota)
khenaidoo0927c722021-12-15 16:49:32 -050070 eventValidatingConnection
khenaidoo26721882021-08-11 17:42:52 -040071 eventConnected
72 eventDisconnected
73 eventStopped
74 eventError
75
76 stateConnected = state(iota)
khenaidoo0927c722021-12-15 16:49:32 -050077 stateValidatingConnection
khenaidoo26721882021-08-11 17:42:52 -040078 stateConnecting
79 stateDisconnected
80)
81
82type Client struct {
khenaidoob9503212021-12-08 14:22:21 -050083 clientEndpoint string
khenaidoo0927c722021-12-15 16:49:32 -050084 clientContextData string
khenaidoob9503212021-12-08 14:22:21 -050085 serverEndPoint string
khenaidoo0927c722021-12-15 16:49:32 -050086 remoteServiceName string
khenaidoo26721882021-08-11 17:42:52 -040087 connection *grpc.ClientConn
88 connectionLock sync.RWMutex
89 stateLock sync.RWMutex
90 state state
91 service interface{}
92 events chan event
93 onRestart RestartedHandler
94 backoffInitialInterval time.Duration
95 backoffMaxInterval time.Duration
96 backoffMaxElapsedTime time.Duration
khenaidoo26721882021-08-11 17:42:52 -040097 monitorInterval time.Duration
khenaidoo26721882021-08-11 17:42:52 -040098 done bool
khenaidoo0927c722021-12-15 16:49:32 -050099 livenessLock sync.RWMutex
khenaidoo26721882021-08-11 17:42:52 -0400100 livenessCallback func(timestamp time.Time)
101}
102
103type ClientOption func(*Client)
104
khenaidoo0927c722021-12-15 16:49:32 -0500105func ClientContextData(data string) ClientOption {
106 return func(args *Client) {
107 args.clientContextData = data
108 }
109}
110
111func NewClient(clientEndpoint, serverEndpoint, remoteServiceName string, onRestart RestartedHandler,
112 opts ...ClientOption) (*Client, error) {
khenaidoo26721882021-08-11 17:42:52 -0400113 c := &Client{
khenaidoob9503212021-12-08 14:22:21 -0500114 clientEndpoint: clientEndpoint,
115 serverEndPoint: serverEndpoint,
khenaidoo0927c722021-12-15 16:49:32 -0500116 remoteServiceName: remoteServiceName,
khenaidoo26721882021-08-11 17:42:52 -0400117 onRestart: onRestart,
khenaidoo0927c722021-12-15 16:49:32 -0500118 events: make(chan event, 5),
khenaidoo26721882021-08-11 17:42:52 -0400119 state: stateDisconnected,
120 backoffInitialInterval: DefaultBackoffInitialInterval,
121 backoffMaxInterval: DefaultBackoffMaxInterval,
122 backoffMaxElapsedTime: DefaultBackoffMaxElapsedTime,
123 monitorInterval: DefaultGRPCMonitorInterval,
124 }
125 for _, option := range opts {
126 option(c)
127 }
128
129 // Check for environment variables
130 if err := SetFromEnvVariable(grpcBackoffInitialInterval, &c.backoffInitialInterval); err != nil {
131 logger.Warnw(context.Background(), "failure-reading-env-variable", log.Fields{"error": err, "variable": grpcBackoffInitialInterval})
132 }
133
134 if err := SetFromEnvVariable(grpcBackoffMaxInterval, &c.backoffMaxInterval); err != nil {
135 logger.Warnw(context.Background(), "failure-reading-env-variable", log.Fields{"error": err, "variable": grpcBackoffMaxInterval})
136 }
137
138 if err := SetFromEnvVariable(grpcBackoffMaxElapsedTime, &c.backoffMaxElapsedTime); err != nil {
139 logger.Warnw(context.Background(), "failure-reading-env-variable", log.Fields{"error": err, "variable": grpcBackoffMaxElapsedTime})
140 }
141
142 if err := SetFromEnvVariable(grpcMonitorInterval, &c.monitorInterval); err != nil {
143 logger.Warnw(context.Background(), "failure-reading-env-variable", log.Fields{"error": err, "variable": grpcMonitorInterval})
144 }
145
146 logger.Infow(context.Background(), "initialized-client", log.Fields{"client": c})
147
148 // Sanity check
149 if c.backoffInitialInterval > c.backoffMaxInterval {
150 return nil, fmt.Errorf("initial retry delay %v is greater than maximum retry delay %v", c.backoffInitialInterval, c.backoffMaxInterval)
151 }
152
khenaidoo0927c722021-12-15 16:49:32 -0500153 grpc.EnableTracing = true
154
khenaidoo26721882021-08-11 17:42:52 -0400155 return c, nil
156}
157
158func (c *Client) GetClient() (interface{}, error) {
159 c.connectionLock.RLock()
160 defer c.connectionLock.RUnlock()
161 if c.service == nil {
khenaidoob9503212021-12-08 14:22:21 -0500162 return nil, fmt.Errorf("no connection to %s", c.serverEndPoint)
khenaidoo26721882021-08-11 17:42:52 -0400163 }
164 return c.service, nil
165}
166
167// GetCoreServiceClient is a helper function that returns a concrete service instead of the GetClient() API
168// which returns an interface
khenaidooa5feb8e2021-10-19 17:29:22 -0400169func (c *Client) GetCoreServiceClient() (core_service.CoreServiceClient, error) {
khenaidoo26721882021-08-11 17:42:52 -0400170 c.connectionLock.RLock()
171 defer c.connectionLock.RUnlock()
172 if c.service == nil {
khenaidoob9503212021-12-08 14:22:21 -0500173 return nil, fmt.Errorf("no core connection to %s", c.serverEndPoint)
khenaidoo26721882021-08-11 17:42:52 -0400174 }
khenaidooa5feb8e2021-10-19 17:29:22 -0400175 client, ok := c.service.(core_service.CoreServiceClient)
khenaidoo26721882021-08-11 17:42:52 -0400176 if ok {
177 return client, nil
178 }
179 return nil, fmt.Errorf("invalid-service-%s", reflect.TypeOf(c.service))
180}
181
182// GetOnuAdapterServiceClient is a helper function that returns a concrete service instead of the GetClient() API
183// which returns an interface
khenaidooa5feb8e2021-10-19 17:29:22 -0400184func (c *Client) GetOnuInterAdapterServiceClient() (onu_inter_adapter_service.OnuInterAdapterServiceClient, error) {
khenaidoo26721882021-08-11 17:42:52 -0400185 c.connectionLock.RLock()
186 defer c.connectionLock.RUnlock()
187 if c.service == nil {
khenaidoob9503212021-12-08 14:22:21 -0500188 return nil, fmt.Errorf("no child adapter connection to %s", c.serverEndPoint)
khenaidoo26721882021-08-11 17:42:52 -0400189 }
khenaidooa5feb8e2021-10-19 17:29:22 -0400190 client, ok := c.service.(onu_inter_adapter_service.OnuInterAdapterServiceClient)
khenaidoo26721882021-08-11 17:42:52 -0400191 if ok {
192 return client, nil
193 }
194 return nil, fmt.Errorf("invalid-service-%s", reflect.TypeOf(c.service))
195}
196
197// GetOltAdapterServiceClient is a helper function that returns a concrete service instead of the GetClient() API
198// which returns an interface
khenaidooa5feb8e2021-10-19 17:29:22 -0400199func (c *Client) GetOltInterAdapterServiceClient() (olt_inter_adapter_service.OltInterAdapterServiceClient, error) {
khenaidoo26721882021-08-11 17:42:52 -0400200 c.connectionLock.RLock()
201 defer c.connectionLock.RUnlock()
202 if c.service == nil {
khenaidoob9503212021-12-08 14:22:21 -0500203 return nil, fmt.Errorf("no parent adapter connection to %s", c.serverEndPoint)
khenaidoo26721882021-08-11 17:42:52 -0400204 }
khenaidooa5feb8e2021-10-19 17:29:22 -0400205 client, ok := c.service.(olt_inter_adapter_service.OltInterAdapterServiceClient)
khenaidoo26721882021-08-11 17:42:52 -0400206 if ok {
207 return client, nil
208 }
209 return nil, fmt.Errorf("invalid-service-%s", reflect.TypeOf(c.service))
210}
211
khenaidoo0927c722021-12-15 16:49:32 -0500212// GetAdapterServiceClient is a helper function that returns a concrete service instead of the GetClient() API
213// which returns an interface
214func (c *Client) GetAdapterServiceClient() (adapter_service.AdapterServiceClient, error) {
215 c.connectionLock.RLock()
216 defer c.connectionLock.RUnlock()
217 if c.service == nil {
218 return nil, fmt.Errorf("no adapter service connection to %s", c.serverEndPoint)
219 }
220 client, ok := c.service.(adapter_service.AdapterServiceClient)
221 if ok {
222 return client, nil
223 }
224 return nil, fmt.Errorf("invalid-service-%s", reflect.TypeOf(c.service))
225}
226
khenaidoo26721882021-08-11 17:42:52 -0400227func (c *Client) Reset(ctx context.Context) {
khenaidoo0927c722021-12-15 16:49:32 -0500228 logger.Debugw(ctx, "resetting-client-connection", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400229 c.stateLock.Lock()
230 defer c.stateLock.Unlock()
231 if c.state == stateConnected {
232 c.state = stateDisconnected
233 c.events <- eventDisconnected
234 }
235}
236
khenaidoo0927c722021-12-15 16:49:32 -0500237// executeWithTimeout runs a sending function (sf) along with a receiving one(rf) and returns an error, if any.
nikesh.krishnanb547c1a2023-03-11 03:05:16 +0530238// If the deadline elapses first, it returns a grpc DeadlineExceeded error instead.
khenaidoo0927c722021-12-15 16:49:32 -0500239func (c *Client) executeWithTimeout(sf func(*common.Connection) error, rf func() (interface{}, error), conn *common.Connection, d time.Duration) error {
240 errChan := make(chan error, 1)
241 go func() {
242 err := sf(conn)
243 logger.Debugw(context.Background(), "message-sent", log.Fields{"error": err, "qpi-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
244 if err == nil {
245 response, err := rf()
246 logger.Debugw(context.Background(), "message-received", log.Fields{"error": err, "qpi-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "health": response})
247 }
248 errChan <- err
249 close(errChan)
250 }()
251 t := time.NewTimer(d)
252 select {
253 case <-t.C:
254 return status.Errorf(codes.DeadlineExceeded, "timeout-on-sending-message")
255 case err := <-errChan:
256 if !t.Stop() {
257 <-t.C
khenaidoo26721882021-08-11 17:42:52 -0400258 }
259 return err
260 }
khenaidoo26721882021-08-11 17:42:52 -0400261}
262
khenaidoo0927c722021-12-15 16:49:32 -0500263func (c *Client) monitorConnection(ctx context.Context) {
264 logger.Debugw(ctx, "monitor-connection-started", log.Fields{"qpi-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400265
khenaidoo0927c722021-12-15 16:49:32 -0500266 // If we exit, assume disconnected
267 defer func() {
268 c.stateLock.Lock()
269 if !c.done && (c.state == stateConnected || c.state == stateValidatingConnection) {
270 // Handle only connected state here. We need the validating state to know if we need to backoff before a retry
Sridhar Ravindra729e4b02025-02-10 16:41:14 +0530271 if c.state == stateConnected {
272 c.state = stateDisconnected
273 }
khenaidoo0927c722021-12-15 16:49:32 -0500274 logger.Warnw(ctx, "sending-disconnect-event", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "curr-state": stateConnected, "new-state": c.state})
275 c.events <- eventDisconnected
276 } else {
277 logger.Debugw(ctx, "no-state-change-needed", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "state": c.state, "client-done": c.done})
khenaidoo26721882021-08-11 17:42:52 -0400278 }
khenaidoo0927c722021-12-15 16:49:32 -0500279 c.stateLock.Unlock()
280 logger.Debugw(ctx, "monitor-connection-ended", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
281 }()
282
283 c.connectionLock.RLock()
284 conn := c.connection
285 c.connectionLock.RUnlock()
286 if conn == nil {
287 logger.Errorw(ctx, "connection-nil", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
288 return
khenaidoo26721882021-08-11 17:42:52 -0400289 }
khenaidoo26721882021-08-11 17:42:52 -0400290
khenaidoo0927c722021-12-15 16:49:32 -0500291 // Get a new client using reflection. The server can implement any grpc service, but it
292 // needs to also implement the "StartKeepAliveStream" API
293 grpcReflectClient := grpcreflect.NewClient(ctx, rpb.NewServerReflectionClient(conn))
294 if grpcReflectClient == nil {
295 logger.Errorw(ctx, "grpc-reflect-client-nil", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
296 return
khenaidoo26721882021-08-11 17:42:52 -0400297 }
khenaidoo26721882021-08-11 17:42:52 -0400298
khenaidoo0927c722021-12-15 16:49:32 -0500299 // Get the list of services - there should be 2 services: a server reflection and the voltha service we are interested in
300 services, err := grpcReflectClient.ListServices()
301 if err != nil {
302 logger.Errorw(ctx, "list-services-error", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "error": err})
303 return
304 }
khenaidoo26721882021-08-11 17:42:52 -0400305
khenaidoo0927c722021-12-15 16:49:32 -0500306 // Filter out the service
307 logger.Debugw(ctx, "services", log.Fields{"services": services})
308 serviceOfInterest := ""
309 for _, service := range services {
310 if strings.EqualFold(service, c.remoteServiceName) {
311 serviceOfInterest = service
312 break
313 }
314 }
315 if serviceOfInterest == "" {
316 logger.Errorw(ctx, "no-service-found", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "services": services, "expected-remote-service": c.remoteServiceName})
317 return
318 }
khenaidooaa290962021-10-22 18:14:33 -0400319
khenaidoo0927c722021-12-15 16:49:32 -0500320 // Resolve the service
321 resolvedService, err := grpcReflectClient.ResolveService(serviceOfInterest)
322 if err != nil {
323 logger.Errorw(ctx, "service-error", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "service": resolvedService, "error": err})
324 return
325 }
326
327 // Find the method of interest
328 method := resolvedService.FindMethodByName("GetHealthStatus")
329 if method == nil {
330 logger.Errorw(ctx, "nil-method", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "service": resolvedService})
331 return
332 }
333 logger.Debugw(ctx, "resolved-to-method", log.Fields{"service": resolvedService.GetName(), "method": method.GetName()})
334
335 // Get a dynamic connection
336 dynamicConn := grpcdynamic.NewStub(conn)
337
338 // Get the stream and send this client information
339 streamCtx, streamDone := context.WithCancel(log.WithSpanFromContext(context.Background(), ctx))
340 defer streamDone()
341 stream, err := dynamicConn.InvokeRpcBidiStream(streamCtx, method)
342 if err != nil {
343 logger.Errorw(ctx, "stream-error", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "service": resolvedService, "error": err})
344 return
345 }
346
347 clientInfo := &common.Connection{
348 Endpoint: c.clientEndpoint,
349 ContextInfo: c.clientContextData,
350 KeepAliveInterval: int64(c.monitorInterval),
351 }
352
353 initialConnection := true
khenaidoo26721882021-08-11 17:42:52 -0400354loop:
355 for {
khenaidoo0927c722021-12-15 16:49:32 -0500356 // Let's send a keep alive message with our info
357 err := c.executeWithTimeout(
358 func(conn *common.Connection) error { return stream.SendMsg(conn) },
359 func() (interface{}, error) { return stream.RecvMsg() },
360 clientInfo,
361 c.monitorInterval)
khenaidoo26721882021-08-11 17:42:52 -0400362
khenaidoo0927c722021-12-15 16:49:32 -0500363 if err != nil {
364 // Any error means the far end is gone
365 logger.Errorw(ctx, "sending-stream-error", log.Fields{"error": err, "api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "context": stream.Context().Err()})
khenaidoo26721882021-08-11 17:42:52 -0400366 break loop
khenaidoo0927c722021-12-15 16:49:32 -0500367 }
368 // Send a connect event
369 if initialConnection {
370 logger.Debugw(ctx, "first-stream-data-sent", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
371 c.events <- eventConnected
372 initialConnection = false
373 }
374 logger.Debugw(ctx, "stream-data-sent", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
375 // Update liveness, if configured
376 c.livenessLock.RLock()
377 if c.livenessCallback != nil {
378 go c.livenessCallback(time.Now())
379 }
380 c.livenessLock.RUnlock()
khenaidoo26721882021-08-11 17:42:52 -0400381
khenaidoo0927c722021-12-15 16:49:32 -0500382 // Wait to send the next keep alive
383 keepAliveTimer := time.NewTimer(time.Duration(clientInfo.KeepAliveInterval))
384 select {
385 case <-ctx.Done():
386 logger.Warnw(ctx, "context-done", log.Fields{"api-endpont": c.serverEndPoint, "client": c.clientEndpoint})
387 break loop
388 case <-stream.Context().Done():
389 logger.Debugw(ctx, "stream-context-done", log.Fields{"api-endpoint": c.serverEndPoint, "stream-info": stream.Context(), "client": c.clientEndpoint})
390 break loop
391 case <-keepAliveTimer.C:
392 continue
khenaidoo26721882021-08-11 17:42:52 -0400393 }
394 }
khenaidoo0927c722021-12-15 16:49:32 -0500395 if stream != nil {
396 if err := stream.CloseSend(); err != nil {
397 logger.Warnw(ctx, "closing-stream-error", log.Fields{"error": err, "api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
398 }
399 }
khenaidoo26721882021-08-11 17:42:52 -0400400}
401
402// Start kicks off the adapter agent by trying to connect to the adapter
nikesh.krishnanb547c1a2023-03-11 03:05:16 +0530403func (c *Client) Start(ctx context.Context, handler GetServiceClient, retry_interceptor ...grpc.UnaryClientInterceptor) {
khenaidoob9503212021-12-08 14:22:21 -0500404 logger.Debugw(ctx, "Starting GRPC - Client", log.Fields{"api-endpoint": c.serverEndPoint})
khenaidoo26721882021-08-11 17:42:52 -0400405
406 // If the context contains a k8s probe then register services
407 p := probe.GetProbeFromContext(ctx)
408 if p != nil {
khenaidoob9503212021-12-08 14:22:21 -0500409 p.RegisterService(ctx, c.serverEndPoint)
khenaidoo26721882021-08-11 17:42:52 -0400410 }
411
khenaidoo0927c722021-12-15 16:49:32 -0500412 var monitorConnectionCtx context.Context
413 var monitorConnectionDone func()
khenaidoo26721882021-08-11 17:42:52 -0400414
415 initialConnection := true
416 c.events <- eventConnecting
417 backoff := NewBackoff(c.backoffInitialInterval, c.backoffMaxInterval, c.backoffMaxElapsedTime)
418 attempt := 1
419loop:
420 for {
421 select {
422 case <-ctx.Done():
khenaidoo0927c722021-12-15 16:49:32 -0500423 logger.Warnw(ctx, "context-closing", log.Fields{"api_endpoint": c.serverEndPoint, "client": c.clientEndpoint, "context": ctx})
424 c.connectionLock.Lock()
425 if !c.done {
426 c.done = true
427 c.events <- eventStopped
428 close(c.events)
429 }
430 c.connectionLock.Unlock()
431 // break loop
khenaidoo26721882021-08-11 17:42:52 -0400432 case event := <-c.events:
khenaidoo0927c722021-12-15 16:49:32 -0500433 logger.Debugw(ctx, "received-event", log.Fields{"event": event, "api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoofe90ac32021-11-08 18:17:32 -0500434 c.connectionLock.RLock()
435 // On a client stopped, just allow the stop event to go through
436 if c.done && event != eventStopped {
437 c.connectionLock.RUnlock()
khenaidoo0927c722021-12-15 16:49:32 -0500438 logger.Debugw(ctx, "ignoring-event-on-client-stop", log.Fields{"event": event, "api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoofe90ac32021-11-08 18:17:32 -0500439 continue
440 }
441 c.connectionLock.RUnlock()
khenaidoo26721882021-08-11 17:42:52 -0400442 switch event {
443 case eventConnecting:
khenaidoo26721882021-08-11 17:42:52 -0400444 c.stateLock.Lock()
khenaidoo0927c722021-12-15 16:49:32 -0500445 logger.Debugw(ctx, "connection-start", log.Fields{"api-endpoint": c.serverEndPoint, "attempts": attempt, "curr-state": c.state, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400446 if c.state == stateConnected {
447 c.state = stateDisconnected
448 }
449 if c.state != stateConnecting {
450 c.state = stateConnecting
451 go func() {
nikesh.krishnanb547c1a2023-03-11 03:05:16 +0530452 var err error
453 if len(retry_interceptor) > 0 {
454 err = c.connectToEndpoint(ctx, p, retry_interceptor...)
455 } else {
456 err = c.connectToEndpoint(ctx, p)
457 }
458
459 if err != nil {
khenaidoo26721882021-08-11 17:42:52 -0400460 c.stateLock.Lock()
461 c.state = stateDisconnected
462 c.stateLock.Unlock()
khenaidoo0927c722021-12-15 16:49:32 -0500463 logger.Errorw(ctx, "connection-failed", log.Fields{"api-endpoint": c.serverEndPoint, "attempt": attempt, "client": c.clientEndpoint, "error": err})
khenaidoo26721882021-08-11 17:42:52 -0400464
465 // Retry connection after a delay
466 if err = backoff.Backoff(ctx); err != nil {
467 // Context has closed or reached maximum elapsed time, if set
khenaidoo0927c722021-12-15 16:49:32 -0500468 logger.Errorw(ctx, "retry-aborted", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "error": err})
khenaidoo26721882021-08-11 17:42:52 -0400469 return
470 }
471 attempt += 1
khenaidoofe90ac32021-11-08 18:17:32 -0500472 c.connectionLock.RLock()
473 if !c.done {
474 c.events <- eventConnecting
475 }
476 c.connectionLock.RUnlock()
khenaidoo26721882021-08-11 17:42:52 -0400477 }
478 }()
479 }
480 c.stateLock.Unlock()
481
khenaidoo0927c722021-12-15 16:49:32 -0500482 case eventValidatingConnection:
483 logger.Debugw(ctx, "connection-validation", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
484 c.stateLock.Lock()
485 if c.state != stateConnected {
486 c.state = stateValidatingConnection
487 }
488 c.stateLock.Unlock()
489 monitorConnectionCtx, monitorConnectionDone = context.WithCancel(context.Background())
490 go c.monitorConnection(monitorConnectionCtx)
491
khenaidoo26721882021-08-11 17:42:52 -0400492 case eventConnected:
khenaidoo26721882021-08-11 17:42:52 -0400493 attempt = 1
khenaidoo0927c722021-12-15 16:49:32 -0500494 backoff.Reset()
khenaidoo26721882021-08-11 17:42:52 -0400495 c.stateLock.Lock()
khenaidoo0927c722021-12-15 16:49:32 -0500496 logger.Debugw(ctx, "endpoint-connected", log.Fields{"api-endpoint": c.serverEndPoint, "curr-state": c.state, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400497 if c.state != stateConnected {
khenaidoo0927c722021-12-15 16:49:32 -0500498 // Setup the service
499 c.connectionLock.RLock()
500 conn := c.connection
501 c.connectionLock.RUnlock()
502
503 subCtx, cancel := context.WithTimeout(ctx, c.backoffMaxInterval)
504 svc := handler(subCtx, conn)
505 if svc != nil {
506 c.service = svc
507 if p != nil {
508 p.UpdateStatus(ctx, c.serverEndPoint, probe.ServiceStatusRunning)
509 }
510 logger.Infow(ctx, "connected-to-endpoint", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
511 } else {
512 // Should never happen, but just in case
513 logger.Warnw(ctx, "service-is-nil", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
514 c.events <- eventDisconnected
515 }
516 cancel()
khenaidoo26721882021-08-11 17:42:52 -0400517 c.state = stateConnected
518 if initialConnection {
khenaidoo0927c722021-12-15 16:49:32 -0500519 logger.Debugw(ctx, "initial-endpoint-connection", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400520 initialConnection = false
521 } else {
khenaidoo0927c722021-12-15 16:49:32 -0500522 logger.Debugw(ctx, "endpoint-reconnection", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400523 // Trigger any callback on a restart
524 go func() {
khenaidoob9503212021-12-08 14:22:21 -0500525 err := c.onRestart(log.WithSpanFromContext(context.Background(), ctx), c.serverEndPoint)
khenaidoo26721882021-08-11 17:42:52 -0400526 if err != nil {
khenaidoo0927c722021-12-15 16:49:32 -0500527 logger.Errorw(ctx, "unable-to-restart-endpoint", log.Fields{"error": err, "api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400528 }
529 }()
530 }
531 }
532 c.stateLock.Unlock()
533
534 case eventDisconnected:
535 if p != nil {
khenaidoob9503212021-12-08 14:22:21 -0500536 p.UpdateStatus(ctx, c.serverEndPoint, probe.ServiceStatusNotReady)
khenaidoo26721882021-08-11 17:42:52 -0400537 }
khenaidoo0927c722021-12-15 16:49:32 -0500538 connectionValidationFail := false
539 c.stateLock.Lock()
540 logger.Debugw(ctx, "endpoint-disconnected", log.Fields{"api-endpoint": c.serverEndPoint, "curr-state": c.state, "client": c.clientEndpoint})
Sridhar Ravindra729e4b02025-02-10 16:41:14 +0530541 if c.state == stateValidatingConnection {
khenaidoo0927c722021-12-15 16:49:32 -0500542 connectionValidationFail = true
543 c.state = stateDisconnected
544 }
545 c.stateLock.Unlock()
khenaidoo26721882021-08-11 17:42:52 -0400546
khenaidoo0927c722021-12-15 16:49:32 -0500547 // Stop the streaming connection
548 if monitorConnectionDone != nil {
549 monitorConnectionDone()
550 monitorConnectionDone = nil
551 }
552
553 if connectionValidationFail {
554 // Retry connection after a delay
555 if err := backoff.Backoff(ctx); err != nil {
556 // Context has closed or reached maximum elapsed time, if set
557 logger.Errorw(ctx, "retry-aborted", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "error": err})
558 return
559 }
560 }
561 c.connectionLock.RLock()
562 if !c.done {
Sridhar Ravindra729e4b02025-02-10 16:41:14 +0530563 c.events <- eventConnecting
khenaidoo0927c722021-12-15 16:49:32 -0500564 }
565 c.connectionLock.RUnlock()
khenaidoo26721882021-08-11 17:42:52 -0400566
567 case eventStopped:
khenaidoo0927c722021-12-15 16:49:32 -0500568 logger.Debugw(ctx, "endpoint-stopped", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
569
570 if monitorConnectionDone != nil {
571 monitorConnectionDone()
572 monitorConnectionDone = nil
573 }
574 if err := c.closeConnection(ctx, p); err != nil {
575 logger.Errorw(ctx, "endpoint-closing-connection-failed", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "error": err})
576 }
khenaidoo26721882021-08-11 17:42:52 -0400577 break loop
578 case eventError:
khenaidoo0927c722021-12-15 16:49:32 -0500579 logger.Errorw(ctx, "endpoint-error-event", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400580 default:
khenaidoo0927c722021-12-15 16:49:32 -0500581 logger.Errorw(ctx, "endpoint-unknown-event", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "error": event})
khenaidoo26721882021-08-11 17:42:52 -0400582 }
583 }
584 }
khenaidoo0927c722021-12-15 16:49:32 -0500585
586 // Stop the streaming connection
587 if monitorConnectionDone != nil {
588 logger.Debugw(ctx, "closing-connection-monitoring", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
589 monitorConnectionDone()
590 }
591
592 logger.Infow(ctx, "client-stopped", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400593}
594
nikesh.krishnanb547c1a2023-03-11 03:05:16 +0530595func (c *Client) connectToEndpoint(ctx context.Context, p *probe.Probe, retry_interceptor ...grpc.UnaryClientInterceptor) error {
khenaidoo26721882021-08-11 17:42:52 -0400596 if p != nil {
khenaidoob9503212021-12-08 14:22:21 -0500597 p.UpdateStatus(ctx, c.serverEndPoint, probe.ServiceStatusPreparing)
khenaidoo26721882021-08-11 17:42:52 -0400598 }
599
600 c.connectionLock.Lock()
601 defer c.connectionLock.Unlock()
602
603 if c.connection != nil {
604 _ = c.connection.Close()
605 c.connection = nil
606 }
607
608 c.service = nil
609
610 // Use Interceptors to:
611 // 1. automatically inject
612 // 2. publish Open Tracing Spans by this GRPC Client
613 // 3. detect connection failure on client calls such that the reconnection process can begin
nikesh.krishnanb547c1a2023-03-11 03:05:16 +0530614 interceptor_opts := []grpc.UnaryClientInterceptor{grpc_opentracing.UnaryClientInterceptor(grpc_opentracing.WithTracer(log.ActiveTracerProxy{}))}
615
616 if len(retry_interceptor) > 0 {
617 interceptor_opts = append(interceptor_opts, retry_interceptor...)
618 }
khenaidoob9503212021-12-08 14:22:21 -0500619 conn, err := grpc.Dial(c.serverEndPoint,
khenaidoo26721882021-08-11 17:42:52 -0400620 grpc.WithInsecure(),
abhay116c4d42025-03-21 00:35:07 +0530621 grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(grpcRecvMsgSizeLimit*1024*1024)),
khenaidoo26721882021-08-11 17:42:52 -0400622 grpc.WithStreamInterceptor(grpc_middleware.ChainStreamClient(
623 grpc_opentracing.StreamClientInterceptor(grpc_opentracing.WithTracer(log.ActiveTracerProxy{})),
624 )),
nikesh.krishnanb547c1a2023-03-11 03:05:16 +0530625 grpc.WithUnaryInterceptor(grpc_middleware.ChainUnaryClient(interceptor_opts...)),
khenaidoo26721882021-08-11 17:42:52 -0400626 )
627
628 if err == nil {
khenaidoo0927c722021-12-15 16:49:32 -0500629 c.connection = conn
630 c.events <- eventValidatingConnection
631 return nil
632 } else {
633 logger.Warnw(ctx, "no-connection-to-endpoint", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint, "error": err})
khenaidoo26721882021-08-11 17:42:52 -0400634 }
khenaidoo26721882021-08-11 17:42:52 -0400635
636 if p != nil {
khenaidoob9503212021-12-08 14:22:21 -0500637 p.UpdateStatus(ctx, c.serverEndPoint, probe.ServiceStatusFailed)
khenaidoo26721882021-08-11 17:42:52 -0400638 }
khenaidoo0927c722021-12-15 16:49:32 -0500639 return fmt.Errorf("no connection to api endpoint %s", c.serverEndPoint)
khenaidoo26721882021-08-11 17:42:52 -0400640}
641
642func (c *Client) closeConnection(ctx context.Context, p *probe.Probe) error {
643 if p != nil {
khenaidoob9503212021-12-08 14:22:21 -0500644 p.UpdateStatus(ctx, c.serverEndPoint, probe.ServiceStatusStopped)
khenaidoo26721882021-08-11 17:42:52 -0400645 }
khenaidoo0927c722021-12-15 16:49:32 -0500646 logger.Infow(ctx, "client-closing-connection", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400647
648 c.connectionLock.Lock()
649 defer c.connectionLock.Unlock()
650
651 if c.connection != nil {
652 err := c.connection.Close()
khenaidoo0927c722021-12-15 16:49:32 -0500653 c.service = nil
khenaidoo26721882021-08-11 17:42:52 -0400654 c.connection = nil
655 return err
656 }
657
658 return nil
659}
660
661func (c *Client) Stop(ctx context.Context) {
khenaidoo0927c722021-12-15 16:49:32 -0500662 logger.Infow(ctx, "client-stop-request-event-received", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoofe90ac32021-11-08 18:17:32 -0500663 c.connectionLock.Lock()
664 defer c.connectionLock.Unlock()
khenaidoo26721882021-08-11 17:42:52 -0400665 if !c.done {
khenaidoofe90ac32021-11-08 18:17:32 -0500666 c.done = true
khenaidoo26721882021-08-11 17:42:52 -0400667 c.events <- eventStopped
668 close(c.events)
khenaidoo26721882021-08-11 17:42:52 -0400669 }
khenaidoo0927c722021-12-15 16:49:32 -0500670 logger.Infow(ctx, "client-stop-request-event-sent", log.Fields{"api-endpoint": c.serverEndPoint, "client": c.clientEndpoint})
khenaidoo26721882021-08-11 17:42:52 -0400671}
672
673// SetService is used for testing only
674func (c *Client) SetService(srv interface{}) {
675 c.connectionLock.Lock()
676 defer c.connectionLock.Unlock()
677 c.service = srv
678}
679
680func (c *Client) SubscribeForLiveness(callback func(timestamp time.Time)) {
khenaidoo0927c722021-12-15 16:49:32 -0500681 c.livenessLock.Lock()
682 defer c.livenessLock.Unlock()
khenaidoo26721882021-08-11 17:42:52 -0400683 c.livenessCallback = callback
684}