[VOL-4292] OpenOLT Adapter changes for gRPC migration
Change-Id: I5af2125f2c2f53ffc78c474a94314bba408f8bae
diff --git a/vendor/github.com/opencord/voltha-lib-go/v7/pkg/grpc/client.go b/vendor/github.com/opencord/voltha-lib-go/v7/pkg/grpc/client.go
new file mode 100644
index 0000000..de649d6
--- /dev/null
+++ b/vendor/github.com/opencord/voltha-lib-go/v7/pkg/grpc/client.go
@@ -0,0 +1,541 @@
+/*
+ * Copyright 2021-present Open Networking Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package grpc
+
+import (
+ "context"
+ "fmt"
+ "reflect"
+ "strings"
+ "sync"
+ "time"
+
+ grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware"
+ grpc_opentracing "github.com/grpc-ecosystem/go-grpc-middleware/tracing/opentracing"
+ "github.com/opencord/voltha-lib-go/v7/pkg/log"
+ "github.com/opencord/voltha-lib-go/v7/pkg/probe"
+ "github.com/opencord/voltha-protos/v5/go/adapter_services"
+ "github.com/opencord/voltha-protos/v5/go/core"
+ "google.golang.org/grpc"
+ "google.golang.org/grpc/keepalive"
+)
+
+type event byte
+type state byte
+type SetAndTestServiceHandler func(context.Context, *grpc.ClientConn) interface{}
+type RestartedHandler func(ctx context.Context, endPoint string) error
+
+type contextKey string
+
+func (c contextKey) String() string {
+ return string(c)
+}
+
+var (
+ grpcMonitorContextKey = contextKey("grpc-monitor")
+)
+
+const (
+ grpcBackoffInitialInterval = "GRPC_BACKOFF_INITIAL_INTERVAL"
+ grpcBackoffMaxInterval = "GRPC_BACKOFF_MAX_INTERVAL"
+ grpcBackoffMaxElapsedTime = "GRPC_BACKOFF_MAX_ELAPSED_TIME"
+ grpcMonitorInterval = "GRPC_MONITOR_INTERVAL"
+)
+
+const (
+ DefaultBackoffInitialInterval = 100 * time.Millisecond
+ DefaultBackoffMaxInterval = 5 * time.Second
+ DefaultBackoffMaxElapsedTime = 0 * time.Second // No time limit
+ DefaultGRPCMonitorInterval = 5 * time.Second
+)
+
+const (
+ connectionErrorSubString = "SubConns are in TransientFailure"
+ connectionClosedSubstring = "client connection is closing"
+ connectionError = "connection error"
+ connectionSystemNotReady = "system is not ready"
+)
+
+const (
+ eventConnecting = event(iota)
+ eventConnected
+ eventDisconnected
+ eventStopped
+ eventError
+
+ stateConnected = state(iota)
+ stateConnecting
+ stateDisconnected
+)
+
+type Client struct {
+ apiEndPoint string
+ connection *grpc.ClientConn
+ connectionLock sync.RWMutex
+ stateLock sync.RWMutex
+ state state
+ service interface{}
+ events chan event
+ onRestart RestartedHandler
+ backoffInitialInterval time.Duration
+ backoffMaxInterval time.Duration
+ backoffMaxElapsedTime time.Duration
+ activityCheck bool
+ monitorInterval time.Duration
+ activeCh chan struct{}
+ activeChMutex sync.RWMutex
+ done bool
+ livenessCallback func(timestamp time.Time)
+}
+
+type ClientOption func(*Client)
+
+func ActivityCheck(enable bool) ClientOption {
+ return func(args *Client) {
+ args.activityCheck = enable
+ }
+}
+
+func NewClient(endpoint string, onRestart RestartedHandler, opts ...ClientOption) (*Client, error) {
+ c := &Client{
+ apiEndPoint: endpoint,
+ onRestart: onRestart,
+ events: make(chan event, 1),
+ state: stateDisconnected,
+ backoffInitialInterval: DefaultBackoffInitialInterval,
+ backoffMaxInterval: DefaultBackoffMaxInterval,
+ backoffMaxElapsedTime: DefaultBackoffMaxElapsedTime,
+ monitorInterval: DefaultGRPCMonitorInterval,
+ }
+ for _, option := range opts {
+ option(c)
+ }
+
+ // Check for environment variables
+ if err := SetFromEnvVariable(grpcBackoffInitialInterval, &c.backoffInitialInterval); err != nil {
+ logger.Warnw(context.Background(), "failure-reading-env-variable", log.Fields{"error": err, "variable": grpcBackoffInitialInterval})
+ }
+
+ if err := SetFromEnvVariable(grpcBackoffMaxInterval, &c.backoffMaxInterval); err != nil {
+ logger.Warnw(context.Background(), "failure-reading-env-variable", log.Fields{"error": err, "variable": grpcBackoffMaxInterval})
+ }
+
+ if err := SetFromEnvVariable(grpcBackoffMaxElapsedTime, &c.backoffMaxElapsedTime); err != nil {
+ logger.Warnw(context.Background(), "failure-reading-env-variable", log.Fields{"error": err, "variable": grpcBackoffMaxElapsedTime})
+ }
+
+ if err := SetFromEnvVariable(grpcMonitorInterval, &c.monitorInterval); err != nil {
+ logger.Warnw(context.Background(), "failure-reading-env-variable", log.Fields{"error": err, "variable": grpcMonitorInterval})
+ }
+
+ logger.Infow(context.Background(), "initialized-client", log.Fields{"client": c})
+
+ // Sanity check
+ if c.backoffInitialInterval > c.backoffMaxInterval {
+ return nil, fmt.Errorf("initial retry delay %v is greater than maximum retry delay %v", c.backoffInitialInterval, c.backoffMaxInterval)
+ }
+
+ return c, nil
+}
+
+func (c *Client) GetClient() (interface{}, error) {
+ c.connectionLock.RLock()
+ defer c.connectionLock.RUnlock()
+ if c.service == nil {
+ return nil, fmt.Errorf("no connection to %s", c.apiEndPoint)
+ }
+ return c.service, nil
+}
+
+// GetCoreServiceClient is a helper function that returns a concrete service instead of the GetClient() API
+// which returns an interface
+func (c *Client) GetCoreServiceClient() (core.CoreServiceClient, error) {
+ c.connectionLock.RLock()
+ defer c.connectionLock.RUnlock()
+ if c.service == nil {
+ return nil, fmt.Errorf("no core connection to %s", c.apiEndPoint)
+ }
+ client, ok := c.service.(core.CoreServiceClient)
+ if ok {
+ return client, nil
+ }
+ return nil, fmt.Errorf("invalid-service-%s", reflect.TypeOf(c.service))
+}
+
+// GetOnuAdapterServiceClient is a helper function that returns a concrete service instead of the GetClient() API
+// which returns an interface
+func (c *Client) GetOnuInterAdapterServiceClient() (adapter_services.OnuInterAdapterServiceClient, error) {
+ c.connectionLock.RLock()
+ defer c.connectionLock.RUnlock()
+ if c.service == nil {
+ return nil, fmt.Errorf("no child adapter connection to %s", c.apiEndPoint)
+ }
+ client, ok := c.service.(adapter_services.OnuInterAdapterServiceClient)
+ if ok {
+ return client, nil
+ }
+ return nil, fmt.Errorf("invalid-service-%s", reflect.TypeOf(c.service))
+}
+
+// GetOltAdapterServiceClient is a helper function that returns a concrete service instead of the GetClient() API
+// which returns an interface
+func (c *Client) GetOltInterAdapterServiceClient() (adapter_services.OltInterAdapterServiceClient, error) {
+ c.connectionLock.RLock()
+ defer c.connectionLock.RUnlock()
+ if c.service == nil {
+ return nil, fmt.Errorf("no parent adapter connection to %s", c.apiEndPoint)
+ }
+ client, ok := c.service.(adapter_services.OltInterAdapterServiceClient)
+ if ok {
+ return client, nil
+ }
+ return nil, fmt.Errorf("invalid-service-%s", reflect.TypeOf(c.service))
+}
+
+func (c *Client) Reset(ctx context.Context) {
+ logger.Debugw(ctx, "resetting-client-connection", log.Fields{"endpoint": c.apiEndPoint})
+ c.stateLock.Lock()
+ defer c.stateLock.Unlock()
+ if c.state == stateConnected {
+ c.state = stateDisconnected
+ c.events <- eventDisconnected
+ }
+}
+
+func (c *Client) clientInterceptor(ctx context.Context, method string, req interface{}, reply interface{},
+ cc *grpc.ClientConn, invoker grpc.UnaryInvoker, opts ...grpc.CallOption) error {
+ // Nothing to do before intercepting the call
+ err := invoker(ctx, method, req, reply, cc, opts...)
+ // On connection failure, start the reconnect process depending on the error response
+ if err != nil {
+ logger.Errorw(ctx, "received-error", log.Fields{"error": err, "context": ctx, "endpoint": c.apiEndPoint})
+ if strings.Contains(err.Error(), connectionErrorSubString) ||
+ strings.Contains(err.Error(), connectionError) ||
+ strings.Contains(err.Error(), connectionSystemNotReady) ||
+ isGrpcMonitorKeyPresentInContext(ctx) {
+ c.stateLock.Lock()
+ if c.state == stateConnected {
+ logger.Warnw(context.Background(), "sending-disconnect-event", log.Fields{"endpoint": c.apiEndPoint, "error": err})
+ c.state = stateDisconnected
+ c.events <- eventDisconnected
+ }
+ c.stateLock.Unlock()
+ } else if strings.Contains(err.Error(), connectionClosedSubstring) {
+ logger.Errorw(context.Background(), "invalid-client-connection-closed", log.Fields{"endpoint": c.apiEndPoint, "error": err})
+ }
+ return err
+ }
+ // Update activity on success only
+ c.updateActivity(ctx)
+ return nil
+}
+
+// updateActivity pushes an activity indication on the channel so that the monitoring routine does not validate
+// the gRPC connection when the connection is being used. Note that this update is done both when the connection
+// is alive or a connection error is returned. A separate routine takes care of doing the re-connect.
+func (c *Client) updateActivity(ctx context.Context) {
+ c.activeChMutex.RLock()
+ defer c.activeChMutex.RUnlock()
+ if c.activeCh != nil {
+ logger.Debugw(ctx, "update-activity", log.Fields{"api-endpoint": c.apiEndPoint})
+ c.activeCh <- struct{}{}
+
+ // Update liveness only in connected state
+ if c.livenessCallback != nil {
+ c.stateLock.RLock()
+ if c.state == stateConnected {
+ c.livenessCallback(time.Now())
+ }
+ c.stateLock.RUnlock()
+ }
+ }
+}
+
+func WithGrpcMonitorContext(ctx context.Context, name string) context.Context {
+ ctx = context.WithValue(ctx, grpcMonitorContextKey, name)
+ return ctx
+}
+
+func isGrpcMonitorKeyPresentInContext(ctx context.Context) bool {
+ if ctx != nil {
+ _, present := ctx.Value(grpcMonitorContextKey).(string)
+ return present
+ }
+ return false
+}
+
+// monitorActivity monitors the activity on the gRPC connection. If there are no activity after a specified
+// timeout, it will send a default API request on that connection. If the connection is good then nothing
+// happens. If it's bad this will trigger reconnection attempts.
+func (c *Client) monitorActivity(ctx context.Context, handler SetAndTestServiceHandler) {
+ logger.Infow(ctx, "start-activity-monitor", log.Fields{"endpoint": c.apiEndPoint})
+
+ // Create an activity monitor channel. Unbuffered channel works well. However, we use a buffered
+ // channel here as a safeguard of having the grpc interceptor publishing too many events that can be
+ // consumed by this monitoring thread
+ c.activeChMutex.Lock()
+ c.activeCh = make(chan struct{}, 10)
+ c.activeChMutex.Unlock()
+
+ // Interval to wait for no activity before probing the connection
+ timeout := c.monitorInterval
+loop:
+ for {
+ timeoutTimer := time.NewTimer(timeout)
+ select {
+
+ case <-c.activeCh:
+ logger.Debugw(ctx, "received-active-notification", log.Fields{"endpoint": c.apiEndPoint})
+
+ // Reset timer
+ if !timeoutTimer.Stop() {
+ <-timeoutTimer.C
+ }
+
+ case <-ctx.Done():
+ break loop
+
+ case <-timeoutTimer.C:
+ // Trigger an activity check if the state is connected. If the state is not connected then there is already
+ // a backoff retry mechanism in place to retry establishing connection.
+ c.stateLock.RLock()
+ runCheck := c.state == stateConnected
+ c.stateLock.RUnlock()
+ if runCheck {
+ go func() {
+ logger.Debugw(ctx, "connection-check-start", log.Fields{"api-endpoint": c.apiEndPoint})
+ subCtx, cancel := context.WithTimeout(ctx, c.backoffMaxInterval)
+ defer cancel()
+ subCtx = WithGrpcMonitorContext(subCtx, "grpc-monitor")
+ c.connectionLock.RLock()
+ defer c.connectionLock.RUnlock()
+ if c.connection != nil {
+ response := handler(subCtx, c.connection)
+ logger.Debugw(ctx, "connection-check-response", log.Fields{"api-endpoint": c.apiEndPoint, "up": response != nil})
+ }
+ }()
+ }
+ }
+ }
+ logger.Infow(ctx, "activity-monitor-stopping", log.Fields{"endpoint": c.apiEndPoint})
+}
+
+// Start kicks off the adapter agent by trying to connect to the adapter
+func (c *Client) Start(ctx context.Context, handler SetAndTestServiceHandler) {
+ logger.Debugw(ctx, "Starting GRPC - Client", log.Fields{"api-endpoint": c.apiEndPoint})
+
+ // If the context contains a k8s probe then register services
+ p := probe.GetProbeFromContext(ctx)
+ if p != nil {
+ p.RegisterService(ctx, c.apiEndPoint)
+ }
+
+ // Enable activity check, if required
+ if c.activityCheck {
+ go c.monitorActivity(ctx, handler)
+ }
+
+ initialConnection := true
+ c.events <- eventConnecting
+ backoff := NewBackoff(c.backoffInitialInterval, c.backoffMaxInterval, c.backoffMaxElapsedTime)
+ attempt := 1
+loop:
+ for {
+ select {
+ case <-ctx.Done():
+ logger.Debugw(ctx, "context-closing", log.Fields{"endpoint": c.apiEndPoint})
+ return
+ case event := <-c.events:
+ logger.Debugw(ctx, "received-event", log.Fields{"event": event, "endpoint": c.apiEndPoint})
+ switch event {
+ case eventConnecting:
+ logger.Debugw(ctx, "connection-start", log.Fields{"endpoint": c.apiEndPoint, "attempts": attempt})
+
+ c.stateLock.Lock()
+ if c.state == stateConnected {
+ c.state = stateDisconnected
+ }
+ if c.state != stateConnecting {
+ c.state = stateConnecting
+ go func() {
+ if err := c.connectToEndpoint(ctx, handler, p); err != nil {
+ c.stateLock.Lock()
+ c.state = stateDisconnected
+ c.stateLock.Unlock()
+ logger.Errorw(ctx, "connection-failed", log.Fields{"endpoint": c.apiEndPoint, "attempt": attempt, "error": err})
+
+ // Retry connection after a delay
+ if err = backoff.Backoff(ctx); err != nil {
+ // Context has closed or reached maximum elapsed time, if set
+ logger.Errorw(ctx, "retry-aborted", log.Fields{"endpoint": c.apiEndPoint, "error": err})
+ return
+ }
+ attempt += 1
+ c.events <- eventConnecting
+ } else {
+ backoff.Reset()
+ }
+ }()
+ }
+ c.stateLock.Unlock()
+
+ case eventConnected:
+ logger.Debugw(ctx, "endpoint-connected", log.Fields{"endpoint": c.apiEndPoint})
+ attempt = 1
+ c.stateLock.Lock()
+ if c.state != stateConnected {
+ c.state = stateConnected
+ if initialConnection {
+ logger.Debugw(ctx, "initial-endpoint-connection", log.Fields{"endpoint": c.apiEndPoint})
+ initialConnection = false
+ } else {
+ logger.Debugw(ctx, "endpoint-reconnection", log.Fields{"endpoint": c.apiEndPoint})
+ // Trigger any callback on a restart
+ go func() {
+ err := c.onRestart(log.WithSpanFromContext(context.Background(), ctx), c.apiEndPoint)
+ if err != nil {
+ logger.Errorw(ctx, "unable-to-restart-endpoint", log.Fields{"error": err, "endpoint": c.apiEndPoint})
+ }
+ }()
+ }
+ }
+ c.stateLock.Unlock()
+
+ case eventDisconnected:
+ if p != nil {
+ p.UpdateStatus(ctx, c.apiEndPoint, probe.ServiceStatusNotReady)
+ }
+ logger.Debugw(ctx, "endpoint-disconnected", log.Fields{"endpoint": c.apiEndPoint, "status": c.state})
+
+ // Try to connect again
+ c.events <- eventConnecting
+
+ case eventStopped:
+ logger.Debugw(ctx, "endPoint-stopped", log.Fields{"adapter": c.apiEndPoint})
+ go func() {
+ if err := c.closeConnection(ctx, p); err != nil {
+ logger.Errorw(ctx, "endpoint-closing-connection-failed", log.Fields{"endpoint": c.apiEndPoint, "error": err})
+ }
+ }()
+ break loop
+ case eventError:
+ logger.Errorw(ctx, "endpoint-error-event", log.Fields{"endpoint": c.apiEndPoint})
+ default:
+ logger.Errorw(ctx, "endpoint-unknown-event", log.Fields{"endpoint": c.apiEndPoint, "error": event})
+ }
+ }
+ }
+ logger.Infow(ctx, "endpoint-stopped", log.Fields{"endpoint": c.apiEndPoint})
+}
+
+func (c *Client) connectToEndpoint(ctx context.Context, handler SetAndTestServiceHandler, p *probe.Probe) error {
+ if p != nil {
+ p.UpdateStatus(ctx, c.apiEndPoint, probe.ServiceStatusPreparing)
+ }
+
+ c.connectionLock.Lock()
+ defer c.connectionLock.Unlock()
+
+ if c.connection != nil {
+ _ = c.connection.Close()
+ c.connection = nil
+ }
+
+ c.service = nil
+
+ // Use Interceptors to:
+ // 1. automatically inject
+ // 2. publish Open Tracing Spans by this GRPC Client
+ // 3. detect connection failure on client calls such that the reconnection process can begin
+ conn, err := grpc.Dial(c.apiEndPoint,
+ grpc.WithInsecure(),
+ grpc.WithStreamInterceptor(grpc_middleware.ChainStreamClient(
+ grpc_opentracing.StreamClientInterceptor(grpc_opentracing.WithTracer(log.ActiveTracerProxy{})),
+ )),
+ grpc.WithUnaryInterceptor(grpc_middleware.ChainUnaryClient(
+ grpc_opentracing.UnaryClientInterceptor(grpc_opentracing.WithTracer(log.ActiveTracerProxy{})),
+ )),
+ grpc.WithUnaryInterceptor(c.clientInterceptor),
+ // Set keealive parameter - use default grpc values
+ grpc.WithKeepaliveParams(keepalive.ClientParameters{
+ Time: c.monitorInterval,
+ Timeout: c.backoffMaxInterval,
+ PermitWithoutStream: true,
+ }),
+ )
+
+ if err == nil {
+ subCtx, cancel := context.WithTimeout(ctx, c.backoffMaxInterval)
+ defer cancel()
+ svc := handler(subCtx, conn)
+ if svc != nil {
+ c.connection = conn
+ c.service = svc
+ if p != nil {
+ p.UpdateStatus(ctx, c.apiEndPoint, probe.ServiceStatusRunning)
+ }
+ logger.Infow(ctx, "connected-to-endpoint", log.Fields{"endpoint": c.apiEndPoint})
+ c.events <- eventConnected
+ return nil
+ }
+ }
+ logger.Warnw(ctx, "Failed to connect to endpoint",
+ log.Fields{
+ "endpoint": c.apiEndPoint,
+ "error": err,
+ })
+
+ if p != nil {
+ p.UpdateStatus(ctx, c.apiEndPoint, probe.ServiceStatusFailed)
+ }
+ return fmt.Errorf("no connection to endpoint %s", c.apiEndPoint)
+}
+
+func (c *Client) closeConnection(ctx context.Context, p *probe.Probe) error {
+ if p != nil {
+ p.UpdateStatus(ctx, c.apiEndPoint, probe.ServiceStatusStopped)
+ }
+
+ c.connectionLock.Lock()
+ defer c.connectionLock.Unlock()
+
+ if c.connection != nil {
+ err := c.connection.Close()
+ c.connection = nil
+ return err
+ }
+
+ return nil
+}
+
+func (c *Client) Stop(ctx context.Context) {
+ if !c.done {
+ c.events <- eventStopped
+ close(c.events)
+ c.done = true
+ }
+}
+
+// SetService is used for testing only
+func (c *Client) SetService(srv interface{}) {
+ c.connectionLock.Lock()
+ defer c.connectionLock.Unlock()
+ c.service = srv
+}
+
+func (c *Client) SubscribeForLiveness(callback func(timestamp time.Time)) {
+ c.livenessCallback = callback
+}