[VOL-4442] grpc streaming connection monitoring
Change-Id: I8a361473a252f6d2b64578a97980b2b7b3618f55
diff --git a/internal/pkg/core/openonu.go b/internal/pkg/core/openonu.go
index 2c9ebd4..890b92d 100755
--- a/internal/pkg/core/openonu.go
+++ b/internal/pkg/core/openonu.go
@@ -29,6 +29,7 @@
vgrpc "github.com/opencord/voltha-lib-go/v7/pkg/grpc"
conf "github.com/opencord/voltha-lib-go/v7/pkg/config"
+ "github.com/opencord/voltha-protos/v5/go/adapter_service"
"github.com/opencord/voltha-protos/v5/go/common"
"github.com/opencord/voltha-protos/v5/go/health"
"github.com/opencord/voltha-protos/v5/go/olt_inter_adapter_service"
@@ -163,15 +164,14 @@
return nil
}
-/*
-//stop terminates the session
-func (oo *OpenONUAC) stop(ctx context.Context) error {
- logger.Info(ctx,"stopping-device-manager")
- oo.exitChannel <- 1
- logger.Info(ctx,"device-manager-stopped")
+//Stop closes the exit channel and stops all gRPC clients used by the adapter
+func (oo *OpenONUAC) Stop(ctx context.Context) error {
+ logger.Info(ctx, "stopping-device-manager")
+ close(oo.exitChannel)
+ oo.stopAllGrpcClients(ctx)
+ logger.Info(ctx, "device-manager-stopped")
return nil
}
-*/
func (oo *OpenONUAC) addDeviceHandlerToMap(ctx context.Context, agent *deviceHandler) {
oo.mutexDeviceHandlersMap.Lock()
@@ -223,14 +223,6 @@
return agent
}
-// GetHealthStatus is used as a service readiness validation as a grpc connection
-func (oo *OpenONUAC) GetHealthStatus(ctx context.Context, clientConn *common.Connection) (*health.HealthStatus, error) {
- // Update the remote reachability
- oo.updateReachabilityFromRemote(ctx, clientConn)
-
- return &health.HealthStatus{State: health.HealthStatus_HEALTHY}, nil
-}
-
// AdoptDevice creates a new device handler if not present already and then adopts the device
func (oo *OpenONUAC) AdoptDevice(ctx context.Context, device *voltha.Device) (*empty.Empty, error) {
if device == nil {
@@ -263,7 +255,7 @@
logger.Infow(ctx, "reconcile-device", log.Fields{"device-id": device.Id, "parent-id": device.ParentId})
// Check whether the grpc client in the adapter of the parent device can reach us yet
- if !oo.isReachableFromRemote(device.ProxyAddress.AdapterEndpoint, device.ParentId) {
+ if !oo.isReachableFromRemote(ctx, device.ProxyAddress.AdapterEndpoint, device.ProxyAddress.DeviceId) {
return nil, status.Errorf(codes.Unavailable, "adapter-not-reachable-from-parent-%s", device.ProxyAddress.AdapterEndpoint)
}
@@ -988,11 +980,13 @@
oo.reachableFromRemote[endpointHash] = &reachabilityFromRemote{lastKeepAlive: time.Now(), keepAliveInterval: remote.KeepAliveInterval}
}
-func (oo *OpenONUAC) isReachableFromRemote(endpoint string, contextInfo string) bool {
+func (oo *OpenONUAC) isReachableFromRemote(ctx context.Context, endpoint string, contextInfo string) bool {
+ logger.Debugw(ctx, "checking-remote-reachability", log.Fields{"endpoint": endpoint, "context": contextInfo})
oo.lockReachableFromRemote.RLock()
defer oo.lockReachableFromRemote.RUnlock()
endpointHash := getHash(endpoint, contextInfo)
if _, ok := oo.reachableFromRemote[endpointHash]; ok {
+ logger.Debugw(ctx, "endpoint-exists", log.Fields{"last-keep-alive": time.Since(oo.reachableFromRemote[endpointHash].lastKeepAlive)})
// Assume the connection is down if we did not receive 2 keep alives in succession
maxKeepAliveWait := time.Duration(oo.reachableFromRemote[endpointHash].keepAliveInterval * 2)
return time.Since(oo.reachableFromRemote[endpointHash].lastKeepAlive) <= maxKeepAliveWait
@@ -1000,6 +994,20 @@
return false
}
+//stopAllGrpcClients stops all grpc clients in use
+func (oo *OpenONUAC) stopAllGrpcClients(ctx context.Context) {
+ // Stop the clients that connect to the parent
+ oo.lockParentAdapterClients.Lock()
+ for key := range oo.parentAdapterClients {
+ oo.parentAdapterClients[key].Stop(ctx)
+ delete(oo.parentAdapterClients, key)
+ }
+ oo.lockParentAdapterClients.Unlock()
+
+ // Stop core client connection
+ oo.coreClient.Stop(ctx)
+}
+
func (oo *OpenONUAC) setupParentInterAdapterClient(ctx context.Context, endpoint string) error {
logger.Infow(ctx, "setting-parent-adapter-connection", log.Fields{"parent-endpoint": endpoint})
oo.lockParentAdapterClients.Lock()
@@ -1011,6 +1019,7 @@
childClient, err := vgrpc.NewClient(
oo.config.AdapterEndpoint,
endpoint,
+ "olt_inter_adapter_service.OltInterAdapterService",
oo.oltAdapterRestarted)
if err != nil {
@@ -1019,7 +1028,7 @@
oo.parentAdapterClients[endpoint] = childClient
- go oo.parentAdapterClients[endpoint].Start(log.WithSpanFromContext(context.TODO(), ctx), setAndTestOltInterAdapterServiceHandler)
+ go oo.parentAdapterClients[endpoint].Start(log.WithSpanFromContext(context.TODO(), ctx), getOltInterAdapterServiceClientHandler)
// Wait until we have a connection to the child adapter.
// Unlimited retries or until context expires
@@ -1074,13 +1083,12 @@
return nil
}
-// setAndTestOltInterAdapterServiceHandler is used to test whether the remote gRPC service is up
-func setAndTestOltInterAdapterServiceHandler(ctx context.Context, conn *grpc.ClientConn, clientConn *common.Connection) interface{} {
- svc := olt_inter_adapter_service.NewOltInterAdapterServiceClient(conn)
- if h, err := svc.GetHealthStatus(ctx, clientConn); err != nil || h.State != health.HealthStatus_HEALTHY {
+// getOltInterAdapterServiceClientHandler returns a client for the remote gRPC service, or nil if conn is nil
+func getOltInterAdapterServiceClientHandler(ctx context.Context, conn *grpc.ClientConn) interface{} {
+ if conn == nil {
return nil
}
- return svc
+ return olt_inter_adapter_service.NewOltInterAdapterServiceClient(conn)
}
func (oo *OpenONUAC) forceDeleteDeviceKvData(ctx context.Context, aDeviceID string) error {
@@ -1126,6 +1134,49 @@
return nil
}
+// GetHealthStatus is used by the voltha core to open a streaming connection with the onu adapter.
+func (oo *OpenONUAC) GetHealthStatus(stream adapter_service.AdapterService_GetHealthStatusServer) error {
+ ctx := context.Background()
+ logger.Debugw(ctx, "receive-stream-connection", log.Fields{"stream": stream})
+
+ if stream == nil {
+ return fmt.Errorf("conn-is-nil %v", stream)
+ }
+ initialRequestTime := time.Now()
+ var remoteClient *common.Connection
+ var tempClient *common.Connection
+ var err error
+loop:
+ for {
+ tempClient, err = stream.Recv()
+ if err != nil {
+ logger.Warnw(ctx, "received-stream-error", log.Fields{"remote-client": remoteClient, "error": err})
+ break loop
+ }
+ // Send a response back
+ err = stream.Send(&health.HealthStatus{State: health.HealthStatus_HEALTHY})
+ if err != nil {
+ logger.Warnw(ctx, "sending-stream-error", log.Fields{"remote-client": remoteClient, "error": err})
+ break loop
+ }
+ remoteClient = tempClient
+
+ logger.Debugw(ctx, "received-keep-alive", log.Fields{"remote-client": remoteClient})
+
+ select {
+ case <-stream.Context().Done():
+ logger.Infow(ctx, "stream-keep-alive-context-done", log.Fields{"remote-client": remoteClient, "error": stream.Context().Err()})
+ break loop
+ case <-oo.exitChannel:
+ logger.Warnw(ctx, "received-stop", log.Fields{"remote-client": remoteClient, "initial-conn-time": initialRequestTime})
+ break loop
+ default:
+ }
+ }
+ logger.Errorw(ctx, "connection-down", log.Fields{"remote-client": remoteClient, "error": err, "initial-conn-time": initialRequestTime})
+ return err
+}
+
/*
*
* Unimplemented APIs
diff --git a/internal/pkg/core/openonuInterAdapter.go b/internal/pkg/core/openonuInterAdapter.go
new file mode 100644
index 0000000..8e9bdec
--- /dev/null
+++ b/internal/pkg/core/openonuInterAdapter.go
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2022-present Open Networking Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//Package core provides the utility for onu devices, flows and statistics
+package core
+
+import (
+ "context"
+ "fmt"
+ "time"
+
+ "github.com/golang/protobuf/ptypes/empty"
+ "github.com/opencord/voltha-lib-go/v7/pkg/log"
+ "github.com/opencord/voltha-protos/v5/go/common"
+ "github.com/opencord/voltha-protos/v5/go/health"
+ ia "github.com/opencord/voltha-protos/v5/go/inter_adapter"
+ "github.com/opencord/voltha-protos/v5/go/onu_inter_adapter_service"
+)
+
+//OpenONUACInterAdapter structure holds a reference to ONU adapter
+type OpenONUACInterAdapter struct {
+ onuAdapter *OpenONUAC
+ exitChannel chan struct{}
+}
+
+//NewOpenONUACAdapter returns a new instance of OpenONUACInterAdapter
+func NewOpenONUACAdapter(ctx context.Context, onuAdapter *OpenONUAC) *OpenONUACInterAdapter {
+ return &OpenONUACInterAdapter{onuAdapter: onuAdapter, exitChannel: make(chan struct{})}
+}
+
+//Start only logs that the inter-adapter is starting and always returns nil
+func (oo *OpenONUACInterAdapter) Start(ctx context.Context) error {
+ logger.Info(ctx, "starting-openonu-inter-adapter")
+ return nil
+}
+
+//OnuIndication redirects the request to the core ONU adapter handler
+func (oo *OpenONUACInterAdapter) OnuIndication(ctx context.Context, onuInd *ia.OnuIndicationMessage) (*empty.Empty, error) {
+ return oo.onuAdapter.OnuIndication(ctx, onuInd)
+}
+
+//OmciIndication redirects the request to the core ONU adapter handler
+func (oo *OpenONUACInterAdapter) OmciIndication(ctx context.Context, msg *ia.OmciMessage) (*empty.Empty, error) {
+ return oo.onuAdapter.OmciIndication(ctx, msg)
+}
+
+//DownloadTechProfile redirects the request to the core ONU adapter handler
+func (oo *OpenONUACInterAdapter) DownloadTechProfile(ctx context.Context, tProfile *ia.TechProfileDownloadMessage) (*empty.Empty, error) {
+ return oo.onuAdapter.DownloadTechProfile(ctx, tProfile)
+}
+
+//DeleteGemPort redirects the request to the core ONU adapter handler
+func (oo *OpenONUACInterAdapter) DeleteGemPort(ctx context.Context, gPort *ia.DeleteGemPortMessage) (*empty.Empty, error) {
+ return oo.onuAdapter.DeleteGemPort(ctx, gPort)
+}
+
+//DeleteTCont redirects the request to the core ONU adapter handler
+func (oo *OpenONUACInterAdapter) DeleteTCont(ctx context.Context, tConf *ia.DeleteTcontMessage) (*empty.Empty, error) {
+ return oo.onuAdapter.DeleteTCont(ctx, tConf)
+}
+
+//Stop closes the exit channel that the GetHealthStatus stream loop checks
+func (oo *OpenONUACInterAdapter) Stop(ctx context.Context) error {
+ close(oo.exitChannel)
+ logger.Info(ctx, "openonu-inter-adapter-stopped")
+ return nil
+}
+
+// GetHealthStatus is used by an OnuInterAdapterService client to detect a lost
+// connection to the gRPC server hosting the OnuInterAdapterService service
+func (oo *OpenONUACInterAdapter) GetHealthStatus(stream onu_inter_adapter_service.OnuInterAdapterService_GetHealthStatusServer) error {
+ ctx := context.Background()
+ logger.Debugw(ctx, "receive-stream-connection", log.Fields{"stream": stream})
+
+ if stream == nil {
+ return fmt.Errorf("conn-is-nil %v", stream)
+ }
+ initialRequestTime := time.Now()
+ var remoteClient *common.Connection
+ var tempClient *common.Connection
+ var err error
+loop:
+ for {
+ tempClient, err = stream.Recv()
+ if err != nil {
+ logger.Warnw(ctx, "received-stream-error", log.Fields{"remote-client": remoteClient, "error": err})
+ break loop
+ }
+ // Send a response back
+ err = stream.Send(&health.HealthStatus{State: health.HealthStatus_HEALTHY})
+ if err != nil {
+ logger.Warnw(ctx, "sending-stream-error", log.Fields{"remote-client": remoteClient, "error": err})
+ break loop
+ }
+
+ remoteClient = tempClient
+ logger.Debugw(ctx, "received-keep-alive", log.Fields{"remote-client": remoteClient})
+ oo.onuAdapter.updateReachabilityFromRemote(context.Background(), remoteClient)
+
+ select {
+ case <-stream.Context().Done():
+ logger.Infow(ctx, "stream-keep-alive-context-done", log.Fields{"remote-client": remoteClient, "error": stream.Context().Err()})
+ break loop
+ case <-oo.exitChannel:
+ logger.Warnw(ctx, "received-stop", log.Fields{"remote-client": remoteClient, "initial-conn-time": initialRequestTime})
+ break loop
+ default:
+ }
+ }
+ logger.Errorw(ctx, "connection-down", log.Fields{"remote-client": remoteClient, "error": err, "initial-conn-time": initialRequestTime})
+ return err
+}