[VOL-4479] openonu panic in periodic-voltha-multi-uni-multiple-olts-test-bbsim-2.8 +
ONU adapter reconciliation may get stuck on VLAN processing, especially in TT traffic scenarios

Signed-off-by: mpagenko <michael.pagenkopf@adtran.com>
Change-Id: I806f97c85e09bc2c741e40569a67ab3ce21300ab
diff --git a/VERSION b/VERSION
index 92ee6ac..25b22e0 100755
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.3.15
\ No newline at end of file
+1.3.16
diff --git a/internal/pkg/onuadaptercore/device_handler.go b/internal/pkg/onuadaptercore/device_handler.go
index c737e74..77b1dbd 100644
--- a/internal/pkg/onuadaptercore/device_handler.go
+++ b/internal/pkg/onuadaptercore/device_handler.go
@@ -3753,6 +3753,7 @@
 						logger.Errorw(ctx, "No valid OnuDevice - aborting Core DeviceStateUpdate",
 							log.Fields{"device-id": dh.deviceID})
 					} else {
+						onuDevEntry.mutexPersOnuConfig.RLock()
 						if onuDevEntry.sOnuPersistentData.PersOperState == "up" {
 							connectStatus = voltha.ConnectStatus_REACHABLE
 							if !onuDevEntry.sOnuPersistentData.PersUniDisableDone {
@@ -3767,7 +3768,7 @@
 							onuDevEntry.sOnuPersistentData.PersOperState == "" {
 							operState = voltha.OperStatus_DISCOVERED
 						}
-
+						onuDevEntry.mutexPersOnuConfig.RUnlock()
 						logger.Debugw(ctx, "Core DeviceStateUpdate", log.Fields{"connectStatus": connectStatus, "operState": operState})
 					}
 					logger.Debugw(ctx, "reconciling has been finished in time",
@@ -3783,8 +3784,12 @@
 					if onuDevEntry := dh.getOnuDeviceEntry(ctx, true); onuDevEntry == nil {
 						logger.Errorw(ctx, "No valid OnuDevice",
 							log.Fields{"device-id": dh.deviceID})
-					} else if onuDevEntry.sOnuPersistentData.PersOperState == "up" {
-						connectStatus = voltha.ConnectStatus_REACHABLE
+					} else {
+						onuDevEntry.mutexPersOnuConfig.RLock()
+						if onuDevEntry.sOnuPersistentData.PersOperState == "up" {
+							connectStatus = voltha.ConnectStatus_REACHABLE
+						}
+						onuDevEntry.mutexPersOnuConfig.RUnlock()
 					}
 
 					dh.deviceReconcileFailedUpdate(ctx, drReconcileCanceled, connectStatus)
@@ -3796,12 +3801,14 @@
 				if onuDevEntry := dh.getOnuDeviceEntry(ctx, true); onuDevEntry == nil {
 					logger.Errorw(ctx, "No valid OnuDevice",
 						log.Fields{"device-id": dh.deviceID})
-				} else if onuDevEntry.sOnuPersistentData.PersOperState == "up" {
-					connectStatus = voltha.ConnectStatus_REACHABLE
+				} else {
+					onuDevEntry.mutexPersOnuConfig.RLock()
+					if onuDevEntry.sOnuPersistentData.PersOperState == "up" {
+						connectStatus = voltha.ConnectStatus_REACHABLE
+					}
+					onuDevEntry.mutexPersOnuConfig.RUnlock()
 				}
-
 				dh.deviceReconcileFailedUpdate(ctx, drReconcileMaxTimeout, connectStatus)
-
 			}
 			dh.mutexReconcilingFlag.Lock()
 			dh.reconciling = cNoReconciling
diff --git a/internal/pkg/onuadaptercore/omci_vlan_config.go b/internal/pkg/onuadaptercore/omci_vlan_config.go
index 01a98df..daa4473 100644
--- a/internal/pkg/onuadaptercore/omci_vlan_config.go
+++ b/internal/pkg/onuadaptercore/omci_vlan_config.go
@@ -1313,9 +1313,14 @@
 	if oFsm.pDeviceHandler.isSkipOnuConfigReconciling() {
 		oFsm.configuredUniFlow = oFsm.numUniFlows
 		if oFsm.lastFlowToReconcile {
-			logger.Debugw(ctx, "reconciling - flow processing finished", log.Fields{"device-id": oFsm.deviceID})
+			logger.Debugw(ctx, "reconciling - flow processing finished", log.Fields{
+				"device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID})
 			oFsm.pDeviceHandler.setReconcilingFlows(false)
-			oFsm.pDeviceHandler.chReconcilingFlowsFinished <- true
+			//use a non-blocking channel send to avoid getting stuck when no receiver is waiting
+			select {
+			case oFsm.pDeviceHandler.chReconcilingFlowsFinished <- true:
+			default:
+			}
 		}
 		logger.Debugw(ctx, "reconciling - skip enterVlanConfigDone processing",
 			log.Fields{"numUniFlows": oFsm.numUniFlows, "configuredUniFlow": oFsm.configuredUniFlow, "device-id": oFsm.deviceID})
diff --git a/internal/pkg/onuadaptercore/onu_device_entry.go b/internal/pkg/onuadaptercore/onu_device_entry.go
index cd2799f..64929d5 100644
--- a/internal/pkg/onuadaptercore/onu_device_entry.go
+++ b/internal/pkg/onuadaptercore/onu_device_entry.go
@@ -307,8 +307,6 @@
 	//  within the FSM event procedures
 	omciMessageReceived              chan bool    //seperate channel needed by DownloadFsm
 	omciRebootMessageReceivedChannel chan Message // channel needed by Reboot request
-
-	mutexTcontMap sync.RWMutex
 }
 
 //newOnuDeviceEntry returns a new instance of a OnuDeviceEntry
@@ -947,8 +945,8 @@
 	logger.Debugw(ctx, "allocate-free-tcont", log.Fields{"device-id": oo.deviceID, "allocID": allocID,
 		"allocated-instances": oo.sOnuPersistentData.PersTcontMap})
 
-	oo.mutexTcontMap.Lock()
-	defer oo.mutexTcontMap.Unlock()
+	oo.mutexPersOnuConfig.Lock()
+	defer oo.mutexPersOnuConfig.Unlock()
 	if entityID, ok := oo.sOnuPersistentData.PersTcontMap[allocID]; ok {
 		//tcont already allocated before, return the used instance-id
 		return entityID, true, nil
@@ -972,12 +970,11 @@
 		}
 	}
 	return 0, false, fmt.Errorf(fmt.Sprintf("no-free-tcont-left-for-device-%s", oo.deviceID))
-
 }
 
 func (oo *OnuDeviceEntry) freeTcont(ctx context.Context, allocID uint16) {
 	logger.Debugw(ctx, "free-tcont", log.Fields{"device-id": oo.deviceID, "alloc": allocID})
-	oo.mutexTcontMap.Lock()
-	defer oo.mutexTcontMap.Unlock()
+	oo.mutexPersOnuConfig.Lock()
+	defer oo.mutexPersOnuConfig.Unlock()
 	delete(oo.sOnuPersistentData.PersTcontMap, allocID)
 }
diff --git a/internal/pkg/onuadaptercore/onu_metrics_manager.go b/internal/pkg/onuadaptercore/onu_metrics_manager.go
index c33fb99..ab3000c 100644
--- a/internal/pkg/onuadaptercore/onu_metrics_manager.go
+++ b/internal/pkg/onuadaptercore/onu_metrics_manager.go
@@ -2959,9 +2959,11 @@
 func (mm *onuMetricsManager) getEthernetFrameExtendedMETypeFromKvStore(ctx context.Context) (bool, error) {
 	// Check if the data is already available in KV store, if yes, do not send the request for get me.
 	var data me.ClassID
+	mm.pDeviceHandler.pOnuOmciDevice.mutexPersOnuConfig.RLock()
 	key := fmt.Sprintf("%s/%s/%s", mm.pDeviceHandler.pOnuOmciDevice.sOnuPersistentData.PersVendorID,
 		mm.pDeviceHandler.pOnuOmciDevice.sOnuPersistentData.PersEquipmentID,
 		mm.pDeviceHandler.pOnuOmciDevice.sOnuPersistentData.PersActiveSwVersion)
+	mm.pDeviceHandler.pOnuOmciDevice.mutexPersOnuConfig.RUnlock()
 	Value, err := mm.extPmKvStore.Get(ctx, key)
 	if err == nil {
 		if Value != nil {
@@ -3059,9 +3061,11 @@
 }
 
 func (mm *onuMetricsManager) putExtPmMeKvStore(ctx context.Context) {
+	mm.pDeviceHandler.pOnuOmciDevice.mutexPersOnuConfig.RLock()
 	key := fmt.Sprintf("%s/%s/%s", mm.pDeviceHandler.pOnuOmciDevice.sOnuPersistentData.PersVendorID,
 		mm.pDeviceHandler.pOnuOmciDevice.sOnuPersistentData.PersEquipmentID,
 		mm.pDeviceHandler.pOnuOmciDevice.sOnuPersistentData.PersActiveSwVersion)
+	mm.pDeviceHandler.pOnuOmciDevice.mutexPersOnuConfig.RUnlock()
 	// check if we get the supported type me for ethernet frame extended pm class id
 	if mm.supportedEthernetFrameExtendedPMClass == 0 {
 		logger.Error(ctx, "unable-to-get-any-supported-extended-pm-me-class")