[VOL-4259] R2.8 OpenOnuAdapter Flow/GemPort removal deadlock if both start concurrently

Signed-off-by: mpagenko <michael.pagenkopf@adtran.com>
Change-Id: Ic2a54b5878fc95022fc5430a0b8d4a74b3c2b048
diff --git a/VERSION b/VERSION
index 3336003..e05cb33 100755
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.3.7
+1.3.8
diff --git a/internal/pkg/onuadaptercore/omci_ani_config.go b/internal/pkg/onuadaptercore/omci_ani_config.go
index 4473c7e..3985f00 100644
--- a/internal/pkg/onuadaptercore/omci_ani_config.go
+++ b/internal/pkg/onuadaptercore/omci_ani_config.go
@@ -335,10 +335,10 @@
 			"device-id":         oFsm.deviceID})
 
 		// Access critical state with lock
-		oFsm.pUniTechProf.mutexTPState.Lock()
+		oFsm.pUniTechProf.mutexTPState.RLock()
 		oFsm.alloc0ID = oFsm.pUniTechProf.mapPonAniConfig[oFsm.uniTpKey].tcontParams.allocID
 		mapGemPortParams := oFsm.pUniTechProf.mapPonAniConfig[oFsm.uniTpKey].mapGemPortParams
-		oFsm.pUniTechProf.mutexTPState.Unlock()
+		oFsm.pUniTechProf.mutexTPState.RUnlock()
 
 		//for all TechProfile set GemIndices
 		for _, gemEntry := range mapGemPortParams {
@@ -760,7 +760,8 @@
 }
 
 func (oFsm *uniPonAniConfigFsm) enterRemovingGemIW(ctx context.Context, e *fsm.Event) {
-	oFsm.pUniTechProf.mutexTPState.Lock()
+	// no need to protect access to oFsm.waitFlowDeleteChannel, only used in synchronized state entries
+	//  or CancelProcessing() that uses separate isWaitingForFlowDelete to write to the channel
 	//flush the waitFlowDeleteChannel - possibly already/still set by some previous activity
 	select {
 	case <-oFsm.waitFlowDeleteChannel:
@@ -769,8 +770,8 @@
 	}
 
 	if oFsm.pDeviceHandler.UniVlanConfigFsmMap[oFsm.pOnuUniPort.uniID] != nil {
+		// ensure mutexTPState not locked before calling some VlanConfigFsm activity (that might already be pending on it)
 		if oFsm.pDeviceHandler.UniVlanConfigFsmMap[oFsm.pOnuUniPort.uniID].IsFlowRemovePending(oFsm.waitFlowDeleteChannel) {
-			oFsm.pUniTechProf.mutexTPState.Unlock()
 			logger.Debugw(ctx, "flow remove pending - wait before processing gem port delete",
 				log.Fields{"device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID, "techProfile-id": oFsm.techProfileID})
 			// if flow remove is pending then wait for flow remove to finish first before proceeding with gem port delete
@@ -792,9 +793,10 @@
 			log.Fields{"device-id": oFsm.deviceID, "techProfile-id": oFsm.techProfileID})
 	}
 
+	oFsm.pUniTechProf.mutexTPState.RLock()
 	// get the related GemPort entity Id from pUniTechProf, OMCI Gem* entityID is set to be equal to GemPortId!
 	loGemPortID := (*(oFsm.pUniTechProf.mapRemoveGemEntry[oFsm.uniTpKey])).gemPortID
-	oFsm.pUniTechProf.mutexTPState.Unlock()
+	oFsm.pUniTechProf.mutexTPState.RUnlock()
 	logger.Debugw(ctx, "uniPonAniConfigFsm - start removing one GemIwTP", log.Fields{
 		"device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID,
 		"GemIwTp-entity-id": loGemPortID})
@@ -894,9 +896,9 @@
 }
 
 func (oFsm *uniPonAniConfigFsm) enterRemovingGemNCTP(ctx context.Context, e *fsm.Event) {
-	oFsm.pUniTechProf.mutexTPState.Lock()
+	oFsm.pUniTechProf.mutexTPState.RLock()
 	loGemPortID := (*(oFsm.pUniTechProf.mapRemoveGemEntry[oFsm.uniTpKey])).gemPortID
-	oFsm.pUniTechProf.mutexTPState.Unlock()
+	oFsm.pUniTechProf.mutexTPState.RUnlock()
 	logger.Debugw(ctx, "uniPonAniConfigFsm - start removing one GemNCTP", log.Fields{
 		"device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID,
 		"GemNCTP-entity-id": loGemPortID})
@@ -928,9 +930,9 @@
 	}
 }
 func (oFsm *uniPonAniConfigFsm) enterRemovingTD(ctx context.Context, e *fsm.Event) {
-	oFsm.pUniTechProf.mutexTPState.Lock()
+	oFsm.pUniTechProf.mutexTPState.RLock()
 	loGemPortID := (*(oFsm.pUniTechProf.mapRemoveGemEntry[oFsm.uniTpKey])).gemPortID
-	oFsm.pUniTechProf.mutexTPState.Unlock()
+	oFsm.pUniTechProf.mutexTPState.RUnlock()
 	logger.Debugw(ctx, "uniPonAniConfigFsm - start removing Traffic Descriptor", log.Fields{
 		"device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID,
 		"TD-entity-id": loGemPortID})
diff --git a/internal/pkg/onuadaptercore/omci_vlan_config.go b/internal/pkg/onuadaptercore/omci_vlan_config.go
index 56deddf..26ecac0 100644
--- a/internal/pkg/onuadaptercore/omci_vlan_config.go
+++ b/internal/pkg/onuadaptercore/omci_vlan_config.go
@@ -883,11 +883,14 @@
 		//at this point it is evident that no flow anymore will refer to a still possibly active Techprofile
 		//request that this profile gets deleted before a new flow add is allowed (except for some aborted add)
 		if !cancelPendingConfig {
+			// ensure mutexFlowParams not locked before calling some TPProcessing activity (that might already be pending on it)
+			oFsm.mutexFlowParams.Unlock()
 			logger.Debugw(ctx, "UniVlanConfigFsm flow removal requested - set TechProfile to-delete", log.Fields{
 				"device-id": oFsm.deviceID})
 			if oFsm.pUniTechProf != nil {
 				oFsm.pUniTechProf.setProfileToDelete(oFsm.pOnuUniPort.uniID, usedTpID, true)
 			}
+			oFsm.mutexFlowParams.Lock()
 		}
 	} else {
 		if !cancelPendingConfig {
@@ -1030,10 +1033,13 @@
 	} else {
 		logger.Debugw(ctx, "UniVlanConfigFsm tp-id used in deleted flow is not used anymore - set TechProfile to-delete", log.Fields{
 			"device-id": oFsm.deviceID, "tp-id": usedTpID})
+		// ensure mutexFlowParams not locked before calling some TPProcessing activity (that might already be pending on it)
+		oFsm.mutexFlowParams.Unlock()
 		if oFsm.pUniTechProf != nil {
 			//request that this profile gets deleted before a new flow add is allowed
 			oFsm.pUniTechProf.setProfileToDelete(oFsm.pOnuUniPort.uniID, usedTpID, true)
 		}
+		oFsm.mutexFlowParams.Lock()
 	}
 }
 
@@ -1201,10 +1207,12 @@
 			oFsm.mutexFlowParams.RLock()
 			tpID := oFsm.actualUniVlanConfigRule.TpID
 			vlanID := oFsm.actualUniVlanConfigRule.SetVid
+			configuredUniFlows := oFsm.configuredUniFlow
+			// ensure mutexFlowParams not locked before calling some TPProcessing activity (that might already be pending on it)
+			oFsm.mutexFlowParams.RUnlock()
 			for _, gemPort := range oFsm.pUniTechProf.getMulticastGemPorts(ctx, oFsm.pOnuUniPort.uniID, uint8(tpID)) {
 				logger.Infow(ctx, "Setting multicast MEs, with first flow", log.Fields{"deviceID": oFsm.deviceID,
-					"techProfile": tpID, "gemPort": gemPort, "vlanID": vlanID, "configuredUniFlow": oFsm.configuredUniFlow})
-				oFsm.mutexFlowParams.RUnlock()
+					"techProfile": tpID, "gemPort": gemPort, "vlanID": vlanID, "configuredUniFlow": configuredUniFlows})
 				errCreateAllMulticastME := oFsm.performSettingMulticastME(ctx, tpID, gemPort,
 					vlanID)
 				if errCreateAllMulticastME != nil {
@@ -1212,9 +1220,7 @@
 						log.Fields{"device-id": oFsm.deviceID})
 					_ = oFsm.pAdaptFsm.pFsm.Event(vlanEvReset)
 				}
-				oFsm.mutexFlowParams.RLock()
 			}
-			oFsm.mutexFlowParams.RUnlock()
 			//If this first flow contains a meter, then create TD for related gems.
 			if oFsm.actualUniVlanConfigMeter != nil {
 				logger.Debugw(ctx, "Creating Traffic Descriptor", log.Fields{"device-id": oFsm.deviceID, "meter": oFsm.actualUniVlanConfigMeter})
@@ -1481,6 +1487,7 @@
 		oFsm.mutexFlowParams.RLock()
 		tpID := oFsm.actualUniVlanConfigRule.TpID
 		configuredUniFlow := oFsm.configuredUniFlow
+		// ensure mutexFlowParams not locked before calling some TPProcessing activity (that might already be pending on it)
 		oFsm.mutexFlowParams.RUnlock()
 		errEvto := oFsm.performConfigEvtocdEntries(ctx, configuredUniFlow)
 		//This is correct passing scenario