[VOL-4024] openonuAdapterGo - soft reboot fails in multi-ONU tests (unexpected states)

Signed-off-by: mpagenko <michael.pagenkopf@adtran.com>
Change-Id: I3a09e16d4611468d0c1df2b620ececfd9b48393f
diff --git a/VERSION b/VERSION
index 1f6cc1f..ab41631 100755
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.3.0-dev194
+1.3.0-dev195
diff --git a/internal/pkg/onuadaptercore/omci_ani_config.go b/internal/pkg/onuadaptercore/omci_ani_config.go
index 423db39..87d5207 100644
--- a/internal/pkg/onuadaptercore/omci_ani_config.go
+++ b/internal/pkg/onuadaptercore/omci_ani_config.go
@@ -114,6 +114,7 @@
 	uniTpKey                 uniTP
 	requestEvent             OnuDeviceEvent
 	mutexIsAwaitingResponse  sync.RWMutex
+	isCanceled               bool
 	isAwaitingResponse       bool
 	omciMIdsResponseReceived chan bool //separate channel needed for checking multiInstance OMCI message responses
 	pAdaptFsm                *AdapterFsm
@@ -248,16 +249,27 @@
 	//early indication about started reset processing
 	oFsm.pUniTechProf.setProfileResetting(ctx, oFsm.pOnuUniPort.uniID, oFsm.techProfileID, true)
 	//mutex protection is required for possible concurrent access to FSM members
-	oFsm.mutexIsAwaitingResponse.RLock()
-	defer oFsm.mutexIsAwaitingResponse.RUnlock()
+	oFsm.mutexIsAwaitingResponse.Lock()
+	oFsm.isCanceled = true
 	if oFsm.isAwaitingResponse {
+		//attention: for an unbuffered channel the sender is blocked until the value is received (processed)!
+		// accordingly the mutex must be released before sending to channel here (mutex acquired in receiver)
+		oFsm.mutexIsAwaitingResponse.Unlock()
 		//use channel to indicate that the response waiting shall be aborted
 		oFsm.omciMIdsResponseReceived <- false
+	} else {
+		oFsm.mutexIsAwaitingResponse.Unlock()
 	}
+
+	oFsm.mutexIsAwaitingResponse.Lock()
 	if oFsm.isWaitingForFlowDelete {
+		oFsm.mutexIsAwaitingResponse.Unlock()
 		//use channel to indicate that the response waiting shall be aborted
 		oFsm.waitFlowDeleteChannel <- false
+	} else {
+		oFsm.mutexIsAwaitingResponse.Unlock()
 	}
+
 	// in any case (even if it might be automatically requested by above cancellation of waiting) ensure resetting the FSM
 	pAdaptFsm := oFsm.pAdaptFsm
 	if pAdaptFsm != nil {
@@ -466,6 +478,10 @@
 	}
 	//ensure internal slices are empty (which might be set from previous run) - release memory
 	oFsm.gemPortAttribsSlice = nil
+	oFsm.mutexIsAwaitingResponse.Lock()
+	//clear the canceled flag that may still be set from a previous reset
+	oFsm.isCanceled = false
+	oFsm.mutexIsAwaitingResponse.Unlock()
 
 	// start go routine for processing of ANI config messages
 	go oFsm.processOmciAniMessages(ctx)
@@ -1391,6 +1407,12 @@
 
 func (oFsm *uniPonAniConfigFsm) waitforOmciResponse(ctx context.Context) error {
 	oFsm.mutexIsAwaitingResponse.Lock()
+	if oFsm.isCanceled {
+		// FSM already canceled before entering wait
+		logger.Debugw(ctx, "uniPonAniConfigFsm wait-for-multi-entity-response aborted (on enter)", log.Fields{"for device-id": oFsm.deviceID})
+		oFsm.mutexIsAwaitingResponse.Unlock()
+		return fmt.Errorf(cErrWaitAborted)
+	}
 	oFsm.isAwaitingResponse = true
 	oFsm.mutexIsAwaitingResponse.Unlock()
 	select {
@@ -1405,14 +1427,14 @@
 		return fmt.Errorf("uniPonAniConfigFsm multi entity timeout %s", oFsm.deviceID)
 	case success := <-oFsm.omciMIdsResponseReceived:
 		if success {
-			logger.Debug(ctx, "uniPonAniConfigFsm multi entity response received")
+			logger.Debugw(ctx, "uniPonAniConfigFsm multi entity response received", log.Fields{"for device-id": oFsm.deviceID})
 			oFsm.mutexIsAwaitingResponse.Lock()
 			oFsm.isAwaitingResponse = false
 			oFsm.mutexIsAwaitingResponse.Unlock()
 			return nil
 		}
 		// waiting was aborted (probably on external request)
-		logger.Debugw(ctx, "uniPonAniConfigFsm wait for multi entity response aborted", log.Fields{"for device-id": oFsm.deviceID})
+		logger.Debugw(ctx, "uniPonAniConfigFsm wait-for-multi-entity-response aborted", log.Fields{"for device-id": oFsm.deviceID})
 		oFsm.mutexIsAwaitingResponse.Lock()
 		oFsm.isAwaitingResponse = false
 		oFsm.mutexIsAwaitingResponse.Unlock()
diff --git a/internal/pkg/onuadaptercore/omci_vlan_config.go b/internal/pkg/onuadaptercore/omci_vlan_config.go
index 89433d4..cb010c9 100644
--- a/internal/pkg/onuadaptercore/omci_vlan_config.go
+++ b/internal/pkg/onuadaptercore/omci_vlan_config.go
@@ -158,6 +158,7 @@
 	pAdaptFsm                   *AdapterFsm
 	acceptIncrementalEvtoOption bool
 	clearPersistency            bool
+	isCanceled                  bool
 	isAwaitingResponse          bool
 	mutexIsAwaitingResponse     sync.RWMutex
 	mutexFlowParams             sync.RWMutex
@@ -335,17 +336,23 @@
 //CancelProcessing ensures that suspended processing at waiting on some response is aborted and reset of FSM
 func (oFsm *UniVlanConfigFsm) CancelProcessing(ctx context.Context) {
 	//mutex protection is required for possible concurrent access to FSM members
-	oFsm.mutexIsAwaitingResponse.RLock()
-	defer oFsm.mutexIsAwaitingResponse.RUnlock()
+	oFsm.mutexIsAwaitingResponse.Lock()
+	oFsm.isCanceled = true
 	if oFsm.isAwaitingResponse {
+		//attention: for an unbuffered channel the sender is blocked until the value is received (processed)!
+		// accordingly the mutex must be released before sending to channel here (mutex acquired in receiver)
+		oFsm.mutexIsAwaitingResponse.Unlock()
 		//use channel to indicate that the response waiting shall be aborted
 		oFsm.omciMIdsResponseReceived <- false
+	} else {
+		oFsm.mutexIsAwaitingResponse.Unlock()
 	}
+
 	// in any case (even if it might be automatically requested by above cancellation of waiting) ensure resetting the FSM
 	pAdaptFsm := oFsm.pAdaptFsm
 	if pAdaptFsm != nil {
 		if fsmErr := pAdaptFsm.pFsm.Event(vlanEvReset); fsmErr != nil {
-			logger.Errorw(ctx, "error in FsmEvent handling UniVlanConfigFsm!",
+			logger.Errorw(ctx, "reset-event failed in UniVlanConfigFsm!",
 				log.Fields{"fsmState": oFsm.pAdaptFsm.pFsm.Current(), "error": fsmErr, "device-id": oFsm.deviceID})
 		}
 	}
@@ -1101,6 +1108,10 @@
 		logger.Debugw(ctx, "UniVlanConfigFsm: no VTFD config required", log.Fields{
 			"in state": e.FSM.Current(), "device-id": oFsm.deviceID})
 	} else {
+		//TODO!!!: this enter* FSM method was not really intended to block while waiting for the OMCI response (which prevents other state transitions)
+		// conceptually it would be better to wait for the response in the background, as done for the other multi-entity processing,
+		// but as the OMCI sequence must be ensured, that would require a separate new state - perhaps later
+		// in practice this should have no impact for now, as no other state transition is currently accepted here (while cancel() remains ensured)
 		if oFsm.numVlanFilterEntries == 0 {
 			// This attribute uniquely identifies each instance of this managed entity. Through an identical ID,
 			// this managed entity is implicitly linked to an instance of the MAC bridge port configuration data ME.
@@ -2150,6 +2161,12 @@
 
 func (oFsm *UniVlanConfigFsm) waitforOmciResponse(ctx context.Context) error {
 	oFsm.mutexIsAwaitingResponse.Lock()
+	if oFsm.isCanceled {
+		// FSM already canceled before entering wait
+		logger.Debugw(ctx, "UniVlanConfigFsm wait-for-multi-entity-response aborted (on enter)", log.Fields{"for device-id": oFsm.deviceID})
+		oFsm.mutexIsAwaitingResponse.Unlock()
+		return fmt.Errorf(cErrWaitAborted)
+	}
 	oFsm.isAwaitingResponse = true
 	oFsm.mutexIsAwaitingResponse.Unlock()
 	select {
@@ -2164,14 +2181,14 @@
 		return fmt.Errorf("uniVlanConfigFsm multi entity timeout %s", oFsm.deviceID)
 	case success := <-oFsm.omciMIdsResponseReceived:
 		if success {
-			logger.Debug(ctx, "UniVlanConfigFsm multi entity response received")
+			logger.Debugw(ctx, "UniVlanConfigFsm multi entity response received", log.Fields{"for device-id": oFsm.deviceID})
 			oFsm.mutexIsAwaitingResponse.Lock()
 			oFsm.isAwaitingResponse = false
 			oFsm.mutexIsAwaitingResponse.Unlock()
 			return nil
 		}
 		// waiting was aborted (probably on external request)
-		logger.Debugw(ctx, "UniVlanConfigFsm wait for multi entity response aborted", log.Fields{"for device-id": oFsm.deviceID})
+		logger.Debugw(ctx, "UniVlanConfigFsm wait-for-multi-entity-response aborted", log.Fields{"for device-id": oFsm.deviceID})
 		oFsm.mutexIsAwaitingResponse.Lock()
 		oFsm.isAwaitingResponse = false
 		oFsm.mutexIsAwaitingResponse.Unlock()