[VOL-4024] openonuAdapterGo - soft reboot fails in multi-ONU tests (unexpected states)
Signed-off-by: mpagenko <michael.pagenkopf@adtran.com>
Change-Id: I3a09e16d4611468d0c1df2b620ececfd9b48393f
diff --git a/internal/pkg/onuadaptercore/omci_ani_config.go b/internal/pkg/onuadaptercore/omci_ani_config.go
index 423db39..87d5207 100644
--- a/internal/pkg/onuadaptercore/omci_ani_config.go
+++ b/internal/pkg/onuadaptercore/omci_ani_config.go
@@ -114,6 +114,7 @@
uniTpKey uniTP
requestEvent OnuDeviceEvent
mutexIsAwaitingResponse sync.RWMutex
+ isCanceled bool
isAwaitingResponse bool
omciMIdsResponseReceived chan bool //separate channel needed for checking multiInstance OMCI message responses
pAdaptFsm *AdapterFsm
@@ -248,16 +249,27 @@
//early indication about started reset processing
oFsm.pUniTechProf.setProfileResetting(ctx, oFsm.pOnuUniPort.uniID, oFsm.techProfileID, true)
//mutex protection is required for possible concurrent access to FSM members
- oFsm.mutexIsAwaitingResponse.RLock()
- defer oFsm.mutexIsAwaitingResponse.RUnlock()
+ oFsm.mutexIsAwaitingResponse.Lock()
+ oFsm.isCanceled = true
if oFsm.isAwaitingResponse {
+ //attention: a send on an unbuffered channel blocks the sender until the receiver has taken the value!
+ // accordingly the mutex must be released before sending to the channel here (the receiver acquires the same mutex)
+ oFsm.mutexIsAwaitingResponse.Unlock()
//use channel to indicate that the response waiting shall be aborted
oFsm.omciMIdsResponseReceived <- false
+ } else {
+ oFsm.mutexIsAwaitingResponse.Unlock()
}
+
+ oFsm.mutexIsAwaitingResponse.Lock()
if oFsm.isWaitingForFlowDelete {
+ oFsm.mutexIsAwaitingResponse.Unlock()
//use channel to indicate that the response waiting shall be aborted
oFsm.waitFlowDeleteChannel <- false
+ } else {
+ oFsm.mutexIsAwaitingResponse.Unlock()
}
+
// in any case (even if it might be automatically requested by above cancellation of waiting) ensure resetting the FSM
pAdaptFsm := oFsm.pAdaptFsm
if pAdaptFsm != nil {
@@ -466,6 +478,10 @@
}
//ensure internal slices are empty (which might be set from previous run) - release memory
oFsm.gemPortAttribsSlice = nil
+ oFsm.mutexIsAwaitingResponse.Lock()
+ //clear the canceled state that may still be set from a previous reset
+ oFsm.isCanceled = false
+ oFsm.mutexIsAwaitingResponse.Unlock()
// start go routine for processing of ANI config messages
go oFsm.processOmciAniMessages(ctx)
@@ -1391,6 +1407,12 @@
func (oFsm *uniPonAniConfigFsm) waitforOmciResponse(ctx context.Context) error {
oFsm.mutexIsAwaitingResponse.Lock()
+ if oFsm.isCanceled {
+ // FSM already canceled before entering wait
+ logger.Debugw(ctx, "uniPonAniConfigFsm wait-for-multi-entity-response aborted (on enter)", log.Fields{"for device-id": oFsm.deviceID})
+ oFsm.mutexIsAwaitingResponse.Unlock()
+ return fmt.Errorf(cErrWaitAborted)
+ }
oFsm.isAwaitingResponse = true
oFsm.mutexIsAwaitingResponse.Unlock()
select {
@@ -1405,14 +1427,14 @@
return fmt.Errorf("uniPonAniConfigFsm multi entity timeout %s", oFsm.deviceID)
case success := <-oFsm.omciMIdsResponseReceived:
if success {
- logger.Debug(ctx, "uniPonAniConfigFsm multi entity response received")
+ logger.Debugw(ctx, "uniPonAniConfigFsm multi entity response received", log.Fields{"for device-id": oFsm.deviceID})
oFsm.mutexIsAwaitingResponse.Lock()
oFsm.isAwaitingResponse = false
oFsm.mutexIsAwaitingResponse.Unlock()
return nil
}
// waiting was aborted (probably on external request)
- logger.Debugw(ctx, "uniPonAniConfigFsm wait for multi entity response aborted", log.Fields{"for device-id": oFsm.deviceID})
+ logger.Debugw(ctx, "uniPonAniConfigFsm wait-for-multi-entity-response aborted", log.Fields{"for device-id": oFsm.deviceID})
oFsm.mutexIsAwaitingResponse.Lock()
oFsm.isAwaitingResponse = false
oFsm.mutexIsAwaitingResponse.Unlock()
diff --git a/internal/pkg/onuadaptercore/omci_vlan_config.go b/internal/pkg/onuadaptercore/omci_vlan_config.go
index 89433d4..cb010c9 100644
--- a/internal/pkg/onuadaptercore/omci_vlan_config.go
+++ b/internal/pkg/onuadaptercore/omci_vlan_config.go
@@ -158,6 +158,7 @@
pAdaptFsm *AdapterFsm
acceptIncrementalEvtoOption bool
clearPersistency bool
+ isCanceled bool
isAwaitingResponse bool
mutexIsAwaitingResponse sync.RWMutex
mutexFlowParams sync.RWMutex
@@ -335,17 +336,23 @@
//CancelProcessing ensures that suspended processing at waiting on some response is aborted and reset of FSM
func (oFsm *UniVlanConfigFsm) CancelProcessing(ctx context.Context) {
//mutex protection is required for possible concurrent access to FSM members
- oFsm.mutexIsAwaitingResponse.RLock()
- defer oFsm.mutexIsAwaitingResponse.RUnlock()
+ oFsm.mutexIsAwaitingResponse.Lock()
+ oFsm.isCanceled = true
if oFsm.isAwaitingResponse {
+ //attention: a send on an unbuffered channel blocks the sender until the receiver has taken the value!
+ // accordingly the mutex must be released before sending to the channel here (the receiver acquires the same mutex)
+ oFsm.mutexIsAwaitingResponse.Unlock()
//use channel to indicate that the response waiting shall be aborted
oFsm.omciMIdsResponseReceived <- false
+ } else {
+ oFsm.mutexIsAwaitingResponse.Unlock()
}
+
// in any case (even if it might be automatically requested by above cancellation of waiting) ensure resetting the FSM
pAdaptFsm := oFsm.pAdaptFsm
if pAdaptFsm != nil {
if fsmErr := pAdaptFsm.pFsm.Event(vlanEvReset); fsmErr != nil {
- logger.Errorw(ctx, "error in FsmEvent handling UniVlanConfigFsm!",
+ logger.Errorw(ctx, "reset-event failed in UniVlanConfigFsm!",
log.Fields{"fsmState": oFsm.pAdaptFsm.pFsm.Current(), "error": fsmErr, "device-id": oFsm.deviceID})
}
}
@@ -1101,6 +1108,10 @@
logger.Debugw(ctx, "UniVlanConfigFsm: no VTFD config required", log.Fields{
"in state": e.FSM.Current(), "device-id": oFsm.deviceID})
} else {
+ //TODO!!!: this enter* FSM method was not really intended to block waiting on an OMCI response (which prevents other state transitions),
+ // so it would be conceptually better to wait for the response in the background, as is done for the other multi-entity processing;
+ // but as the OMCI sequence must be preserved, a separate new state would be required for that - perhaps later.
+ // In practice this should have no effect for now, as no other state transition is currently accepted (and cancel() remains ensured)
if oFsm.numVlanFilterEntries == 0 {
// This attribute uniquely identifies each instance of this managed entity. Through an identical ID,
// this managed entity is implicitly linked to an instance of the MAC bridge port configuration data ME.
@@ -2150,6 +2161,12 @@
func (oFsm *UniVlanConfigFsm) waitforOmciResponse(ctx context.Context) error {
oFsm.mutexIsAwaitingResponse.Lock()
+ if oFsm.isCanceled {
+ // FSM already canceled before entering wait
+ logger.Debugw(ctx, "UniVlanConfigFsm wait-for-multi-entity-response aborted (on enter)", log.Fields{"for device-id": oFsm.deviceID})
+ oFsm.mutexIsAwaitingResponse.Unlock()
+ return fmt.Errorf(cErrWaitAborted)
+ }
oFsm.isAwaitingResponse = true
oFsm.mutexIsAwaitingResponse.Unlock()
select {
@@ -2164,14 +2181,14 @@
return fmt.Errorf("uniVlanConfigFsm multi entity timeout %s", oFsm.deviceID)
case success := <-oFsm.omciMIdsResponseReceived:
if success {
- logger.Debug(ctx, "UniVlanConfigFsm multi entity response received")
+ logger.Debugw(ctx, "UniVlanConfigFsm multi entity response received", log.Fields{"for device-id": oFsm.deviceID})
oFsm.mutexIsAwaitingResponse.Lock()
oFsm.isAwaitingResponse = false
oFsm.mutexIsAwaitingResponse.Unlock()
return nil
}
// waiting was aborted (probably on external request)
- logger.Debugw(ctx, "UniVlanConfigFsm wait for multi entity response aborted", log.Fields{"for device-id": oFsm.deviceID})
+ logger.Debugw(ctx, "UniVlanConfigFsm wait-for-multi-entity-response aborted", log.Fields{"for device-id": oFsm.deviceID})
oFsm.mutexIsAwaitingResponse.Lock()
oFsm.isAwaitingResponse = false
oFsm.mutexIsAwaitingResponse.Unlock()