[VOL-3889] missing device reason update from flow removal after ONU reboot
Signed-off-by: mpagenko <michael.pagenkopf@adtran.com>
Change-Id: I1e3eb63e044901c814f453b99388db56ae171d00
diff --git a/internal/pkg/onuadaptercore/device_handler.go b/internal/pkg/onuadaptercore/device_handler.go
index 59cd8d0..b720f4f 100644
--- a/internal/pkg/onuadaptercore/device_handler.go
+++ b/internal/pkg/onuadaptercore/device_handler.go
@@ -1669,23 +1669,20 @@
// FSM stop maybe encapsulated as OnuTP method - perhaps later in context of module splitting
if dh.pOnuTP.pAniConfigFsm != nil {
for uniTP := range dh.pOnuTP.pAniConfigFsm {
- _ = dh.pOnuTP.pAniConfigFsm[uniTP].pAdaptFsm.pFsm.Event(aniEvReset)
+ dh.pOnuTP.pAniConfigFsm[uniTP].CancelProcessing()
}
}
for _, uniPort := range dh.uniEntityMap {
// reset the possibly existing VlanConfigFsm
dh.lockVlanConfig.RLock()
if pVlanFilterFsm, exist := dh.UniVlanConfigFsmMap[uniPort.uniID]; exist {
- dh.lockVlanConfig.RUnlock()
//VlanFilterFsm exists and was already started
- pVlanFilterStatemachine := pVlanFilterFsm.pAdaptFsm.pFsm
- if pVlanFilterStatemachine != nil {
- //reset of all Fsm is always accompanied by global persistency data removal
- // no need to remove specific data
- pVlanFilterFsm.RequestClearPersistency(false)
- //and reset the UniVlanConfig FSM
- _ = pVlanFilterStatemachine.Event(vlanEvReset)
- }
+ dh.lockVlanConfig.RUnlock()
+ //reset of all Fsm is always accompanied by global persistency data removal
+ // no need to remove specific data
+ pVlanFilterFsm.RequestClearPersistency(false)
+ //ensure the FSM processing is stopped in case waiting for some response
+ pVlanFilterFsm.CancelProcessing()
} else {
dh.lockVlanConfig.RUnlock()
}
@@ -1708,6 +1705,7 @@
}
dh.lockUpgradeFsm.RUnlock()
+ logger.Infow(ctx, "resetFsms done", log.Fields{"device-id": dh.deviceID})
return nil
}
@@ -2596,8 +2594,10 @@
dh.pOpenOnuAc.AcceptIncrementalEvto, aCookieSlice, aMatchVlan, aSetVlan, aSetPcp)
if pVlanFilterFsm != nil {
dh.lockVlanConfig.Lock()
+ //ensure the mutex is locked throughout the state transition to 'starting' to prevent unintended (ignored) events to be sent there
+ // (from parallel processing)
+ defer dh.lockVlanConfig.Unlock()
dh.UniVlanConfigFsmMap[apUniPort.uniID] = pVlanFilterFsm
- dh.lockVlanConfig.Unlock()
pVlanFilterStatemachine := pVlanFilterFsm.pAdaptFsm.pFsm
if pVlanFilterStatemachine != nil {
if pVlanFilterStatemachine.Is(vlanStDisabled) {
diff --git a/internal/pkg/onuadaptercore/omci_ani_config.go b/internal/pkg/onuadaptercore/omci_ani_config.go
index db0b916..01b2f34 100644
--- a/internal/pkg/onuadaptercore/omci_ani_config.go
+++ b/internal/pkg/onuadaptercore/omci_ani_config.go
@@ -23,6 +23,7 @@
"fmt"
"net"
"strconv"
+ "sync"
"time"
"github.com/cevaris/ordered_map"
@@ -112,6 +113,8 @@
techProfileID uint8
uniTpKey uniTP
requestEvent OnuDeviceEvent
+ mutexIsAwaitingResponse sync.RWMutex
+ isAwaitingResponse bool
omciMIdsResponseReceived chan bool //separate channel needed for checking multiInstance OMCI message responses
pAdaptFsm *AdapterFsm
chSuccess chan<- uint8
@@ -235,6 +238,27 @@
oFsm.chanSet = true
}
+//CancelProcessing ensures that suspended processing at waiting on some response is aborted and reset of FSM
+func (oFsm *uniPonAniConfigFsm) CancelProcessing() {
+ //mutex protection is required for possible concurrent access to FSM members
+ oFsm.mutexIsAwaitingResponse.RLock()
+ defer oFsm.mutexIsAwaitingResponse.RUnlock()
+ if oFsm.isAwaitingResponse {
+ //use channel to indicate that the response waiting shall be aborted
+ oFsm.omciMIdsResponseReceived <- false
+ }
+ // in any case (even if it might be automatically requested by above cancellation of waiting) ensure resetting the FSM
+ pAdaptFsm := oFsm.pAdaptFsm
+ if pAdaptFsm != nil {
+ // obviously calling some FSM event here directly does not work - so trying to decouple it ...
+ go func(aPAFsm *AdapterFsm) {
+ if aPAFsm.pFsm != nil {
+ _ = oFsm.pAdaptFsm.pFsm.Event(aniEvReset)
+ }
+ }(pAdaptFsm)
+ }
+}
+
func (oFsm *uniPonAniConfigFsm) prepareAndEnterConfigState(ctx context.Context, aPAFsm *AdapterFsm) {
if aPAFsm != nil && aPAFsm.pFsm != nil {
//stick to pythonAdapter numbering scheme
@@ -1191,20 +1215,32 @@
}
func (oFsm *uniPonAniConfigFsm) waitforOmciResponse(ctx context.Context) error {
+ oFsm.mutexIsAwaitingResponse.Lock()
+ oFsm.isAwaitingResponse = true
+ oFsm.mutexIsAwaitingResponse.Unlock()
select {
// maybe be also some outside cancel (but no context modeled for the moment ...)
// case <-ctx.Done():
// logger.Infow("LockState-bridge-init message reception canceled", log.Fields{"for device-id": oFsm.deviceID})
case <-time.After(30 * time.Second): //3s was detected to be to less in 8*8 bbsim test with debug Info/Debug
logger.Warnw(ctx, "UniPonAniConfigFsm multi entity timeout", log.Fields{"for device-id": oFsm.deviceID})
+ oFsm.mutexIsAwaitingResponse.Lock()
+ oFsm.isAwaitingResponse = false
+ oFsm.mutexIsAwaitingResponse.Unlock()
return fmt.Errorf("uniPonAniConfigFsm multi entity timeout %s", oFsm.deviceID)
case success := <-oFsm.omciMIdsResponseReceived:
if success {
logger.Debug(ctx, "uniPonAniConfigFsm multi entity response received")
+ oFsm.mutexIsAwaitingResponse.Lock()
+ oFsm.isAwaitingResponse = false
+ oFsm.mutexIsAwaitingResponse.Unlock()
return nil
}
- // should not happen so far
- logger.Warnw(ctx, "uniPonAniConfigFsm multi entity response error", log.Fields{"for device-id": oFsm.deviceID})
- return fmt.Errorf("uniPonAniConfigFsm multi entity responseError %s", oFsm.deviceID)
+ // waiting was aborted (probably on external request)
+ logger.Debugw(ctx, "uniPonAniConfigFsm wait for multi entity response aborted", log.Fields{"for device-id": oFsm.deviceID})
+ oFsm.mutexIsAwaitingResponse.Lock()
+ oFsm.isAwaitingResponse = false
+ oFsm.mutexIsAwaitingResponse.Unlock()
+ return fmt.Errorf(cErrWaitAborted)
}
}
diff --git a/internal/pkg/onuadaptercore/omci_vlan_config.go b/internal/pkg/onuadaptercore/omci_vlan_config.go
index 912af08..39a7bee 100644
--- a/internal/pkg/onuadaptercore/omci_vlan_config.go
+++ b/internal/pkg/onuadaptercore/omci_vlan_config.go
@@ -157,6 +157,8 @@
pAdaptFsm *AdapterFsm
acceptIncrementalEvtoOption bool
clearPersistency bool
+ isAwaitingResponse bool
+ mutexIsAwaitingResponse sync.RWMutex
mutexFlowParams sync.RWMutex
chCookieDeleted chan bool //channel to indicate that a specificly indicated cookie was deleted
actualUniVlanConfigRule uniVlanRuleParams
@@ -327,6 +329,27 @@
return nil
}
+//CancelProcessing ensures that suspended processing at waiting on some response is aborted and reset of FSM
+func (oFsm *UniVlanConfigFsm) CancelProcessing() {
+ //mutex protection is required for possible concurrent access to FSM members
+ oFsm.mutexIsAwaitingResponse.RLock()
+ defer oFsm.mutexIsAwaitingResponse.RUnlock()
+ if oFsm.isAwaitingResponse {
+ //use channel to indicate that the response waiting shall be aborted
+ oFsm.omciMIdsResponseReceived <- false
+ }
+ // in any case (even if it might be automatically requested by above cancellation of waiting) ensure resetting the FSM
+ pAdaptFsm := oFsm.pAdaptFsm
+ if pAdaptFsm != nil {
+ // obviously calling some FSM event here directly does not work - so trying to decouple it ...
+ go func(aPAFsm *AdapterFsm) {
+ if aPAFsm.pFsm != nil {
+ _ = oFsm.pAdaptFsm.pFsm.Event(vlanEvReset)
+ }
+ }(pAdaptFsm)
+ }
+}
+
//GetWaitingTpID returns the TpId that the FSM might be waiting for continuation (0 if none)
func (oFsm *UniVlanConfigFsm) GetWaitingTpID() uint8 {
//mutex protection is required for possible concurrent access to FSM members
@@ -2095,21 +2118,33 @@
}
func (oFsm *UniVlanConfigFsm) waitforOmciResponse(ctx context.Context) error {
+ oFsm.mutexIsAwaitingResponse.Lock()
+ oFsm.isAwaitingResponse = true
+ oFsm.mutexIsAwaitingResponse.Unlock()
select {
// maybe be also some outside cancel (but no context modeled for the moment ...)
// case <-ctx.Done():
// logger.Infow(ctx,"LockState-bridge-init message reception canceled", log.Fields{"for device-id": oFsm.deviceID})
case <-time.After(30 * time.Second): //AS FOR THE OTHER OMCI FSM's
logger.Warnw(ctx, "UniVlanConfigFsm multi entity timeout", log.Fields{"for device-id": oFsm.deviceID})
+ oFsm.mutexIsAwaitingResponse.Lock()
+ oFsm.isAwaitingResponse = false
+ oFsm.mutexIsAwaitingResponse.Unlock()
return fmt.Errorf("uniVlanConfigFsm multi entity timeout %s", oFsm.deviceID)
case success := <-oFsm.omciMIdsResponseReceived:
if success {
logger.Debug(ctx, "UniVlanConfigFsm multi entity response received")
+ oFsm.mutexIsAwaitingResponse.Lock()
+ oFsm.isAwaitingResponse = false
+ oFsm.mutexIsAwaitingResponse.Unlock()
return nil
}
- // should not happen so far
- logger.Warnw(ctx, "UniVlanConfigFsm multi entity response error", log.Fields{"for device-id": oFsm.deviceID})
- return fmt.Errorf("uniVlanConfigFsm multi entity responseError %s", oFsm.deviceID)
+ // waiting was aborted (probably on external request)
+ logger.Debugw(ctx, "UniVlanConfigFsm wait for multi entity response aborted", log.Fields{"for device-id": oFsm.deviceID})
+ oFsm.mutexIsAwaitingResponse.Lock()
+ oFsm.isAwaitingResponse = false
+ oFsm.mutexIsAwaitingResponse.Unlock()
+ return fmt.Errorf(cErrWaitAborted)
}
}
diff --git a/internal/pkg/onuadaptercore/onu_device_entry.go b/internal/pkg/onuadaptercore/onu_device_entry.go
index 86c34cf..db33270 100644
--- a/internal/pkg/onuadaptercore/onu_device_entry.go
+++ b/internal/pkg/onuadaptercore/onu_device_entry.go
@@ -160,6 +160,11 @@
// Add other events here as needed (alarms separate???)
)
+//AdapterFsm related error string
+//error string could be checked on waitforOmciResponse() e.g. to avoid misleading error log
+// but not used that way so far (permit error log even for wanted cancellation)
+const cErrWaitAborted = "waitResponse aborted"
+
type activityDescr struct {
databaseClass func(context.Context) error
//advertiseEvents bool
diff --git a/internal/pkg/onuadaptercore/onu_uni_tp.go b/internal/pkg/onuadaptercore/onu_uni_tp.go
index 3fdbe19..ac59445 100644
--- a/internal/pkg/onuadaptercore/onu_uni_tp.go
+++ b/internal/pkg/onuadaptercore/onu_uni_tp.go
@@ -552,6 +552,7 @@
"device-id": onuTP.deviceID, "uni-id": aUniID, "path": aPathString, "Resource": aResource})
uniTPKey := uniTP{uniID: aUniID, tpID: aTpID}
+ bDeviceProcStatusUpdate := true
if cResourceGemPort == aResource {
logger.Debugw(ctx, "remove GemPort from the list of existing ones of the TP", log.Fields{
"device-id": onuTP.deviceID, "uni-id": aUniID, "path": aPathString, "GemPort": aEntryID})
@@ -640,6 +641,12 @@
return
}
} else {
+ //if we can't do the OMCI processing we also suppress the ProcStatusUpdate
+ //this is needed as in the device-down case where all FSM's are getting reset and internal data gets cleared
+ //as a consequence a possible remove-flow does not see any dependency on the TechProfile anymore and is executed (pro forma) directly
+ //a later TechProfile removal would cause the device-reason to be updated to 'techProfile-delete-success' which is not the expected state
+ // and anyway is no real useful information at that stage
+ bDeviceProcStatusUpdate = false
logger.Debugw(ctx, "uniPonAniConfigFsm delete Gem on OMCI skipped based on device state", log.Fields{
"device-id": onuTP.deviceID, "device-state": onuTP.baseDeviceHandler.getDeviceReasonString()})
}
@@ -720,6 +727,8 @@
return
}
} else {
+ //see gemPort comments
+ bDeviceProcStatusUpdate = false
logger.Debugw(ctx, "uniPonAniConfigFsm TCont cleanup on OMCI skipped based on device state", log.Fields{
"device-id": onuTP.deviceID, "device-state": onuTP.baseDeviceHandler.getDeviceReasonString()})
}
@@ -729,8 +738,10 @@
// FSM stop maybe encapsulated as OnuTP method - perhaps later in context of module splitting
_ = onuTP.pAniConfigFsm[uniTPKey].pAdaptFsm.pFsm.Event(aniEvReset)
}
- // generate deviceHandler StatusEvent in case the FSM was not invoked
- go onuTP.baseDeviceHandler.deviceProcStatusUpdate(ctx, OmciAniResourceRemoved)
+ if bDeviceProcStatusUpdate {
+ // generate deviceHandler StatusEvent in case the FSM was not invoked and OMCI processing not locked due to device state
+ go onuTP.baseDeviceHandler.deviceProcStatusUpdate(ctx, OmciAniResourceRemoved)
+ }
}
func (onuTP *onuUniTechProf) waitForTimeoutOrCompletion(