[VOL-4023] openonuAdapterGo - Ani side configuration fails while waiting for FlowDeletion, and upgrade looplab/fsm
Signed-off-by: mpagenko <michael.pagenkopf@adtran.com>
Change-Id: I9053d2ee774ff46c6d3a9cff5743a141bf14ad58
diff --git a/internal/pkg/onuadaptercore/device_handler.go b/internal/pkg/onuadaptercore/device_handler.go
index 2d4d67d..93c4fe5 100644
--- a/internal/pkg/onuadaptercore/device_handler.go
+++ b/internal/pkg/onuadaptercore/device_handler.go
@@ -2746,35 +2746,6 @@
dh.lockVlanConfig.Unlock()
}
-//ProcessPendingTpDelete processes any pending TP delete (if available)
-func (dh *deviceHandler) ProcessPendingTpDelete(ctx context.Context, apUniPort *onuUniPort, aTpID uint8) {
- logger.Debugw(ctx, "enter processing pending tp delete", log.Fields{"device-id": dh.deviceID, "tpID": aTpID})
- if apUniPort == nil {
- logger.Errorw(ctx, "uni port is nil", log.Fields{"device-id": dh.deviceID})
- return
- }
- k := uniTP{uniID: apUniPort.uniID, tpID: aTpID}
- if pAniConfigFsm, ok := dh.pOnuTP.pAniConfigFsm[k]; pAniConfigFsm != nil && ok {
- pAniConfigStatemachine := pAniConfigFsm.pAdaptFsm.pFsm
- if pAniConfigStatemachine != nil {
- //If the gem port delete was waiting on flow remove, indicate event that flow remove is done
- if pAniConfigStatemachine.Is(aniStWaitingFlowRem) {
- logger.Debugw(ctx, "ani fsm in aniStWaitingFlowRem state - handling aniEvFlowRemDone event",
- log.Fields{"device-id": dh.deviceID, "tpID": aTpID})
- if err := pAniConfigStatemachine.Event(aniEvFlowRemDone); err != nil {
- logger.Warnw(ctx, "AniConfigFsm: can't continue processing", log.Fields{"err": err,
- "device-id": dh.deviceID, "UniPort": apUniPort.portNo, "tpID": aTpID})
- return
- }
- } else {
- logger.Debugw(ctx, "ani fsm not in aniStWaitingFlowRem state", log.Fields{"device-id": dh.deviceID, "tpID": aTpID})
- return
- }
- }
- return
- }
-}
-
//startWritingOnuDataToKvStore initiates the KVStore write of ONU persistent data
func (dh *deviceHandler) startWritingOnuDataToKvStore(ctx context.Context, aPDevEntry *OnuDeviceEntry) error {
dh.mutexKvStoreContext.Lock() //this write routine may (could) be called with the same context,
diff --git a/internal/pkg/onuadaptercore/omci_ani_config.go b/internal/pkg/onuadaptercore/omci_ani_config.go
index 19167f6..c7433c8 100644
--- a/internal/pkg/onuadaptercore/omci_ani_config.go
+++ b/internal/pkg/onuadaptercore/omci_ani_config.go
@@ -127,6 +127,8 @@
gemPortAttribsSlice []ponAniGemPortAttribs
pLastTxMeInstance *me.ManagedEntity
requestEventOffset uint8 //used to indicate ConfigDone or Removed using successor (enum)
+ isWaitingForFlowDelete bool
+ waitFlowDeleteChannel chan bool
}
//newUniPonAniConfigFsm is the 'constructor' for the state machine to config the PON ANI ports of ONU UNI ports via OMCI
@@ -145,6 +147,7 @@
chanSet: false,
}
instFsm.uniTpKey = uniTP{uniID: apUniPort.uniID, tpID: aTechProfileID}
+ instFsm.waitFlowDeleteChannel = make(chan bool)
instFsm.pAdaptFsm = NewAdapterFsm(aName, instFsm.deviceID, aCommChannel)
if instFsm.pAdaptFsm == nil {
@@ -194,7 +197,7 @@
// exceptional treatment for all states except aniStResetting
{Name: aniEvReset, Src: []string{aniStStarting, aniStCreatingDot1PMapper, aniStCreatingMBPCD,
aniStSettingTconts, aniStCreatingGemNCTPs, aniStCreatingGemIWs, aniStSettingPQs, aniStSettingDot1PMapper,
- aniStConfigDone, aniStRemovingGemIW, aniStRemovingGemNCTP,
+ aniStConfigDone, aniStRemovingGemIW, aniStWaitingFlowRem, aniStRemovingGemNCTP,
aniStResetTcont, aniStRemDot1PMapper, aniStRemAniBPCD, aniStRemoveDone}, Dst: aniStResetting},
// the only way to get to resource-cleared disabled state again is via "resseting"
{Name: aniEvRestart, Src: []string{aniStResetting}, Dst: aniStDisabled},
@@ -212,6 +215,7 @@
("enter_" + aniStSettingDot1PMapper): func(e *fsm.Event) { instFsm.enterSettingDot1PMapper(ctx, e) },
("enter_" + aniStConfigDone): func(e *fsm.Event) { instFsm.enterAniConfigDone(ctx, e) },
("enter_" + aniStRemovingGemIW): func(e *fsm.Event) { instFsm.enterRemovingGemIW(ctx, e) },
+ ("enter_" + aniStWaitingFlowRem): func(e *fsm.Event) { instFsm.enterWaitingFlowRem(ctx, e) },
("enter_" + aniStRemovingGemNCTP): func(e *fsm.Event) { instFsm.enterRemovingGemNCTP(ctx, e) },
("enter_" + aniStResetTcont): func(e *fsm.Event) { instFsm.enterResettingTcont(ctx, e) },
("enter_" + aniStRemDot1PMapper): func(e *fsm.Event) { instFsm.enterRemoving1pMapper(ctx, e) },
@@ -249,6 +253,10 @@
//use channel to indicate that the response waiting shall be aborted
oFsm.omciMIdsResponseReceived <- false
}
+ if oFsm.isWaitingForFlowDelete {
+ //use channel to indicate that the response waiting shall be aborted
+ oFsm.waitFlowDeleteChannel <- false
+ }
// in any case (even if it might be automatically requested by above cancellation of waiting) ensure resetting the FSM
pAdaptFsm := oFsm.pAdaptFsm
if pAdaptFsm != nil {
@@ -672,9 +680,14 @@
}
func (oFsm *uniPonAniConfigFsm) enterRemovingGemIW(ctx context.Context, e *fsm.Event) {
-
oFsm.pUniTechProf.mutexTPState.Lock()
- if oFsm.pDeviceHandler.UniVlanConfigFsmMap[oFsm.pOnuUniPort.uniID].IsFlowRemovePending() {
+ //flush the waitFlowDeleteChannel - possibly already/still set by some previous activity
+ select {
+ case <-oFsm.waitFlowDeleteChannel:
+ logger.Debug(ctx, "flushed waitFlowDeleteChannel")
+ default:
+ }
+ if oFsm.pDeviceHandler.UniVlanConfigFsmMap[oFsm.pOnuUniPort.uniID].IsFlowRemovePending(oFsm.waitFlowDeleteChannel) {
oFsm.pUniTechProf.mutexTPState.Unlock()
logger.Debugw(ctx, "flow remove pending - wait before processing gem port delete",
log.Fields{"device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID, "techProfile-id": oFsm.techProfileID})
@@ -707,6 +720,76 @@
oFsm.pLastTxMeInstance = meInstance
}
+func (oFsm *uniPonAniConfigFsm) enterWaitingFlowRem(ctx context.Context, e *fsm.Event) {
+ oFsm.mutexIsAwaitingResponse.Lock()
+ oFsm.isWaitingForFlowDelete = true
+ oFsm.mutexIsAwaitingResponse.Unlock()
+ select {
+ // maybe be also some outside cancel (but no context modeled for the moment ...)
+ // case <-ctx.Done():
+ // logger.Infow("LockState-bridge-init message reception canceled", log.Fields{"for device-id": oFsm.deviceID})
+ case <-time.After(10 * time.Second): //give flow processing enough time to finish (but try to be less than rwCore flow timeouts)
+ logger.Warnw(ctx, "uniPonAniConfigFsm WaitingFlowRem timeout", log.Fields{
+ "for device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID, "techProfile-id": oFsm.techProfileID})
+ oFsm.mutexIsAwaitingResponse.Lock()
+ oFsm.isWaitingForFlowDelete = false
+ oFsm.mutexIsAwaitingResponse.Unlock()
+ //if the flow is not removed as expected we just try to continue with GemPort removal and hope things are clearing up afterwards
+ pConfigAniStateAFsm := oFsm.pAdaptFsm
+ if pConfigAniStateAFsm != nil {
+ // obviously calling some FSM event here directly does not work - so trying to decouple it ...
+ go func(aPAFsm *AdapterFsm) {
+ if aPAFsm != nil && aPAFsm.pFsm != nil {
+ _ = oFsm.pAdaptFsm.pFsm.Event(aniEvFlowRemDone)
+ }
+ }(pConfigAniStateAFsm)
+ } else {
+ logger.Errorw(ctx, "pConfigAniStateAFsm is nil", log.Fields{
+ "device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID, "techProfile-id": oFsm.techProfileID})
+ }
+ return
+
+ case success := <-oFsm.waitFlowDeleteChannel:
+ if success {
+ logger.Debugw(ctx, "uniPonAniConfigFsm flow removed info received", log.Fields{
+ "device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID, "techProfile-id": oFsm.techProfileID})
+ oFsm.mutexIsAwaitingResponse.Lock()
+ oFsm.isWaitingForFlowDelete = false
+ oFsm.mutexIsAwaitingResponse.Unlock()
+ pConfigAniStateAFsm := oFsm.pAdaptFsm
+ if pConfigAniStateAFsm != nil {
+ // obviously calling some FSM event here directly does not work - so trying to decouple it ...
+ go func(aPAFsm *AdapterFsm) {
+ if aPAFsm != nil && aPAFsm.pFsm != nil {
+ _ = oFsm.pAdaptFsm.pFsm.Event(aniEvFlowRemDone)
+ }
+ }(pConfigAniStateAFsm)
+ } else {
+ logger.Errorw(ctx, "pConfigAniStateAFsm is nil", log.Fields{
+ "device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID, "techProfile-id": oFsm.techProfileID})
+ }
+ return
+ }
+ // waiting was aborted (probably on external request)
+ logger.Debugw(ctx, "uniPonAniConfigFsm WaitingFlowRem aborted", log.Fields{
+ "device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID, "techProfile-id": oFsm.techProfileID})
+ oFsm.mutexIsAwaitingResponse.Lock()
+ oFsm.isWaitingForFlowDelete = false
+ oFsm.mutexIsAwaitingResponse.Unlock()
+ //to be sure we can just generate the reset-event to ensure leaving this state towards 'reset'
+ pConfigAniStateAFsm := oFsm.pAdaptFsm
+ if pConfigAniStateAFsm != nil {
+ // obviously calling some FSM event here directly does not work - so trying to decouple it ...
+ go func(aPAFsm *AdapterFsm) {
+ if aPAFsm != nil && aPAFsm.pFsm != nil {
+ _ = aPAFsm.pFsm.Event(aniEvReset)
+ }
+ }(pConfigAniStateAFsm)
+ }
+ return
+ }
+}
+
func (oFsm *uniPonAniConfigFsm) enterRemovingGemNCTP(ctx context.Context, e *fsm.Event) {
oFsm.pUniTechProf.mutexTPState.Lock()
loGemPortID := (*(oFsm.pUniTechProf.mapRemoveGemEntry[oFsm.uniTpKey])).gemPortID
diff --git a/internal/pkg/onuadaptercore/omci_vlan_config.go b/internal/pkg/onuadaptercore/omci_vlan_config.go
index 45e0944..89433d4 100644
--- a/internal/pkg/onuadaptercore/omci_vlan_config.go
+++ b/internal/pkg/onuadaptercore/omci_vlan_config.go
@@ -144,7 +144,8 @@
vlanRuleParams uniVlanRuleParams
}
-//UniVlanConfigFsm defines the structure for the state machine to config the PON ANI ports of ONU UNI ports via OMCI
+//UniVlanConfigFsm defines the structure for the state machine for configuration of the VLAN related setting via OMCI
+// builds upon 'VLAN rules' that are derived from multiple flows
type UniVlanConfigFsm struct {
pDeviceHandler *deviceHandler
deviceID string
@@ -160,7 +161,7 @@
isAwaitingResponse bool
mutexIsAwaitingResponse sync.RWMutex
mutexFlowParams sync.RWMutex
- chCookieDeleted chan bool //channel to indicate that a specificly indicated cookie was deleted
+ chCookieDeleted chan bool //channel to indicate that a specific cookie (related to the active rule) was deleted
actualUniVlanConfigRule uniVlanRuleParams
uniVlanFlowParamsSlice []uniVlanFlowParams
uniRemoveFlowsSlice []uniRemoveVlanFlowParams
@@ -173,6 +174,8 @@
pLastTxMeInstance *me.ManagedEntity
requestEventOffset uint8
TpIDWaitingFor uint8
+ signalOnFlowDelete bool
+ flowDeleteChannel chan<- bool
//cookie value that indicates that a rule to add is delayed by waiting for deletion of some other existing rule with the same cookie
delayNewRuleCookie uint64
}
@@ -341,8 +344,9 @@
// in any case (even if it might be automatically requested by above cancellation of waiting) ensure resetting the FSM
pAdaptFsm := oFsm.pAdaptFsm
if pAdaptFsm != nil {
- if pAdaptFsm.pFsm != nil {
- _ = pAdaptFsm.pFsm.Event(vlanEvReset)
+ if fsmErr := pAdaptFsm.pFsm.Event(vlanEvReset); fsmErr != nil {
+ logger.Errorw(ctx, "error in FsmEvent handling UniVlanConfigFsm!",
+ log.Fields{"fsmState": oFsm.pAdaptFsm.pFsm.Current(), "error": fsmErr, "device-id": oFsm.deviceID})
}
}
}
@@ -480,9 +484,14 @@
if oFsm.pDeviceHandler.isSkipOnuConfigReconciling() {
logger.Debugw(ctx, "reconciling - skip omci-config of additional vlan rule",
log.Fields{"fsmState": oFsm.pAdaptFsm.pFsm.Current(), "device-id": oFsm.deviceID})
+ //attention: take care to release the mutexFlowParams when calling the FSM directly -
+ // synchronous FSM 'event/state' functions may rely on this mutex
oFsm.mutexFlowParams.Unlock()
if pConfigVlanStateBaseFsm.Is(vlanStConfigDone) {
- _ = pConfigVlanStateBaseFsm.Event(vlanEvSkipOmciConfig)
+ if fsmErr := pConfigVlanStateBaseFsm.Event(vlanEvSkipOmciConfig); fsmErr != nil {
+ logger.Errorw(ctx, "error in FsmEvent handling UniVlanConfigFsm!",
+ log.Fields{"fsmState": oFsm.pAdaptFsm.pFsm.Current(), "error": fsmErr, "device-id": oFsm.deviceID})
+ }
}
return nil
}
@@ -496,9 +505,13 @@
if oFsm.configuredUniFlow == 0 {
// this is a restart with a complete new flow, we can re-use the initial flow config control
// including the check, if the related techProfile is (still) available (probably also removed in between)
- go func(a_pBaseFsm *fsm.FSM) {
- _ = a_pBaseFsm.Event(vlanEvRenew)
- }(pConfigVlanStateBaseFsm)
+ //attention: take care to release the mutexFlowParams when calling the FSM directly -
+ // synchronous FSM 'event/state' functions may rely on this mutex
+ oFsm.mutexFlowParams.Unlock()
+ if fsmErr := pConfigVlanStateBaseFsm.Event(vlanEvRenew); fsmErr != nil {
+ logger.Errorw(ctx, "error in FsmEvent handling UniVlanConfigFsm!",
+ log.Fields{"fsmState": pConfigVlanStateBaseFsm.Current(), "error": fsmErr, "device-id": oFsm.deviceID})
+ }
} else {
//some further flows are to be configured
//store the actual rule that shall be worked upon in the following transient states
@@ -510,19 +523,28 @@
logger.Debugw(ctx, "UniVlanConfigFsm - incremental config request (on setConfig)", log.Fields{
"device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID,
"set-Vlan": oFsm.actualUniVlanConfigRule.SetVid, "tp-id": tpID, "ProfDone": loTechProfDone})
- go func(aPBaseFsm *fsm.FSM, aTechProfDone bool) {
- if aTechProfDone {
- // let the vlan processing continue with next rule
- _ = aPBaseFsm.Event(vlanEvIncrFlowConfig)
- } else {
- // set to waiting for Techprofile
- _ = aPBaseFsm.Event(vlanEvWaitTPIncr)
- }
- }(pConfigVlanStateBaseFsm, loTechProfDone)
+
+ //attention: take care to release the mutexFlowParams when calling the FSM directly -
+ // synchronous FSM 'event/state' functions may rely on this mutex
+ oFsm.mutexFlowParams.Unlock()
+ var fsmErr error
+ if loTechProfDone {
+ // let the vlan processing continue with next rule
+ fsmErr = pConfigVlanStateBaseFsm.Event(vlanEvIncrFlowConfig)
+ } else {
+ // set to waiting for Techprofile
+ fsmErr = pConfigVlanStateBaseFsm.Event(vlanEvWaitTPIncr)
+ }
+ if fsmErr != nil {
+ logger.Errorw(ctx, "error in FsmEvent handling UniVlanConfigFsm!",
+ log.Fields{"fsmState": pConfigVlanStateBaseFsm.Current(), "error": fsmErr, "device-id": oFsm.deviceID})
+ }
}
- } // if not in the appropriate state a new entry will be automatically considered later
- // when the configDone state is reached
- oFsm.mutexFlowParams.Unlock()
+ } else {
+ // if not in the appropriate state a new entry will be automatically considered later
+ // when the configDone state is reached
+ oFsm.mutexFlowParams.Unlock()
+ }
} else {
logger.Errorw(ctx, "UniVlanConfigFsm flow limit exceeded", log.Fields{
"device-id": oFsm.deviceID, "flow-number": oFsm.numUniFlows})
@@ -733,9 +755,14 @@
//trigger the FSM to remove the relevant rule
if cancelPendingConfig {
oFsm.requestEventOffset = uint8(cDeviceEventOffsetRemoveWithKvStore) //offset for last flow-remove activity (with kvStore request)
- go func(a_pBaseFsm *fsm.FSM) {
- _ = a_pBaseFsm.Event(vlanEvCancelOutstandingConfig)
- }(pConfigVlanStateBaseFsm)
+ //attention: take care to release and re-take the mutexFlowParams when calling the FSM directly -
+ // synchronous FSM 'event/state' functions may rely on this mutex
+ oFsm.mutexFlowParams.Unlock()
+ if fsmErr := pConfigVlanStateBaseFsm.Event(vlanEvCancelOutstandingConfig); fsmErr != nil {
+ logger.Errorw(ctx, "error in FsmEvent handling UniVlanConfigFsm!",
+ log.Fields{"fsmState": pConfigVlanStateBaseFsm.Current(), "error": fsmErr, "device-id": oFsm.deviceID})
+ }
+ oFsm.mutexFlowParams.Lock()
} else {
if pConfigVlanStateBaseFsm.Is(vlanStConfigDone) {
logger.Debugw(ctx, "UniVlanConfigFsm rule removal request", log.Fields{
@@ -743,10 +770,14 @@
"tp-id": loRemoveParams.vlanRuleParams.TpID,
"set-Vlan": loRemoveParams.vlanRuleParams.SetVid})
//have to re-trigger the FSM to proceed with outstanding incremental flow configuration
- // Can't call FSM Event directly, decoupling it
- go func(a_pBaseFsm *fsm.FSM) {
- _ = a_pBaseFsm.Event(vlanEvRemFlowConfig)
- }(pConfigVlanStateBaseFsm)
+ //attention: take care to release and re-take the mutexFlowParams when calling the FSM directly -
+ // synchronous FSM 'event/state' functions may rely on this mutex
+ oFsm.mutexFlowParams.Unlock()
+ if fsmErr := pConfigVlanStateBaseFsm.Event(vlanEvRemFlowConfig); fsmErr != nil {
+ logger.Errorw(ctx, "error in FsmEvent handling UniVlanConfigFsm!",
+ log.Fields{"fsmState": pConfigVlanStateBaseFsm.Current(), "error": fsmErr, "device-id": oFsm.deviceID})
+ }
+ oFsm.mutexFlowParams.Lock()
} // if not in the appropriate state a new entry will be automatically considered later
// when the configDone state is reached
}
@@ -1350,7 +1381,7 @@
}(pConfigVlanStateAFsm)
}
- oFsm.mutexFlowParams.RLock()
+ oFsm.mutexFlowParams.Lock()
noOfFlowRem := len(oFsm.uniRemoveFlowsSlice)
if deletedCookie == oFsm.delayNewRuleCookie {
// flush the channel CookieDeleted to ensure it is not lingering from some previous (aborted) activity
@@ -1361,13 +1392,16 @@
}
oFsm.chCookieDeleted <- true // let the waiting AddFlow thread continue
}
- oFsm.mutexFlowParams.RUnlock()
- // If all pending flow removes are completed and TP ID is valid, processing any pending TP delete
- if noOfFlowRem == 0 && tpID > 0 {
- logger.Debugw(ctx, "processing pending tp delete", log.Fields{"device-id": oFsm.deviceID, "tpID": tpID})
+ // If all pending flow-removes are completed and TP ID is valid go on processing any pending TP delete
+ if oFsm.signalOnFlowDelete && noOfFlowRem == 0 && tpID > 0 {
+ logger.Debugw(ctx, "signal flow removal for pending TP delete", log.Fields{"device-id": oFsm.deviceID, "tpID": tpID})
// If we are here then all flows are removed.
- oFsm.pDeviceHandler.ProcessPendingTpDelete(ctx, oFsm.pOnuUniPort, tpID)
+ if len(oFsm.flowDeleteChannel) == 0 { //channel not yet in use
+ oFsm.flowDeleteChannel <- true
+ oFsm.signalOnFlowDelete = false
+ }
}
+ oFsm.mutexFlowParams.Unlock()
}
func (oFsm *UniVlanConfigFsm) enterResetting(ctx context.Context, e *fsm.Event) {
@@ -2358,8 +2392,14 @@
}
// IsFlowRemovePending returns true if there are pending flows to remove, else false.
-func (oFsm *UniVlanConfigFsm) IsFlowRemovePending() bool {
- oFsm.mutexFlowParams.RLock()
- defer oFsm.mutexFlowParams.RUnlock()
- return len(oFsm.uniRemoveFlowsSlice) > 0
+func (oFsm *UniVlanConfigFsm) IsFlowRemovePending(aFlowDeleteChannel chan<- bool) bool {
+ oFsm.mutexFlowParams.Lock()
+ defer oFsm.mutexFlowParams.Unlock()
+ if len(oFsm.uniRemoveFlowsSlice) > 0 {
+ //flow removal is still ongoing/pending
+ oFsm.signalOnFlowDelete = true
+ oFsm.flowDeleteChannel = aFlowDeleteChannel
+ return true
+ }
+ return false
}