[VOL-4253] OpenOnuAdapter Techprofile configuration/removal deadlock when conflicting with ONU down handling
Signed-off-by: mpagenko <michael.pagenkopf@adtran.com>
Change-Id: Ic35f6074d41b8a6539fcab523d53b7e2511c76e8
diff --git a/VERSION b/VERSION
index 4c02608..9188dab 100755
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.4.1-dev217
+1.4.1-dev218
diff --git a/internal/pkg/onuadaptercore/omci_ani_config.go b/internal/pkg/onuadaptercore/omci_ani_config.go
index 340dd1b..4473c7e 100644
--- a/internal/pkg/onuadaptercore/omci_ani_config.go
+++ b/internal/pkg/onuadaptercore/omci_ani_config.go
@@ -286,12 +286,11 @@
}(pAdaptFsm)
}
- //wait for completion of possibly ongoing techprofile config/remove requests to avoid
- // access conflicts on internal data by next needed data clearance
- //activity should be aborted in short time if running with FSM due to above FSM reset
- // or finished without FSM dependency in short time
- oFsm.pUniTechProf.lockTpProcMutex()
- defer oFsm.pUniTechProf.unlockTpProcMutex()
+ // possible access conflicts on internal data by next needed data clearance
+ // are avoided by using mutexTPState also from within clearAniSideConfig
+ // do not try to lock TpProcMutex here as done in previous code version
+ // as it may result in deadlock situations (as observed at soft-reboot handling where
+ // TpProcMutex is already locked by some ongoing TechProfile config/removal processing)
//remove all TechProf related internal data to allow for new configuration
oFsm.pUniTechProf.clearAniSideConfig(ctx, oFsm.pOnuUniPort.uniID, oFsm.techProfileID)
}
@@ -324,11 +323,6 @@
tcontInstID, tcontAlreadyExist, err := pDevEntry.allocateFreeTcont(ctx, oFsm.pUniTechProf.mapPonAniConfig[oFsm.uniTpKey].tcontParams.allocID)
if err != nil {
logger.Errorw(ctx, "No TCont instances found", log.Fields{"device-id": oFsm.deviceID, "err": err})
- if oFsm.chanSet {
- // indicate processing error/abort to the caller
- oFsm.chSuccess <- 0
- oFsm.chanSet = false //reset the internal channel state
- }
//reset the state machine to enable usage on subsequent requests
_ = aPAFsm.pFsm.Event(aniEvReset)
return
@@ -1081,6 +1075,21 @@
logger.Debugw(ctx, "uniPonAniConfigFsm resetting", log.Fields{
"device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID})
+ if oFsm.isChanSet() {
+ // indicate processing error to the caller (in case there was still some open request)
+ logger.Debugw(ctx, "uniPonAniConfigFsm processingError on channel", log.Fields{
+ "ProcessingStep": oFsm.procStep, "from_State": e.FSM.Current(), "device-id": oFsm.deviceID})
+ //use non-blocking channel send to avoid blocking because of non-existing receiver
+ // (even though the channel is checked on 'set', the outside receiver channel might (theoretically) already be deleted)
+ select {
+ case oFsm.chSuccess <- 0:
+ default:
+ logger.Debugw(ctx, "uniPonAniConfigFsm processingError not send on channel (no receiver)", log.Fields{
+ "device-id": oFsm.deviceID})
+ }
+ oFsm.setChanSet(false) //reset the internal channel state
+ }
+
pConfigAniStateAFsm := oFsm.pAdaptFsm
if pConfigAniStateAFsm != nil {
// abort running message processing
diff --git a/internal/pkg/onuadaptercore/omci_vlan_config.go b/internal/pkg/onuadaptercore/omci_vlan_config.go
index 973d469..56deddf 100644
--- a/internal/pkg/onuadaptercore/omci_vlan_config.go
+++ b/internal/pkg/onuadaptercore/omci_vlan_config.go
@@ -589,15 +589,18 @@
oFsm.actualUniVlanConfigMeter = oFsm.uniVlanFlowParamsSlice[oFsm.configuredUniFlow].Meter
//tpId of the next rule to be configured
tpID := oFsm.actualUniVlanConfigRule.TpID
- loTechProfDone := oFsm.pUniTechProf.getTechProfileDone(ctx, oFsm.pOnuUniPort.uniID, tpID)
oFsm.TpIDWaitingFor = tpID
- logger.Debugw(ctx, "UniVlanConfigFsm - incremental config request (on setConfig)", log.Fields{
- "device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID,
- "set-Vlan": oFsm.actualUniVlanConfigRule.SetVid, "tp-id": tpID, "ProfDone": loTechProfDone})
-
+ loSetVlan := oFsm.actualUniVlanConfigRule.SetVid
//attention: take care to release the mutexFlowParams when calling the FSM directly -
// synchronous FSM 'event/state' functions may rely on this mutex
+ // but it must be released already before calling getTechProfileDone() as it may already be locked
+ // by the techProfile processing call to VlanFsm.IsFlowRemovePending() (see VOL-4207)
oFsm.mutexFlowParams.Unlock()
+ loTechProfDone := oFsm.pUniTechProf.getTechProfileDone(ctx, oFsm.pOnuUniPort.uniID, tpID)
+ logger.Debugw(ctx, "UniVlanConfigFsm - incremental config request (on setConfig)", log.Fields{
+ "device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID,
+ "set-Vlan": loSetVlan, "tp-id": tpID, "ProfDone": loTechProfDone})
+
var fsmErr error
if loTechProfDone {
// let the vlan processing continue with next rule
@@ -1091,12 +1094,16 @@
oFsm.TpIDWaitingFor = tpID
//cmp also usage in EVTOCDE create in omci_cc
oFsm.evtocdID = macBridgeServiceProfileEID + uint16(oFsm.pOnuUniPort.macBpNo)
+ loSetVlan := oFsm.actualUniVlanConfigRule.SetVid
+ //attention: take care to release the mutexFlowParams when calling the FSM directly -
+ // synchronous FSM 'event/state' functions may rely on this mutex
+ // but it must be released already before calling getTechProfileDone() as it may already be locked
+ // by the techProfile processing call to VlanFsm.IsFlowRemovePending() (see VOL-4207)
oFsm.mutexFlowParams.Unlock()
-
loTechProfDone := oFsm.pUniTechProf.getTechProfileDone(ctx, oFsm.pOnuUniPort.uniID, uint8(tpID))
logger.Debugw(ctx, "UniVlanConfigFsm - start with first rule", log.Fields{
"device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID,
- "set-Vlan": oFsm.actualUniVlanConfigRule.SetVid, "tp-id": tpID, "ProfDone": loTechProfDone})
+ "set-Vlan": loSetVlan, "tp-id": tpID, "ProfDone": loTechProfDone})
// Can't call FSM Event directly, decoupling it
go func(aPAFsm *AdapterFsm, aTechProfDone bool) {
@@ -1300,11 +1307,17 @@
//tpId of the next rule to be configured
tpID := oFsm.actualUniVlanConfigRule.TpID
oFsm.TpIDWaitingFor = tpID
+ loSetVlan := oFsm.actualUniVlanConfigRule.SetVid
+ //attention: take care to release the mutexFlowParams when calling the FSM directly -
+ // synchronous FSM 'event/state' functions may rely on this mutex
+ // but it must be released already before calling getTechProfileDone() as it may already be locked
+ // by the techProfile processing call to VlanFsm.IsFlowRemovePending() (see VOL-4207)
+ oFsm.mutexFlowParams.Unlock()
loTechProfDone := oFsm.pUniTechProf.getTechProfileDone(ctx, oFsm.pOnuUniPort.uniID, tpID)
logger.Debugw(ctx, "UniVlanConfigFsm - incremental config request", log.Fields{
"device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID,
- "set-Vlan": oFsm.actualUniVlanConfigRule.SetVid, "tp-id": tpID, "ProfDone": loTechProfDone})
- oFsm.mutexFlowParams.Unlock()
+ "set-Vlan": loSetVlan, "tp-id": tpID, "ProfDone": loTechProfDone})
+
// Can't call FSM Event directly, decoupling it
go func(aPBaseFsm *fsm.FSM, aTechProfDone bool) {
if aTechProfDone {