[VOL-4438] OnuDown indication can collide with FlowDeletion resulting in wrong flowDelete error indication
Signed-off-by: mpagenko <michael.pagenkopf@adtran.com>
Change-Id: I53bfcc7276e812992d6222c1fd40a250e69b8ef2
diff --git a/VERSION b/VERSION
index 61ee492..b6e9f71 100755
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.0.3-dev243
+2.0.3-dev244
diff --git a/internal/pkg/avcfg/omci_vlan_config.go b/internal/pkg/avcfg/omci_vlan_config.go
index df47501..ce6d13a 100755
--- a/internal/pkg/avcfg/omci_vlan_config.go
+++ b/internal/pkg/avcfg/omci_vlan_config.go
@@ -157,7 +157,6 @@
omciMIdsResponseReceived chan bool //seperate channel needed for checking multiInstance OMCI message responses
PAdaptFsm *cmn.AdapterFsm
acceptIncrementalEvtoOption bool
- clearPersistency bool
isCanceled bool
isAwaitingResponse bool
mutexIsAwaitingResponse sync.RWMutex
@@ -204,7 +203,6 @@
NumUniFlows: 0,
ConfiguredUniFlow: 0,
numRemoveFlows: 0,
- clearPersistency: true,
lastFlowToReconcile: lastFlowToRec,
}
@@ -395,18 +393,6 @@
return oFsm.TpIDWaitingFor
}
-//RequestClearPersistency sets the internal flag to not clear persistency data (false=NoClear)
-func (oFsm *UniVlanConfigFsm) RequestClearPersistency(ctx context.Context, aClear bool) {
- if oFsm == nil {
- logger.Error(ctx, "no valid UniVlanConfigFsm!")
- return
- }
- //mutex protection is required for possible concurrent access to FSM members
- oFsm.mutexFlowParams.Lock()
- defer oFsm.mutexFlowParams.Unlock()
- oFsm.clearPersistency = aClear
-}
-
//SetUniFlowParams verifies on existence of flow parameters to be configured,
// optionally udates the cookie list or appends a new flow if there is space
// if possible the FSM is trigggerd to start with the processing
@@ -1838,6 +1824,10 @@
func (oFsm *UniVlanConfigFsm) enterResetting(ctx context.Context, e *fsm.Event) {
logger.Debugw(ctx, "UniVlanConfigFsm resetting", log.Fields{"device-id": oFsm.deviceID})
+ oFsm.mutexPLastTxMeInstance.Lock()
+ oFsm.pLastTxMeInstance = nil //to avoid misinterpretation in case of some lingering frame reception processing
+ oFsm.mutexPLastTxMeInstance.Unlock()
+
pConfigVlanStateAFsm := oFsm.PAdaptFsm
if pConfigVlanStateAFsm != nil {
// abort running message processing
@@ -1849,68 +1839,98 @@
}
pConfigVlanStateAFsm.CommChan <- fsmAbortMsg
- //try to restart the FSM to 'disabled'
- // Can't call FSM Event directly, decoupling it
- go func(a_pAFsm *cmn.AdapterFsm) {
- if a_pAFsm != nil && a_pAFsm.PFsm != nil {
- _ = a_pAFsm.PFsm.Event(VlanEvRestart)
+ //internal data is not explicitly removed, this is left to garbage collection after complete FSM removal
+ // but some channels have to be cleared to avoid unintended waiting for events, that have no meaning anymore now
+
+ oFsm.mutexFlowParams.RLock()
+ if oFsm.delayNewRuleCookie != 0 {
+ // looks like the waiting AddFlow is stuck
+ oFsm.mutexFlowParams.RUnlock()
+ //use asynchronous channel sending to avoid stucking on non-waiting receiver
+ select {
+ case oFsm.chCookieDeleted <- false: // let the waiting AddFlow thread terminate
+ default:
}
- }(pConfigVlanStateAFsm)
+ oFsm.mutexFlowParams.RLock()
+ }
+ if len(oFsm.uniRemoveFlowsSlice) > 0 {
+ for _, removeUniFlowParams := range oFsm.uniRemoveFlowsSlice {
+ if removeUniFlowParams.isSuspendedOnAdd {
+ removeChannel := removeUniFlowParams.removeChannel
+ logger.Debugw(ctx, "UniVlanConfigFsm flow clear-up - abort suspended rule-add", log.Fields{
+ "device-id": oFsm.deviceID, "cookie": removeUniFlowParams.cookie})
+ oFsm.mutexFlowParams.RUnlock()
+ //use asynchronous channel sending to avoid stucking on non-waiting receiver
+ select {
+ case removeChannel <- false:
+ default:
+ }
+ oFsm.mutexFlowParams.RLock()
+ }
+ // Send response on response channel if the caller is waiting on it.
+ var err error = nil
+ if !oFsm.isCanceled {
+ //only if the FSM is not canceled on external request use some error indication for the respChan
+ // so only at real internal FSM abortion some error code is sent back
+ // on the deleteFlow with the hope the system may handle such error situation (possibly retrying)
+ err = fmt.Errorf("internal-error")
+ }
+ //if the FSM was cancelled on external request the assumption is, that all processing has to be stopped
+ // assumed in connection with some ONU down/removal indication in which case all flows can be considered as removed
+ oFsm.pushReponseOnFlowResponseChannel(ctx, removeUniFlowParams.respChan, err)
+ }
+ }
+
+ if oFsm.pDeviceHandler != nil {
+ if len(oFsm.uniVlanFlowParamsSlice) > 0 {
+ if !oFsm.isCanceled {
+ //if the FSM is not canceled on external request use "internal-error" for the respChan
+ for _, vlanRule := range oFsm.uniVlanFlowParamsSlice {
+ // Send response on response channel if the caller is waiting on it with according error indication.
+ oFsm.pushReponseOnFlowResponseChannel(ctx, vlanRule.RespChan, fmt.Errorf("internal-error"))
+ }
+ //permanently remove possibly stored persistent data
+ var emptySlice = make([]cmn.UniVlanFlowParams, 0)
+ _ = oFsm.pDeviceHandler.StorePersUniFlowConfig(ctx, oFsm.pOnuUniPort.UniID, &emptySlice, true) //ignore errors
+ } else {
+ // reset (cancel) of all Fsm is always accompanied by global persistency data removal
+ // no need to remove specific data in this case here
+ // TODO: cancelation may also abort a running flowAdd activity in which case it would be better
+ // to also resopnd on the respChan with some error ("config canceled"), but that is a bit hard to decide here
+ // so just left open by now
+ logger.Debugw(ctx, "UniVlanConfigFsm persistency data not cleared", log.Fields{"device-id": oFsm.deviceID})
+ }
+ }
+ oFsm.mutexFlowParams.RUnlock()
+
+ //try to let the FSM proceed to 'disabled'
+ // Can't call FSM Event directly, decoupling it
+ go func(a_pAFsm *cmn.AdapterFsm) {
+ if a_pAFsm != nil && a_pAFsm.PFsm != nil {
+ _ = a_pAFsm.PFsm.Event(VlanEvRestart)
+ }
+ }(pConfigVlanStateAFsm)
+ return
+ }
+ oFsm.mutexFlowParams.RUnlock()
+ logger.Warnw(ctx, "UniVlanConfigFsm - device handler already vanished",
+ log.Fields{"device-id": oFsm.deviceID})
+ return
}
+ logger.Warnw(ctx, "UniVlanConfigFsm - FSM pointer already vanished",
+ log.Fields{"device-id": oFsm.deviceID})
}
func (oFsm *UniVlanConfigFsm) enterDisabled(ctx context.Context, e *fsm.Event) {
logger.Debugw(ctx, "UniVlanConfigFsm enters disabled state", log.Fields{"device-id": oFsm.deviceID})
- oFsm.mutexPLastTxMeInstance.Lock()
- oFsm.pLastTxMeInstance = nil
- oFsm.mutexPLastTxMeInstance.Unlock()
-
- oFsm.mutexFlowParams.RLock()
- if oFsm.delayNewRuleCookie != 0 {
- // looks like the waiting AddFlow is stuck
- oFsm.mutexFlowParams.RUnlock()
- oFsm.chCookieDeleted <- false // let the waiting AddFlow thread terminate
- oFsm.mutexFlowParams.RLock()
- }
- if len(oFsm.uniRemoveFlowsSlice) > 0 {
- for _, removeUniFlowParams := range oFsm.uniRemoveFlowsSlice {
- if removeUniFlowParams.isSuspendedOnAdd {
- removeChannel := removeUniFlowParams.removeChannel
- logger.Debugw(ctx, "UniVlanConfigFsm flow clear-up - abort suspended rule-add", log.Fields{
- "device-id": oFsm.deviceID, "cookie": removeUniFlowParams.cookie})
- oFsm.mutexFlowParams.RUnlock()
- removeChannel <- false
- oFsm.mutexFlowParams.RLock()
- }
- // Send response on response channel if the caller is waiting on it.
- oFsm.pushReponseOnFlowResponseChannel(ctx, removeUniFlowParams.respChan, fmt.Errorf("internal-error"))
- }
- }
if oFsm.pDeviceHandler != nil {
- //TODO: to clarify with improved error treatment for VlanConfigFsm (timeout,reception) errors
- // current code removes the complete FSM including all flow/rule configuration done so far
- // this might be a bit to much, it would require fully new flow config from rwCore (at least on OnuDown/up)
- // maybe a more sophisticated approach is possible without clearing the data
- if oFsm.clearPersistency {
- //permanently remove possibly stored persistent data
- if len(oFsm.uniVlanFlowParamsSlice) > 0 {
- for _, vlanRule := range oFsm.uniVlanFlowParamsSlice {
- // Send response on response channel if the caller is waiting on it.
- oFsm.pushReponseOnFlowResponseChannel(ctx, vlanRule.RespChan, fmt.Errorf("internal-error"))
- }
- var emptySlice = make([]cmn.UniVlanFlowParams, 0)
- _ = oFsm.pDeviceHandler.StorePersUniFlowConfig(ctx, oFsm.pOnuUniPort.UniID, &emptySlice, true) //ignore errors
- }
- } else {
- logger.Debugw(ctx, "UniVlanConfigFsm persistency data not cleared", log.Fields{"device-id": oFsm.deviceID})
- }
- oFsm.mutexFlowParams.RUnlock()
//request removal of 'reference' in the Handler (completely clear the FSM and its data)
go oFsm.pDeviceHandler.RemoveVlanFilterFsm(ctx, oFsm.pOnuUniPort)
return
}
- oFsm.mutexFlowParams.RUnlock()
+ logger.Warnw(ctx, "UniVlanConfigFsm - device handler already vanished",
+ log.Fields{"device-id": oFsm.deviceID})
}
func (oFsm *UniVlanConfigFsm) processOmciVlanMessages(ctx context.Context) { //ctx context.Context?
diff --git a/internal/pkg/core/device_handler.go b/internal/pkg/core/device_handler.go
index f875553..d1bd5c1 100755
--- a/internal/pkg/core/device_handler.go
+++ b/internal/pkg/core/device_handler.go
@@ -422,7 +422,7 @@
}
func (dh *deviceHandler) handleDeleteGemPortRequest(ctx context.Context, delGemPortMsg *ic.DeleteGemPortMessage) error {
- logger.Infow(ctx, "delete-gem-port-request", log.Fields{"device-id": dh.DeviceID})
+ logger.Infow(ctx, "delete-gem-port-request start", log.Fields{"device-id": dh.DeviceID})
if dh.pOnuTP == nil {
//should normally not happen ...
@@ -434,17 +434,21 @@
dh.pOnuTP.LockTpProcMutex()
defer dh.pOnuTP.UnlockTpProcMutex()
- if delGemPortMsg.UniId > 255 {
+ if delGemPortMsg.UniId >= platform.MaxUnisPerOnu {
+ logger.Errorw(ctx, "delete-gem-port UniId exceeds range", log.Fields{
+ "device-id": dh.DeviceID, "uni-id": delGemPortMsg.UniId})
return fmt.Errorf(fmt.Sprintf("received UniId value exceeds range: %d, device-id: %s",
delGemPortMsg.UniId, dh.DeviceID))
}
uniID := uint8(delGemPortMsg.UniId)
tpID, err := cmn.GetTpIDFromTpPath(delGemPortMsg.TpInstancePath)
if err != nil {
- logger.Errorw(ctx, "error-extracting-tp-id-from-tp-path", log.Fields{"err": err, "tp-path": delGemPortMsg.TpInstancePath})
+ logger.Errorw(ctx, "error-extracting-tp-id-from-tp-path", log.Fields{
+ "device-id": dh.DeviceID, "err": err, "tp-path": delGemPortMsg.TpInstancePath})
return err
}
- logger.Infow(ctx, "delete-gem-port-request", log.Fields{"device-id": dh.DeviceID, "uni-id": uniID, "tpID": tpID, "gem": delGemPortMsg.GemPortId})
+ logger.Infow(ctx, "delete-gem-port-request", log.Fields{
+ "device-id": dh.DeviceID, "uni-id": uniID, "tpID": tpID, "gem": delGemPortMsg.GemPortId})
//a removal of some GemPort would never remove the complete TechProfile entry (done on T-Cont)
return dh.deleteTechProfileResource(ctx, uniID, tpID, delGemPortMsg.TpInstancePath,
@@ -453,7 +457,7 @@
}
func (dh *deviceHandler) handleDeleteTcontRequest(ctx context.Context, delTcontMsg *ic.DeleteTcontMessage) error {
- logger.Infow(ctx, "delete-tcont-request", log.Fields{"device-id": dh.DeviceID})
+ logger.Infow(ctx, "delete-tcont-request start", log.Fields{"device-id": dh.DeviceID})
pDevEntry := dh.GetOnuDeviceEntry(ctx, true)
if pDevEntry == nil {
@@ -471,7 +475,9 @@
dh.pOnuTP.LockTpProcMutex()
defer dh.pOnuTP.UnlockTpProcMutex()
- if delTcontMsg.UniId > 255 {
+ if delTcontMsg.UniId >= platform.MaxUnisPerOnu {
+ logger.Errorw(ctx, "delete-tcont UniId exceeds range", log.Fields{
+ "device-id": dh.DeviceID, "uni-id": delTcontMsg.UniId})
return fmt.Errorf(fmt.Sprintf("received UniId value exceeds range: %d, device-id: %s",
delTcontMsg.UniId, dh.DeviceID))
}
@@ -479,7 +485,8 @@
tpPath := delTcontMsg.TpInstancePath
tpID, err := cmn.GetTpIDFromTpPath(tpPath)
if err != nil {
- logger.Errorw(ctx, "error-extracting-tp-id-from-tp-path", log.Fields{"err": err, "tp-path": tpPath})
+ logger.Errorw(ctx, "error-extracting-tp-id-from-tp-path", log.Fields{
+ "device-id": dh.DeviceID, "err": err, "tp-path": tpPath})
return err
}
logger.Infow(ctx, "delete-tcont-request", log.Fields{"device-id": dh.DeviceID, "uni-id": uniID, "tpID": tpID, "tcont": delTcontMsg.AllocId})
@@ -2074,9 +2081,6 @@
if pVlanFilterFsm, exist := dh.UniVlanConfigFsmMap[uniPort.UniID]; exist {
//VlanFilterFsm exists and was already started
dh.lockVlanConfig.RUnlock()
- //reset of all Fsm is always accompanied by global persistency data removal
- // no need to remove specific data
- pVlanFilterFsm.RequestClearPersistency(ctx, false)
//ensure the FSM processing is stopped in case waiting for some response
pVlanFilterFsm.CancelProcessing(ctx)
} else {