openonuAdapterGo - avoid crash in flow processing TT scenario on suspended add-flow
Signed-off-by: mpagenko <michael.pagenkopf@adtran.com>
Change-Id: I764a7e6539b3997ba8c901dcfbcd6a68a1805a2f
diff --git a/VERSION b/VERSION
index cf3820c..aef3aa9 100755
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.3.2-dev210
+1.3.2-dev211
diff --git a/internal/pkg/onuadaptercore/omci_vlan_config.go b/internal/pkg/onuadaptercore/omci_vlan_config.go
index 5c2a0c9..a18a216 100644
--- a/internal/pkg/onuadaptercore/omci_vlan_config.go
+++ b/internal/pkg/onuadaptercore/omci_vlan_config.go
@@ -20,6 +20,7 @@
import (
"context"
"encoding/binary"
+ "errors"
"fmt"
"net"
"strconv"
@@ -434,7 +435,13 @@
if removeUniFlowParams.vlanRuleParams == loRuleParams {
// the flow to add is the same as the one already in progress of deleting
logger.Infow(ctx, "UniVlanConfigFsm flow setting - suspending rule-add due to ongoing removal", log.Fields{
- "device-id": oFsm.deviceID, "cookie": removeUniFlowParams.cookie})
+ "device-id": oFsm.deviceID, "cookie": removeUniFlowParams.cookie, "remove-index": flow})
+ if flow >= len(oFsm.uniRemoveFlowsSlice) {
+ logger.Errorw(ctx, "abort UniVlanConfigFsm flow add - inconsistent RemoveFlowsSlice", log.Fields{
+ "device-id": oFsm.deviceID, "slice length": len(oFsm.uniRemoveFlowsSlice)})
+ oFsm.mutexFlowParams.RUnlock()
+ return fmt.Errorf("abort UniVlanConfigFsm flow add - inconsistent RemoveFlowsSlice %s", oFsm.deviceID)
+ }
pRemoveParams := &oFsm.uniRemoveFlowsSlice[flow] //wants to modify the uniRemoveFlowsSlice element directly!
oFsm.mutexFlowParams.RUnlock()
if err := oFsm.suspendAddRule(ctx, pRemoveParams); err != nil {
@@ -443,6 +450,7 @@
return fmt.Errorf("abort UniVlanConfigFsm suspension on add %s", oFsm.deviceID)
}
oFsm.mutexFlowParams.RLock()
+ break //this specific rule should only exist once per uniRemoveFlowsSlice
}
}
}
@@ -661,10 +669,10 @@
apRemoveFlowParams.isSuspendedOnAdd = false
}
oFsm.mutexFlowParams.Unlock()
- logger.Errorw(ctx, "timeout waiting for deletion of rule, just try to continue", log.Fields{
+ logger.Errorw(ctx, "timeout waiting for deletion of rule, also aborting add-activity", log.Fields{
"device-id": oFsm.deviceID})
+ return fmt.Errorf("suspend aborted on timeout, also aborting add-activity: %s", oFsm.deviceID)
}
- return nil
}
// VOL-3828 flow config sequence workaround ########### start ##########
@@ -769,84 +777,14 @@
if cookie == aCookie {
logger.Debugw(ctx, "UniVlanConfigFsm flow removal - cookie found", log.Fields{
"device-id": oFsm.deviceID, "cookie": cookie})
- flowCookieMatch = true
deletedCookie = aCookie
kvStoreWrite := false //default setting is to not write to kvStore immediately - will be done on FSM execution finally
//remove the cookie from the cookie slice and verify it is getting empty
if len(storedUniFlowParams.CookieSlice) == 1 {
- pConfigVlanStateBaseFsm := oFsm.pAdaptFsm.pFsm
- var cancelPendingConfig bool = false
- var loRemoveParams uniRemoveVlanFlowParams = uniRemoveVlanFlowParams{}
- logger.Debugw(ctx, "UniVlanConfigFsm flow removal - full flow removal", log.Fields{
- "device-id": oFsm.deviceID})
- //rwCore flow recovery may be the reason for this delete, in which case the flowToBeDeleted may be the same
- // as the one still waiting in the FSM as toAdd but waiting for TechProfileConfig
- // so we have to check if we have to abort the outstanding AddRequest and regard the current DelRequest as done
- // if the Fsm is in some other transient (config) state, we will reach the DelRequest later and correctly process it then
- if pConfigVlanStateBaseFsm.Is(vlanStWaitingTechProf) {
- logger.Debugw(ctx, "UniVlanConfigFsm was waiting for TechProf config with add-request, just aborting the outstanding add",
- log.Fields{"device-id": oFsm.deviceID})
- cancelPendingConfig = true
- } else {
- //create a new element for the removeVlanFlow slice
- loRemoveParams = uniRemoveVlanFlowParams{
- vlanRuleParams: storedUniFlowParams.VlanRuleParams,
- cookie: aCookie,
- }
- loRemoveParams.removeChannel = make(chan bool)
- oFsm.uniRemoveFlowsSlice = append(oFsm.uniRemoveFlowsSlice, loRemoveParams)
- }
-
- usedTpID := storedUniFlowParams.VlanRuleParams.TpID
- if len(oFsm.uniVlanFlowParamsSlice) <= 1 {
- //at this point it is evident that no flow anymore will refer to a still possibly active Techprofile
- //request that this profile gets deleted before a new flow add is allowed (except for some aborted add)
- if !cancelPendingConfig {
- logger.Debugw(ctx, "UniVlanConfigFsm flow removal requested - set TechProfile to-delete", log.Fields{
- "device-id": oFsm.deviceID})
- if oFsm.pUniTechProf != nil {
- oFsm.pUniTechProf.setProfileToDelete(oFsm.pOnuUniPort.uniID, usedTpID, true)
- }
- }
- } else {
- if !cancelPendingConfig {
- oFsm.updateTechProfileToDelete(ctx, usedTpID)
- }
- }
- //trigger the FSM to remove the relevant rule
- if cancelPendingConfig {
- //as the uniFlow parameters are already stored (for add) but no explicit removal is done anymore
- // the paramSlice has to be updated with rule-removal, which also then updates numUniFlows
- oFsm.removeFlowFromParamsSlice(ctx, aCookie, false) //call from 'non-configured' state of the rule
-
- oFsm.requestEventOffset = uint8(cDeviceEventOffsetRemoveWithKvStore) //offset for last flow-remove activity (with kvStore request)
- //attention: take care to release and re-take the mutexFlowParams when calling the FSM directly -
- // synchronous FSM 'event/state' functions may rely on this mutex
- oFsm.mutexFlowParams.Unlock()
- if fsmErr := pConfigVlanStateBaseFsm.Event(vlanEvCancelOutstandingConfig); fsmErr != nil {
- logger.Errorw(ctx, "error in FsmEvent handling UniVlanConfigFsm!",
- log.Fields{"fsmState": pConfigVlanStateBaseFsm.Current(), "error": fsmErr, "device-id": oFsm.deviceID})
- }
- oFsm.mutexFlowParams.Lock()
- } else {
- if pConfigVlanStateBaseFsm.Is(vlanStConfigDone) {
- logger.Debugw(ctx, "UniVlanConfigFsm rule removal request", log.Fields{
- "device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID,
- "tp-id": loRemoveParams.vlanRuleParams.TpID,
- "set-Vlan": loRemoveParams.vlanRuleParams.SetVid})
- //have to re-trigger the FSM to proceed with outstanding incremental flow configuration
- //attention: take care to release and re-take the mutexFlowParams when calling the FSM directly -
- // synchronous FSM 'event/state' functions may rely on this mutex
- oFsm.mutexFlowParams.Unlock()
- if fsmErr := pConfigVlanStateBaseFsm.Event(vlanEvRemFlowConfig); fsmErr != nil {
- logger.Errorw(ctx, "error in FsmEvent handling UniVlanConfigFsm!",
- log.Fields{"fsmState": pConfigVlanStateBaseFsm.Current(), "error": fsmErr, "device-id": oFsm.deviceID})
- }
- oFsm.mutexFlowParams.Lock()
- } // if not in the appropriate state a new entry will be automatically considered later
- // when the configDone state is reached
- }
+ // had to shift content to function due to sca complexity
+ flowCookieMatch = oFsm.removeRuleComplete(ctx, storedUniFlowParams, aCookie)
} else {
+ flowCookieMatch = true
//cut off the requested cookie by slicing out this element
oFsm.uniVlanFlowParamsSlice[flow].CookieSlice = append(
oFsm.uniVlanFlowParamsSlice[flow].CookieSlice[:i],
@@ -897,24 +835,126 @@
return nil
}
+// removeRuleComplete initiates the complete removal of a VLAN rule (from single cookie element)
+func (oFsm *UniVlanConfigFsm) removeRuleComplete(ctx context.Context,
+ aUniFlowParams uniVlanFlowParams, aCookie uint64) bool {
+ pConfigVlanStateBaseFsm := oFsm.pAdaptFsm.pFsm
+ var cancelPendingConfig bool = false
+ var loRemoveParams uniRemoveVlanFlowParams = uniRemoveVlanFlowParams{}
+ logger.Debugw(ctx, "UniVlanConfigFsm flow removal - full flow removal", log.Fields{
+ "device-id": oFsm.deviceID})
+ //rwCore flow recovery may be the reason for this delete, in which case the flowToBeDeleted may be the same
+ // as the one still waiting in the FSM as toAdd but waiting for TechProfileConfig
+ // so we have to check if we have to abort the outstanding AddRequest and regard the current DelRequest as done
+ // if the Fsm is in some other transient (config) state, we will reach the DelRequest later and correctly process it then
+ if pConfigVlanStateBaseFsm.Is(vlanStWaitingTechProf) {
+ logger.Debugw(ctx, "UniVlanConfigFsm was waiting for TechProf config with add-request, just aborting the outstanding add",
+ log.Fields{"device-id": oFsm.deviceID})
+ cancelPendingConfig = true
+ } else {
+ //create a new element for the removeVlanFlow slice
+ loRemoveParams = uniRemoveVlanFlowParams{
+ vlanRuleParams: aUniFlowParams.VlanRuleParams,
+ cookie: aCookie,
+ }
+ loRemoveParams.removeChannel = make(chan bool)
+ oFsm.uniRemoveFlowsSlice = append(oFsm.uniRemoveFlowsSlice, loRemoveParams)
+ }
+
+ usedTpID := aUniFlowParams.VlanRuleParams.TpID
+ if len(oFsm.uniVlanFlowParamsSlice) <= 1 {
+ //at this point it is evident that no flow anymore will refer to a still possibly active Techprofile
+ //request that this profile gets deleted before a new flow add is allowed (except for some aborted add)
+ if !cancelPendingConfig {
+ logger.Debugw(ctx, "UniVlanConfigFsm flow removal requested - set TechProfile to-delete", log.Fields{
+ "device-id": oFsm.deviceID})
+ if oFsm.pUniTechProf != nil {
+ oFsm.pUniTechProf.setProfileToDelete(oFsm.pOnuUniPort.uniID, usedTpID, true)
+ }
+ }
+ } else {
+ if !cancelPendingConfig {
+ oFsm.updateTechProfileToDelete(ctx, usedTpID)
+ }
+ }
+ //trigger the FSM to remove the relevant rule
+ if cancelPendingConfig {
+ //as the uniFlow parameters are already stored (for add) but no explicit removal is done anymore
+ // the paramSlice has to be updated with rule-removal, which also then updates numUniFlows
+ //call from 'non-configured' state of the rules
+ if err := oFsm.removeFlowFromParamsSlice(ctx, aCookie, false); err != nil {
+ //something quite inconsistent detected, perhaps just try to recover with FSM reset
+ oFsm.mutexFlowParams.Unlock()
+ if fsmErr := pConfigVlanStateBaseFsm.Event(vlanEvReset); fsmErr != nil {
+ logger.Errorw(ctx, "error in FsmEvent handling UniVlanConfigFsm!",
+ log.Fields{"fsmState": pConfigVlanStateBaseFsm.Current(), "error": fsmErr, "device-id": oFsm.deviceID})
+ }
+ return false //data base update could not be done, return like cookie not found
+ }
+
+ oFsm.requestEventOffset = uint8(cDeviceEventOffsetRemoveWithKvStore) //offset for last flow-remove activity (with kvStore request)
+ //attention: take care to release and re-take the mutexFlowParams when calling the FSM directly -
+ // synchronous FSM 'event/state' functions may rely on this mutex
+ oFsm.mutexFlowParams.Unlock()
+ if fsmErr := pConfigVlanStateBaseFsm.Event(vlanEvCancelOutstandingConfig); fsmErr != nil {
+ logger.Errorw(ctx, "error in FsmEvent handling UniVlanConfigFsm!",
+ log.Fields{"fsmState": pConfigVlanStateBaseFsm.Current(), "error": fsmErr, "device-id": oFsm.deviceID})
+ }
+ oFsm.mutexFlowParams.Lock()
+ return true
+ }
+ if pConfigVlanStateBaseFsm.Is(vlanStConfigDone) {
+ logger.Debugw(ctx, "UniVlanConfigFsm rule removal request", log.Fields{
+ "device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID,
+ "tp-id": loRemoveParams.vlanRuleParams.TpID,
+ "set-Vlan": loRemoveParams.vlanRuleParams.SetVid})
+ //have to re-trigger the FSM to proceed with outstanding incremental flow configuration
+ //attention: take care to release and re-take the mutexFlowParams when calling the FSM directly -
+ // synchronous FSM 'event/state' functions may rely on this mutex
+ oFsm.mutexFlowParams.Unlock()
+ if fsmErr := pConfigVlanStateBaseFsm.Event(vlanEvRemFlowConfig); fsmErr != nil {
+ logger.Errorw(ctx, "error in FsmEvent handling UniVlanConfigFsm!",
+ log.Fields{"fsmState": pConfigVlanStateBaseFsm.Current(), "error": fsmErr, "device-id": oFsm.deviceID})
+ }
+ oFsm.mutexFlowParams.Lock()
+ } // if not in the appropriate state a new entry will be automatically considered later
+ // when the configDone state is reached
+ return true
+}
+
//removeFlowFromParamsSlice removes a flow from stored uniVlanFlowParamsSlice based on the cookie
// it assumes that adding cookies for this flow (including the actual one to delete) was prevented
// from the start of the deletion request to avoid to much interference
// so when called, there can only be one cookie active for this flow
// requires mutexFlowParams to be locked at call
-func (oFsm *UniVlanConfigFsm) removeFlowFromParamsSlice(ctx context.Context, aCookie uint64, aWasConfigured bool) {
+func (oFsm *UniVlanConfigFsm) removeFlowFromParamsSlice(ctx context.Context, aCookie uint64, aWasConfigured bool) error {
logger.Debugw(ctx, "UniVlanConfigFsm flow removal from ParamsSlice", log.Fields{
"device-id": oFsm.deviceID, "cookie": aCookie})
+ cookieFound := false
removeFromSlice_loop:
for flow, storedUniFlowParams := range oFsm.uniVlanFlowParamsSlice {
- // if UniFlowParams exists, cookieSlice is assumed to have always at least one element
- if storedUniFlowParams.CookieSlice[0] == aCookie {
- if len(storedUniFlowParams.CookieSlice) != 1 {
- errStr := "UniVlanConfigFsm flow removal from ParamsSlice abort - unexpected cookie slice length"
- logger.Errorw(ctx, errStr, log.Fields{
- "cookieSliceLen": len(oFsm.uniVlanFlowParamsSlice), "device-id": oFsm.deviceID})
- return
+ // if UniFlowParams exists, cookieSlice should always have at least one element
+ cookieSliceLen := len(storedUniFlowParams.CookieSlice)
+ if cookieSliceLen == 1 {
+ if storedUniFlowParams.CookieSlice[0] == aCookie {
+ cookieFound = true
}
+ } else if cookieSliceLen == 0 {
+ errStr := "UniVlanConfigFsm unexpected cookie slice length 0 - removal in uniVlanFlowParamsSlice aborted"
+ logger.Errorw(ctx, errStr, log.Fields{"device-id": oFsm.deviceID})
+ return errors.New(errStr)
+ } else {
+ errStr := "UniVlanConfigFsm flow removal unexpected cookie slice length, but rule removal continued"
+ logger.Errorw(ctx, errStr, log.Fields{
+ "cookieSliceLen": len(oFsm.uniVlanFlowParamsSlice), "device-id": oFsm.deviceID})
+ for _, cookie := range storedUniFlowParams.CookieSlice {
+ if cookie == aCookie {
+ cookieFound = true
+ break
+ }
+ }
+ }
+ if cookieFound {
logger.Debugw(ctx, "UniVlanConfigFsm flow removal from ParamsSlice - cookie found", log.Fields{
"device-id": oFsm.deviceID, "cookie": aCookie})
//remove the actual element from the addVlanFlow slice
@@ -941,6 +981,12 @@
break removeFromSlice_loop //found the cookie - no further search for this requested cookie
}
} //search all flows
+ if !cookieFound {
+ errStr := "UniVlanConfigFsm cookie for removal not found, internal counter not updated"
+ logger.Errorw(ctx, errStr, log.Fields{"device-id": oFsm.deviceID})
+ return errors.New(errStr)
+ }
+ return nil
}
// requires mutexFlowParams to be locked at call
@@ -1565,9 +1611,26 @@
oFsm.mutexFlowParams.Lock()
deletedCookie := oFsm.uniRemoveFlowsSlice[0].cookie
+ pConfigVlanStateAFsm := oFsm.pAdaptFsm
+ if pConfigVlanStateAFsm == nil {
+ logger.Errorw(ctx, "invalid Fsm pointer - unresolvable - abort",
+ log.Fields{"device-id": oFsm.deviceID})
+ //would have to be fixed from outside somehow
+ return
+ }
+
// here we need o finally remove the removed data also from uniVlanFlowParamsSlice and possibly have to
// stop the suspension of a add-activity waiting for the end of removal
- oFsm.removeFlowFromParamsSlice(ctx, deletedCookie, true) //call from 'configured' state of the rule
+ //call from 'configured' state of the rule
+ if err := oFsm.removeFlowFromParamsSlice(ctx, deletedCookie, true); err != nil {
+ //something quite inconsistent detected, perhaps just try to recover with FSM reset
+ oFsm.mutexFlowParams.Unlock()
+ logger.Errorw(ctx, "UniVlanConfigFsm - could not clear database - abort", log.Fields{"device-id": oFsm.deviceID})
+ go func(a_pAFsm *AdapterFsm) {
+ _ = a_pAFsm.pFsm.Event(vlanEvReset)
+ }(pConfigVlanStateAFsm)
+ return
+ }
if oFsm.uniRemoveFlowsSlice[0].isSuspendedOnAdd {
removeChannel := oFsm.uniRemoveFlowsSlice[0].removeChannel
oFsm.mutexFlowParams.Unlock()
@@ -1595,15 +1658,10 @@
oFsm.requestEventOffset = uint8(cDeviceEventOffsetRemoveWithKvStore) //offset for last flow-remove activity (with kvStore request)
//return to the basic config verification state
- pConfigVlanStateAFsm := oFsm.pAdaptFsm
- if pConfigVlanStateAFsm != nil {
- // Can't call FSM Event directly, decoupling it
- go func(a_pAFsm *AdapterFsm) {
- if a_pAFsm != nil && a_pAFsm.pFsm != nil {
- _ = a_pAFsm.pFsm.Event(vlanEvFlowDataRemoved)
- }
- }(pConfigVlanStateAFsm)
- }
+ // Can't call FSM Event directly, decoupling it
+ go func(a_pAFsm *AdapterFsm) {
+ _ = a_pAFsm.pFsm.Event(vlanEvFlowDataRemoved)
+ }(pConfigVlanStateAFsm)
oFsm.mutexFlowParams.Lock()
noOfFlowRem := len(oFsm.uniRemoveFlowsSlice)