[VOL-4479] openonu panic in periodic-voltha-multi-uni-multiple-olts-test-bbsim-2.8 +
Onu adapter reconcilement may get stuck on VLAN processing, especially in TT traffic scenarios
Signed-off-by: mpagenko <michael.pagenkopf@adtran.com>
Change-Id: I806f97c85e09bc2c741e40569a67ab3ce21300ab
diff --git a/VERSION b/VERSION
index 92ee6ac..25b22e0 100755
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.3.15
\ No newline at end of file
+1.3.16
diff --git a/internal/pkg/onuadaptercore/device_handler.go b/internal/pkg/onuadaptercore/device_handler.go
index c737e74..77b1dbd 100644
--- a/internal/pkg/onuadaptercore/device_handler.go
+++ b/internal/pkg/onuadaptercore/device_handler.go
@@ -3753,6 +3753,7 @@
logger.Errorw(ctx, "No valid OnuDevice - aborting Core DeviceStateUpdate",
log.Fields{"device-id": dh.deviceID})
} else {
+ onuDevEntry.mutexPersOnuConfig.RLock()
if onuDevEntry.sOnuPersistentData.PersOperState == "up" {
connectStatus = voltha.ConnectStatus_REACHABLE
if !onuDevEntry.sOnuPersistentData.PersUniDisableDone {
@@ -3767,7 +3768,7 @@
onuDevEntry.sOnuPersistentData.PersOperState == "" {
operState = voltha.OperStatus_DISCOVERED
}
-
+ onuDevEntry.mutexPersOnuConfig.RUnlock()
logger.Debugw(ctx, "Core DeviceStateUpdate", log.Fields{"connectStatus": connectStatus, "operState": operState})
}
logger.Debugw(ctx, "reconciling has been finished in time",
@@ -3783,8 +3784,12 @@
if onuDevEntry := dh.getOnuDeviceEntry(ctx, true); onuDevEntry == nil {
logger.Errorw(ctx, "No valid OnuDevice",
log.Fields{"device-id": dh.deviceID})
- } else if onuDevEntry.sOnuPersistentData.PersOperState == "up" {
- connectStatus = voltha.ConnectStatus_REACHABLE
+ } else {
+ onuDevEntry.mutexPersOnuConfig.RLock()
+ if onuDevEntry.sOnuPersistentData.PersOperState == "up" {
+ connectStatus = voltha.ConnectStatus_REACHABLE
+ }
+ onuDevEntry.mutexPersOnuConfig.RUnlock()
}
dh.deviceReconcileFailedUpdate(ctx, drReconcileCanceled, connectStatus)
@@ -3796,12 +3801,14 @@
if onuDevEntry := dh.getOnuDeviceEntry(ctx, true); onuDevEntry == nil {
logger.Errorw(ctx, "No valid OnuDevice",
log.Fields{"device-id": dh.deviceID})
- } else if onuDevEntry.sOnuPersistentData.PersOperState == "up" {
- connectStatus = voltha.ConnectStatus_REACHABLE
+ } else {
+ onuDevEntry.mutexPersOnuConfig.RLock()
+ if onuDevEntry.sOnuPersistentData.PersOperState == "up" {
+ connectStatus = voltha.ConnectStatus_REACHABLE
+ }
+ onuDevEntry.mutexPersOnuConfig.RUnlock()
}
-
dh.deviceReconcileFailedUpdate(ctx, drReconcileMaxTimeout, connectStatus)
-
}
dh.mutexReconcilingFlag.Lock()
dh.reconciling = cNoReconciling
diff --git a/internal/pkg/onuadaptercore/omci_vlan_config.go b/internal/pkg/onuadaptercore/omci_vlan_config.go
index 01a98df..daa4473 100644
--- a/internal/pkg/onuadaptercore/omci_vlan_config.go
+++ b/internal/pkg/onuadaptercore/omci_vlan_config.go
@@ -1313,9 +1313,14 @@
if oFsm.pDeviceHandler.isSkipOnuConfigReconciling() {
oFsm.configuredUniFlow = oFsm.numUniFlows
if oFsm.lastFlowToReconcile {
- logger.Debugw(ctx, "reconciling - flow processing finished", log.Fields{"device-id": oFsm.deviceID})
+ logger.Debugw(ctx, "reconciling - flow processing finished", log.Fields{
+ "device-id": oFsm.deviceID, "uni-id": oFsm.pOnuUniPort.uniID})
oFsm.pDeviceHandler.setReconcilingFlows(false)
- oFsm.pDeviceHandler.chReconcilingFlowsFinished <- true
+ //use a non-blocking channel send to avoid getting stuck when no receiver is waiting
+ select {
+ case oFsm.pDeviceHandler.chReconcilingFlowsFinished <- true:
+ default:
+ }
}
logger.Debugw(ctx, "reconciling - skip enterVlanConfigDone processing",
log.Fields{"numUniFlows": oFsm.numUniFlows, "configuredUniFlow": oFsm.configuredUniFlow, "device-id": oFsm.deviceID})
diff --git a/internal/pkg/onuadaptercore/onu_device_entry.go b/internal/pkg/onuadaptercore/onu_device_entry.go
index cd2799f..64929d5 100644
--- a/internal/pkg/onuadaptercore/onu_device_entry.go
+++ b/internal/pkg/onuadaptercore/onu_device_entry.go
@@ -307,8 +307,6 @@
// within the FSM event procedures
omciMessageReceived chan bool //seperate channel needed by DownloadFsm
omciRebootMessageReceivedChannel chan Message // channel needed by Reboot request
-
- mutexTcontMap sync.RWMutex
}
//newOnuDeviceEntry returns a new instance of a OnuDeviceEntry
@@ -947,8 +945,8 @@
logger.Debugw(ctx, "allocate-free-tcont", log.Fields{"device-id": oo.deviceID, "allocID": allocID,
"allocated-instances": oo.sOnuPersistentData.PersTcontMap})
- oo.mutexTcontMap.Lock()
- defer oo.mutexTcontMap.Unlock()
+ oo.mutexPersOnuConfig.Lock()
+ defer oo.mutexPersOnuConfig.Unlock()
if entityID, ok := oo.sOnuPersistentData.PersTcontMap[allocID]; ok {
//tcont already allocated before, return the used instance-id
return entityID, true, nil
@@ -972,12 +970,11 @@
}
}
return 0, false, fmt.Errorf(fmt.Sprintf("no-free-tcont-left-for-device-%s", oo.deviceID))
-
}
func (oo *OnuDeviceEntry) freeTcont(ctx context.Context, allocID uint16) {
logger.Debugw(ctx, "free-tcont", log.Fields{"device-id": oo.deviceID, "alloc": allocID})
- oo.mutexTcontMap.Lock()
- defer oo.mutexTcontMap.Unlock()
+ oo.mutexPersOnuConfig.Lock()
+ defer oo.mutexPersOnuConfig.Unlock()
delete(oo.sOnuPersistentData.PersTcontMap, allocID)
}
diff --git a/internal/pkg/onuadaptercore/onu_metrics_manager.go b/internal/pkg/onuadaptercore/onu_metrics_manager.go
index c33fb99..ab3000c 100644
--- a/internal/pkg/onuadaptercore/onu_metrics_manager.go
+++ b/internal/pkg/onuadaptercore/onu_metrics_manager.go
@@ -2959,9 +2959,11 @@
func (mm *onuMetricsManager) getEthernetFrameExtendedMETypeFromKvStore(ctx context.Context) (bool, error) {
// Check if the data is already available in KV store, if yes, do not send the request for get me.
var data me.ClassID
+ mm.pDeviceHandler.pOnuOmciDevice.mutexPersOnuConfig.RLock()
key := fmt.Sprintf("%s/%s/%s", mm.pDeviceHandler.pOnuOmciDevice.sOnuPersistentData.PersVendorID,
mm.pDeviceHandler.pOnuOmciDevice.sOnuPersistentData.PersEquipmentID,
mm.pDeviceHandler.pOnuOmciDevice.sOnuPersistentData.PersActiveSwVersion)
+ mm.pDeviceHandler.pOnuOmciDevice.mutexPersOnuConfig.RUnlock()
Value, err := mm.extPmKvStore.Get(ctx, key)
if err == nil {
if Value != nil {
@@ -3059,9 +3061,11 @@
}
func (mm *onuMetricsManager) putExtPmMeKvStore(ctx context.Context) {
+ mm.pDeviceHandler.pOnuOmciDevice.mutexPersOnuConfig.RLock()
key := fmt.Sprintf("%s/%s/%s", mm.pDeviceHandler.pOnuOmciDevice.sOnuPersistentData.PersVendorID,
mm.pDeviceHandler.pOnuOmciDevice.sOnuPersistentData.PersEquipmentID,
mm.pDeviceHandler.pOnuOmciDevice.sOnuPersistentData.PersActiveSwVersion)
+ mm.pDeviceHandler.pOnuOmciDevice.mutexPersOnuConfig.RUnlock()
// check if we get the supported type me for ethernet frame extended pm class id
if mm.supportedEthernetFrameExtendedPMClass == 0 {
logger.Error(ctx, "unable-to-get-any-supported-extended-pm-me-class")