Fix issues with restarting the alarm manager during ONU reboot or device deletion
Change-Id: I68bdf8318b4502e1fe324acf686c98f3eb356f91
diff --git a/internal/pkg/onuadaptercore/alarm_manager.go b/internal/pkg/onuadaptercore/alarm_manager.go
index cb0c223..5b070ab 100644
--- a/internal/pkg/onuadaptercore/alarm_manager.go
+++ b/internal/pkg/onuadaptercore/alarm_manager.go
@@ -334,26 +334,35 @@
func (am *onuAlarmManager) processAlarmSyncMessages(ctx context.Context) {
logger.Debugw(ctx, "start-routine-to-process-omci-messages-for-alarm-sync", log.Fields{"device-id": am.pDeviceHandler.deviceID})
- am.flushAlarmSyncChannels(ctx)
-loop:
for {
- message, ok := <-am.eventChannel
- if !ok {
- logger.Info(ctx, "alarm-sync-omci-message-could-not-be-read-from-channel", log.Fields{"device-id": am.pDeviceHandler.deviceID})
- break loop
- }
- logger.Debugw(ctx, "alarm-sync-omci-message-received", log.Fields{"device-id": am.pDeviceHandler.deviceID})
+ select {
+ case message, ok := <-am.eventChannel:
+ if !ok {
+ logger.Info(ctx, "alarm-sync-omci-message-could-not-be-read-from-channel", log.Fields{"device-id": am.pDeviceHandler.deviceID})
+ continue
+ }
+ logger.Debugw(ctx, "alarm-sync-omci-message-received", log.Fields{"device-id": am.pDeviceHandler.deviceID})
- switch message.Type {
- case OMCI:
- msg, _ := message.Data.(OmciMessage)
- am.handleOmciMessage(ctx, msg)
- default:
- logger.Warn(ctx, "alarm-sync-unknown-message-type-received", log.Fields{"device-id": am.pDeviceHandler.deviceID, "message.Type": message.Type})
+ switch message.Type {
+ case OMCI:
+ msg, _ := message.Data.(OmciMessage)
+ am.handleOmciMessage(ctx, msg)
+ default:
+ logger.Warn(ctx, "alarm-sync-unknown-message-type-received", log.Fields{"device-id": am.pDeviceHandler.deviceID, "message.Type": message.Type})
+ }
+ case <-am.stopProcessingOmciMessages:
+ logger.Infow(ctx, "alarm-manager-stop-omci-alarm-message-processing-routines", log.Fields{"device-id": am.pDeviceHandler.deviceID})
+ am.onuAlarmManagerLock.Lock()
+ am.processMessage = false
+ am.activeAlarms = nil
+ am.alarmBitMapDB = nil
+ am.alarmUploadNoOfCmds = 0
+ am.alarmUploadSeqNo = 0
+ am.onuAlarmManagerLock.Unlock()
+ return
+
}
}
- logger.Info(ctx, "alarm-sync-stopped-handling-of-alarm-sync-omci-message", log.Fields{"device-id": am.pDeviceHandler.deviceID})
- _ = am.alarmSyncFsm.pFsm.Event(asEvStop)
}
func (am *onuAlarmManager) handleOmciMessage(ctx context.Context, msg OmciMessage) {
@@ -492,6 +501,7 @@
}
func (am *onuAlarmManager) startOMCIAlarmMessageProcessing(ctx context.Context) {
+ logger.Infow(ctx, "alarm-manager-start-omci-alarm-message-processing-routines", log.Fields{"device-id": am.pDeviceHandler.deviceID})
am.onuAlarmManagerLock.Lock()
am.processMessage = true
if am.activeAlarms == nil {
@@ -499,6 +509,8 @@
}
am.alarmBitMapDB = make(map[meAlarmKey][alarmBitMapSizeBytes]byte)
am.onuAlarmManagerLock.Unlock()
+ am.flushAlarmSyncChannels(ctx) // Need to do this first as there might be stale data on the channels and the start state waits on same channels
+
if am.alarmSyncFsm.pFsm.Is(asStDisabled) {
if err := am.alarmSyncFsm.pFsm.Event(asEvStart); err != nil {
logger.Errorw(ctx, "alarm-sync-fsm-can-not-go-to-state-starting", log.Fields{"device-id": am.pDeviceHandler.deviceID, "err": err})
@@ -510,17 +522,6 @@
return
}
logger.Debugw(ctx, "alarm-sync-fsm-started", log.Fields{"state": string(am.alarmSyncFsm.pFsm.Current())})
-
- if stop := <-am.stopProcessingOmciMessages; stop {
- am.onuAlarmManagerLock.Lock()
- am.processMessage = false
- am.activeAlarms = nil
- am.alarmBitMapDB = nil
- am.alarmUploadNoOfCmds = 0
- am.alarmUploadSeqNo = 0
- am.onuAlarmManagerLock.Unlock()
-
- }
}
func (am *onuAlarmManager) handleOmciAlarmNotificationMessage(ctx context.Context, msg OmciMessage) {
diff --git a/internal/pkg/onuadaptercore/device_handler.go b/internal/pkg/onuadaptercore/device_handler.go
index f609a43..25319f5 100644
--- a/internal/pkg/onuadaptercore/device_handler.go
+++ b/internal/pkg/onuadaptercore/device_handler.go
@@ -1566,7 +1566,7 @@
// Start PM collector routine
go dh.startCollector(ctx)
}
- if !dh.getAlarmManagerIsRunning() {
+ if !dh.getAlarmManagerIsRunning(ctx) {
go dh.startAlarmManager(ctx)
}
@@ -1695,7 +1695,7 @@
// Stop collector routine
dh.stopCollector <- true
}
- if dh.getAlarmManagerIsRunning() {
+ if dh.getAlarmManagerIsRunning(ctx) {
dh.stopAlarmManager <- true
}
@@ -3125,7 +3125,7 @@
// Start PM collector routine
go dh.startCollector(ctx)
}
- if !dh.getAlarmManagerIsRunning() {
+ if !dh.getAlarmManagerIsRunning(ctx) {
go dh.startAlarmManager(ctx)
}
dh.uniEntityMap = make(map[uint32]*onuUniPort)
@@ -3151,9 +3151,10 @@
dh.mutextAlarmManagerFlag.Unlock()
}
-func (dh *deviceHandler) getAlarmManagerIsRunning() bool {
+func (dh *deviceHandler) getAlarmManagerIsRunning(ctx context.Context) bool {
dh.mutextAlarmManagerFlag.RLock()
flagValue := dh.alarmManagerIsRunning
+ logger.Debugw(ctx, "alarm-manager-is-running", log.Fields{"flag": dh.alarmManagerIsRunning})
dh.mutextAlarmManagerFlag.RUnlock()
return flagValue
}
@@ -3168,11 +3169,13 @@
logger.Debugw(ctx, "stopping-collector-for-onu", log.Fields{"device-id": dh.device.Id})
dh.setAlarmManagerIsRunning(false)
go func() {
- _ = dh.pAlarmMgr.alarmSyncFsm.pFsm.Event(asEvStop)
+ if dh.pAlarmMgr.alarmSyncFsm != nil && dh.pAlarmMgr.alarmSyncFsm.pFsm != nil {
+ _ = dh.pAlarmMgr.alarmSyncFsm.pFsm.Event(asEvStop)
+ }
}()
- dh.pAlarmMgr.stopAlarmAuditTimer <- struct{}{}
dh.pAlarmMgr.stopProcessingOmciMessages <- true // Stop the OMCI routines if any(This will stop the fsms also)
-
+ dh.pAlarmMgr.stopAlarmAuditTimer <- struct{}{}
+ logger.Debugw(ctx, "sent-all-stop-signals-to-alarm-manager", log.Fields{"device-id": dh.device.Id})
}
}
diff --git a/internal/pkg/onuadaptercore/openonu.go b/internal/pkg/onuadaptercore/openonu.go
index 71a9034..b116686 100644
--- a/internal/pkg/onuadaptercore/openonu.go
+++ b/internal/pkg/onuadaptercore/openonu.go
@@ -392,7 +392,8 @@
if err := handler.deleteDevicePersistencyData(ctx); err != nil {
errorsList = append(errorsList, err)
}
- handler.stopCollector <- true // stop the metric collector routine
+ handler.stopCollector <- true // stop the metric collector routine
+ handler.stopAlarmManager <- true //stop the alarm manager.
if handler.pOnuMetricsMgr != nil {
if err := handler.pOnuMetricsMgr.clearAllPmData(ctx); err != nil {
errorsList = append(errorsList, err)