Fix issues with restarting the alarm manager during ONU reboot or device deletion

Change-Id: I68bdf8318b4502e1fe324acf686c98f3eb356f91
diff --git a/internal/pkg/onuadaptercore/alarm_manager.go b/internal/pkg/onuadaptercore/alarm_manager.go
index cb0c223..5b070ab 100644
--- a/internal/pkg/onuadaptercore/alarm_manager.go
+++ b/internal/pkg/onuadaptercore/alarm_manager.go
@@ -334,26 +334,35 @@
 
 func (am *onuAlarmManager) processAlarmSyncMessages(ctx context.Context) {
 	logger.Debugw(ctx, "start-routine-to-process-omci-messages-for-alarm-sync", log.Fields{"device-id": am.pDeviceHandler.deviceID})
-	am.flushAlarmSyncChannels(ctx)
-loop:
 	for {
-		message, ok := <-am.eventChannel
-		if !ok {
-			logger.Info(ctx, "alarm-sync-omci-message-could-not-be-read-from-channel", log.Fields{"device-id": am.pDeviceHandler.deviceID})
-			break loop
-		}
-		logger.Debugw(ctx, "alarm-sync-omci-message-received", log.Fields{"device-id": am.pDeviceHandler.deviceID})
+		select {
+		case message, ok := <-am.eventChannel:
+			if !ok {
+				logger.Info(ctx, "alarm-sync-omci-message-could-not-be-read-from-channel", log.Fields{"device-id": am.pDeviceHandler.deviceID})
+				continue
+			}
+			logger.Debugw(ctx, "alarm-sync-omci-message-received", log.Fields{"device-id": am.pDeviceHandler.deviceID})
 
-		switch message.Type {
-		case OMCI:
-			msg, _ := message.Data.(OmciMessage)
-			am.handleOmciMessage(ctx, msg)
-		default:
-			logger.Warn(ctx, "alarm-sync-unknown-message-type-received", log.Fields{"device-id": am.pDeviceHandler.deviceID, "message.Type": message.Type})
+			switch message.Type {
+			case OMCI:
+				msg, _ := message.Data.(OmciMessage)
+				am.handleOmciMessage(ctx, msg)
+			default:
+				logger.Warn(ctx, "alarm-sync-unknown-message-type-received", log.Fields{"device-id": am.pDeviceHandler.deviceID, "message.Type": message.Type})
+			}
+		case <-am.stopProcessingOmciMessages:
+			logger.Infow(ctx, "alarm-manager-stop-omci-alarm-message-processing-routines", log.Fields{"device-id": am.pDeviceHandler.deviceID})
+			am.onuAlarmManagerLock.Lock()
+			am.processMessage = false
+			am.activeAlarms = nil
+			am.alarmBitMapDB = nil
+			am.alarmUploadNoOfCmds = 0
+			am.alarmUploadSeqNo = 0
+			am.onuAlarmManagerLock.Unlock()
+			return
+
 		}
 	}
-	logger.Info(ctx, "alarm-sync-stopped-handling-of-alarm-sync-omci-message", log.Fields{"device-id": am.pDeviceHandler.deviceID})
-	_ = am.alarmSyncFsm.pFsm.Event(asEvStop)
 }
 
 func (am *onuAlarmManager) handleOmciMessage(ctx context.Context, msg OmciMessage) {
@@ -492,6 +501,7 @@
 }
 
 func (am *onuAlarmManager) startOMCIAlarmMessageProcessing(ctx context.Context) {
+	logger.Infow(ctx, "alarm-manager-start-omci-alarm-message-processing-routines", log.Fields{"device-id": am.pDeviceHandler.deviceID})
 	am.onuAlarmManagerLock.Lock()
 	am.processMessage = true
 	if am.activeAlarms == nil {
@@ -499,6 +509,8 @@
 	}
 	am.alarmBitMapDB = make(map[meAlarmKey][alarmBitMapSizeBytes]byte)
 	am.onuAlarmManagerLock.Unlock()
+	am.flushAlarmSyncChannels(ctx) // Need to do this first as there might be stale data on the channels and the start state waits on same channels
+
 	if am.alarmSyncFsm.pFsm.Is(asStDisabled) {
 		if err := am.alarmSyncFsm.pFsm.Event(asEvStart); err != nil {
 			logger.Errorw(ctx, "alarm-sync-fsm-can-not-go-to-state-starting", log.Fields{"device-id": am.pDeviceHandler.deviceID, "err": err})
@@ -510,17 +522,6 @@
 		return
 	}
 	logger.Debugw(ctx, "alarm-sync-fsm-started", log.Fields{"state": string(am.alarmSyncFsm.pFsm.Current())})
-
-	if stop := <-am.stopProcessingOmciMessages; stop {
-		am.onuAlarmManagerLock.Lock()
-		am.processMessage = false
-		am.activeAlarms = nil
-		am.alarmBitMapDB = nil
-		am.alarmUploadNoOfCmds = 0
-		am.alarmUploadSeqNo = 0
-		am.onuAlarmManagerLock.Unlock()
-
-	}
 }
 
 func (am *onuAlarmManager) handleOmciAlarmNotificationMessage(ctx context.Context, msg OmciMessage) {
diff --git a/internal/pkg/onuadaptercore/device_handler.go b/internal/pkg/onuadaptercore/device_handler.go
index f609a43..25319f5 100644
--- a/internal/pkg/onuadaptercore/device_handler.go
+++ b/internal/pkg/onuadaptercore/device_handler.go
@@ -1566,7 +1566,7 @@
 		// Start PM collector routine
 		go dh.startCollector(ctx)
 	}
-	if !dh.getAlarmManagerIsRunning() {
+	if !dh.getAlarmManagerIsRunning(ctx) {
 		go dh.startAlarmManager(ctx)
 	}
 
@@ -1695,7 +1695,7 @@
 		// Stop collector routine
 		dh.stopCollector <- true
 	}
-	if dh.getAlarmManagerIsRunning() {
+	if dh.getAlarmManagerIsRunning(ctx) {
 		dh.stopAlarmManager <- true
 	}
 
@@ -3125,7 +3125,7 @@
 		// Start PM collector routine
 		go dh.startCollector(ctx)
 	}
-	if !dh.getAlarmManagerIsRunning() {
+	if !dh.getAlarmManagerIsRunning(ctx) {
 		go dh.startAlarmManager(ctx)
 	}
 	dh.uniEntityMap = make(map[uint32]*onuUniPort)
@@ -3151,9 +3151,10 @@
 	dh.mutextAlarmManagerFlag.Unlock()
 }
 
-func (dh *deviceHandler) getAlarmManagerIsRunning() bool {
+func (dh *deviceHandler) getAlarmManagerIsRunning(ctx context.Context) bool {
 	dh.mutextAlarmManagerFlag.RLock()
 	flagValue := dh.alarmManagerIsRunning
+	logger.Debugw(ctx, "alarm-manager-is-running", log.Fields{"flag": dh.alarmManagerIsRunning})
 	dh.mutextAlarmManagerFlag.RUnlock()
 	return flagValue
 }
@@ -3168,11 +3169,13 @@
 		logger.Debugw(ctx, "stopping-collector-for-onu", log.Fields{"device-id": dh.device.Id})
 		dh.setAlarmManagerIsRunning(false)
 		go func() {
-			_ = dh.pAlarmMgr.alarmSyncFsm.pFsm.Event(asEvStop)
+			if dh.pAlarmMgr.alarmSyncFsm != nil && dh.pAlarmMgr.alarmSyncFsm.pFsm != nil {
+				_ = dh.pAlarmMgr.alarmSyncFsm.pFsm.Event(asEvStop)
+			}
 		}()
-		dh.pAlarmMgr.stopAlarmAuditTimer <- struct{}{}
 		dh.pAlarmMgr.stopProcessingOmciMessages <- true // Stop the OMCI routines if any(This will stop the fsms also)
-
+		dh.pAlarmMgr.stopAlarmAuditTimer <- struct{}{}
+		logger.Debugw(ctx, "sent-all-stop-signals-to-alarm-manager", log.Fields{"device-id": dh.device.Id})
 	}
 }
 
diff --git a/internal/pkg/onuadaptercore/openonu.go b/internal/pkg/onuadaptercore/openonu.go
index 71a9034..b116686 100644
--- a/internal/pkg/onuadaptercore/openonu.go
+++ b/internal/pkg/onuadaptercore/openonu.go
@@ -392,7 +392,8 @@
 		if err := handler.deleteDevicePersistencyData(ctx); err != nil {
 			errorsList = append(errorsList, err)
 		}
-		handler.stopCollector <- true // stop the metric collector routine
+		handler.stopCollector <- true    // stop the metric collector routine
+		handler.stopAlarmManager <- true //stop the alarm manager.
 		if handler.pOnuMetricsMgr != nil {
 			if err := handler.pOnuMetricsMgr.clearAllPmData(ctx); err != nil {
 				errorsList = append(errorsList, err)