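[VOL-4789] openoltAdapter: ONUs are undiscoverable after multiple OLT reboots

Track whether the stats collector and heartbeat check routines are
running (isCollectorActive, isHeartbeatCheckActive) and signal their
stop channels only while the corresponding routine is active. The stop
requests were previously sent unconditionally; such a send blocks once
the channel buffer is full and no routine is receiving from it.
Both stop channels now have a buffer of one instead of two, and the
adapter version is bumped to 4.2.7.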

Change-Id: I6979b5fa149e37085f47a28116433a7436dafc0e
diff --git a/VERSION b/VERSION
index d6f85ab..4739c61 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-4.2.6
+4.2.7
diff --git a/internal/pkg/core/device_handler.go b/internal/pkg/core/device_handler.go
index 76f05da..773036b 100644
--- a/internal/pkg/core/device_handler.go
+++ b/internal/pkg/core/device_handler.go
@@ -109,7 +109,9 @@
 	portStats                     *OpenOltStatisticsMgr
 	metrics                       *pmmetrics.PmMetrics
 	stopCollector                 chan bool
+	isCollectorActive             bool
 	stopHeartbeatCheck            chan bool
+	isHeartbeatCheckActive        bool
 	activePorts                   sync.Map
 	stopIndications               chan bool
 	isReadIndicationRoutineActive bool
@@ -203,8 +205,8 @@
 	dh.openOLT = adapter
 	dh.exitChannel = make(chan struct{})
 	dh.lockDevice = sync.RWMutex{}
-	dh.stopCollector = make(chan bool, 2)      // TODO: Why buffered?
-	dh.stopHeartbeatCheck = make(chan bool, 2) // TODO: Why buffered?
+	dh.stopCollector = make(chan bool, 1)      // TODO: Why buffered?
+	dh.stopHeartbeatCheck = make(chan bool, 1) // TODO: Why buffered?
 	dh.metrics = pmmetrics.NewPmMetrics(cloned.Id, pmmetrics.Frequency(150), pmmetrics.FrequencyOverride(false), pmmetrics.Grouped(false), pmmetrics.Metrics(pmNames))
 	dh.activePorts = sync.Map{}
 	dh.stopIndications = make(chan bool, 1) // TODO: Why buffered?
@@ -1057,6 +1059,17 @@
 
 func startCollector(ctx context.Context, dh *DeviceHandler) {
 	logger.Debugw(ctx, "starting-collector", log.Fields{"device-id": dh.device.Id})
+
+	defer func() {
+		dh.lockDevice.Lock()
+		dh.isCollectorActive = false
+		dh.lockDevice.Unlock()
+	}()
+
+	dh.lockDevice.Lock()
+	dh.isCollectorActive = true
+	dh.lockDevice.Unlock()
+
 	for {
 		select {
 		case <-dh.stopCollector:
@@ -2015,8 +2028,12 @@
 	dh.discOnus = sync.Map{}
 	dh.onus = sync.Map{}
 
+	dh.lockDevice.RLock()
 	//stopping the stats collector
-	dh.stopCollector <- true
+	if dh.isCollectorActive {
+		dh.stopCollector <- true
+	}
+	dh.lockDevice.RUnlock()
 
 	go dh.notifyChildDevices(ctx, "unreachable")
 	cloned := proto.Clone(device).(*voltha.Device)
@@ -2167,11 +2184,16 @@
 
 	dh.cleanupDeviceResources(ctx)
 	logger.Debugw(ctx, "removed-device-from-Resource-manager-KV-store", log.Fields{"device-id": dh.device.Id})
-	// Stop the Stats collector
-	dh.stopCollector <- true
-	// stop the heartbeat check routine
-	dh.stopHeartbeatCheck <- true
+
 	dh.lockDevice.RLock()
+	// Stop the Stats collector
+	if dh.isCollectorActive {
+		dh.stopCollector <- true
+	}
+	// stop the heartbeat check routine
+	if dh.isHeartbeatCheckActive {
+		dh.stopHeartbeatCheck <- true
+	}
 	// Stop the read indication only if it the routine is active
 	if dh.isReadIndicationRoutineActive {
 		dh.stopIndications <- true
@@ -2417,6 +2439,16 @@
 
 func startHeartbeatCheck(ctx context.Context, dh *DeviceHandler) {
 
+	defer func() {
+		dh.lockDevice.Lock()
+		dh.isHeartbeatCheckActive = false
+		dh.lockDevice.Unlock()
+	}()
+
+	dh.lockDevice.Lock()
+	dh.isHeartbeatCheckActive = true
+	dh.lockDevice.Unlock()
+
 	// start the heartbeat check towards the OLT.
 	var timerCheck *time.Timer
 	dh.heartbeatSignature = dh.getHeartbeatSignature(ctx)
@@ -2519,11 +2551,15 @@
 		dh.device = cloned // update local copy of the device
 		go dh.eventMgr.oltCommunicationEvent(ctx, cloned, raisedTs)
 
-		// Stop the Stats collector
-		dh.stopCollector <- true
-		// stop the heartbeat check routine
-		dh.stopHeartbeatCheck <- true
 		dh.lockDevice.RLock()
+		// Stop the Stats collector
+		if dh.isCollectorActive {
+			dh.stopCollector <- true
+		}
+		// stop the heartbeat check routine
+		if dh.isHeartbeatCheckActive {
+			dh.stopHeartbeatCheck <- true
+		}
 		// Stop the read indication only if it the routine is active
 		// The read indication would have already stopped due to failure on the gRPC stream following OLT going unreachable
 		// Sending message on the 'stopIndication' channel again will cause the readIndication routine to immediately stop
@@ -2579,10 +2615,17 @@
 	go dh.eventMgr.oltCommunicationEvent(ctx, cloned, raisedTs)
 
 	dh.cleanupDeviceResources(ctx)
+
+	dh.lockDevice.RLock()
 	// Stop the Stats collector
-	dh.stopCollector <- true
+	if dh.isCollectorActive {
+		dh.stopCollector <- true
+	}
 	// stop the heartbeat check routine
-	dh.stopHeartbeatCheck <- true
+	if dh.isHeartbeatCheckActive {
+		dh.stopHeartbeatCheck <- true
+	}
+	dh.lockDevice.RUnlock()
 
 	var wg sync.WaitGroup
 	wg.Add(1) // for the multicast handler routine