VOL-5113: OnuAdapter crashes intermittently while processing ONU indication during scale tests
RCA: 1) The ONU discovery completes successfully.
2) As part of the ONU indication processing, while creating the interface, an OMCI test is performed before the MIB upload process starts.
3) OMCI requests are sent, and a timeout is maintained while we wait for the response.
4) Simultaneously, a delete request for this device triggers cleanup
of the device objects. When the OLT is unable to process the OMCI requests, the ONU indication goroutine wakes up after the timeout
and accesses an invalid object, causing a segmentation fault.
Change-Id: Ifcb64d86854ddb4e2d4857897cc9789128410015
diff --git a/internal/pkg/pmmgr/onu_metrics_manager.go b/internal/pkg/pmmgr/onu_metrics_manager.go
index 43ab2a0..03f7af7 100755
--- a/internal/pkg/pmmgr/onu_metrics_manager.go
+++ b/internal/pkg/pmmgr/onu_metrics_manager.go
@@ -333,6 +333,8 @@
maxL2PMGetPayLoadSize int
onuOpticalMetricstimer *time.Timer
onuUniStatusMetricstimer *time.Timer
+ opticalMetricsDelCommChan chan bool
+ uniMetricsDelCommChan chan bool
}
// NewOnuMetricsManager returns a new instance of the NewOnuMetricsManager
@@ -416,6 +418,9 @@
metricsManager.onuUniStatusMetricstimer = time.NewTimer(DefaultMetricCollectionFrequency)
metricsManager.onuUniStatusMetricstimer.Stop()
+ metricsManager.opticalMetricsDelCommChan = make(chan bool, 2)
+ metricsManager.uniMetricsDelCommChan = make(chan bool, 2)
+
logger.Info(ctx, "init-OnuMetricsManager completed", log.Fields{"device-id": metricsManager.deviceID})
return &metricsManager
}
@@ -785,6 +790,9 @@
logger.Errorw(ctx, "timeout waiting for omci-get response for optical metrics", log.Fields{"device-id": mm.deviceID})
// The metrics will be empty in this case
break loop
+ case <-mm.opticalMetricsDelCommChan:
+ logger.Warnw(ctx, "Deleting the device, stopping optical metrics collection for the device ", log.Fields{"device-id": mm.deviceID})
+ return nil, err
}
// Populate metric only if it was enabled.
for k := range OpticalPowerGroupMetrics {
@@ -871,6 +879,9 @@
logger.Errorw(ctx, "timeout waiting for omci-get response for uni status", log.Fields{"device-id": mm.deviceID})
// The metrics could be empty in this case
break loop1
+ case <-mm.uniMetricsDelCommChan:
+ logger.Warnw(ctx, "Deleting the device, stopping UniMetrics collection for the device ", log.Fields{"device-id": mm.deviceID})
+ return nil, err
}
// Populate metric only if it was enabled.
for k := range UniStatusGroupMetrics {
@@ -933,6 +944,9 @@
logger.Errorw(ctx, "timeout waiting for omci-get response for uni status", log.Fields{"device-id": mm.deviceID})
// The metrics could be empty in this case
break loop2
+ case <-mm.uniMetricsDelCommChan:
+ logger.Warnw(ctx, "Deleting the device, stopping UniMetrics collection for the device ", log.Fields{"device-id": mm.deviceID})
+ return nil, err
}
// Populate metric only if it was enabled.
@@ -1002,6 +1016,9 @@
logger.Errorw(ctx, "timeout waiting for omci-get response for uni status", log.Fields{"device-id": mm.deviceID})
// The metrics could be empty in this case
break loop3
+ case <-mm.uniMetricsDelCommChan:
+ logger.Warnw(ctx, "Deleting the device, stopping UniMetrics collection for the device ", log.Fields{"device-id": mm.deviceID})
+ return nil, err
}
// Populate metric only if it was enabled.
@@ -1415,6 +1432,10 @@
}
if mm.GetdeviceDeletionInProgress() {
+ mm.onuOpticalMetricstimer.Stop()
+ mm.onuUniStatusMetricstimer.Stop()
+ mm.opticalMetricsDelCommChan <- true
+ mm.uniMetricsDelCommChan <- true
mm.pDeviceHandler = nil
mm.pOnuDeviceEntry = nil
mm.GarbageCollectionComplete <- true