VOL-3777: Disable ONU and OLT failing sometimes at VOLTHA - stale devices exist even after delete
- Reorganize the code to release the 'lockDevice' lock as soon as possible. The DeleteDevice RPC handler
was blocked waiting for the 'doStateDown' handler to release the 'lockDevice' lock,
which could cause the 'DeleteDevice' RPC to time out and leave a stale device
in VOLTHA.
Change-Id: I0fd5350dcde239f595b8e0b11a7a8e2466edeb9b
diff --git a/VERSION b/VERSION
index ff365e0..8188357 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-3.1.3
+3.1.4-dev
diff --git a/internal/pkg/core/device_handler.go b/internal/pkg/core/device_handler.go
index ef1b179..8eaf0bf 100644
--- a/internal/pkg/core/device_handler.go
+++ b/internal/pkg/core/device_handler.go
@@ -339,13 +339,13 @@
}
func (dh *DeviceHandler) updateLocalDevice(ctx context.Context) {
- dh.lockDevice.Lock()
- defer dh.lockDevice.Unlock()
device, err := dh.coreProxy.GetDevice(log.WithSpanFromContext(context.TODO(), ctx), dh.device.Id, dh.device.Id)
if err != nil || device == nil {
logger.Errorf(ctx, "device-not-found", log.Fields{"device-id": dh.device.Id}, err)
return
}
+ dh.lockDevice.Lock()
+ defer dh.lockDevice.Unlock()
dh.device = device
}
@@ -650,8 +650,6 @@
// doStateDown handle the olt down indication
func (dh *DeviceHandler) doStateDown(ctx context.Context) error {
- dh.lockDevice.Lock()
- defer dh.lockDevice.Unlock()
logger.Debugw(ctx, "do-state-down-start", log.Fields{"device-id": dh.device.Id})
device, err := dh.coreProxy.GetDevice(ctx, dh.device.Id, dh.device.Id)
@@ -664,7 +662,9 @@
//Update the device oper state and connection status
cloned.OperStatus = voltha.OperStatus_UNKNOWN
+ dh.lockDevice.Lock()
dh.device = cloned
+ dh.lockDevice.Unlock()
if err = dh.coreProxy.DeviceStateUpdate(ctx, cloned.Id, cloned.ConnectStatus, cloned.OperStatus); err != nil {
return olterrors.NewErrAdapter("state-update-failed", log.Fields{"device-id": device.Id}, err)
@@ -676,7 +676,6 @@
return olterrors.NewErrAdapter("child-device-fetch-failed", log.Fields{"device-id": dh.device.Id}, err)
}
for _, onuDevice := range onuDevices.Items {
-
// Update onu state as down in onu adapter
onuInd := oop.OnuIndication{}
onuInd.OperState = "down"
@@ -689,11 +688,16 @@
"device-type": onuDevice.Type,
"device-id": onuDevice.Id}, err).LogAt(log.ErrorLevel)
//Do not return here and continue to process other ONUs
+ } else {
+ logger.Debugw(ctx, "sending inter adapter down ind to onu success", log.Fields{"olt-device-id": device.Id, "onu-device-id": onuDevice.Id})
}
}
+ dh.lockDevice.Lock()
/* Discovered ONUs entries need to be cleared , since after OLT
is up, it starts sending discovery indications again*/
dh.discOnus = sync.Map{}
+ dh.lockDevice.Unlock()
+
logger.Debugw(ctx, "do-state-down-end", log.Fields{"device-id": device.Id})
return nil
}