[VOL-4389] openonu-go panic
[VOL-4239] openonu-go-adapter: wrong indication of download failure
[VOL-4258] openonu-go-adapter: wrong indication of download from wrong url
[VOL-4303] OpenOnuAdapter Onu Upgrade mismatch in adapter file download state - no ONU upgrade start
[VOL-4336] openonuAdapterGo - ONU is not informed about abort of SW upgrade
Onu Upgrade image state indication handling merged back from master
+ some furthergoing corrections concerning download abort handling
Signed-off-by: mpagenko <michael.pagenkopf@adtran.com>
Change-Id: Ie2a27f00586d43b5b3285474863e0c48c4e384cd
diff --git a/internal/pkg/onuadaptercore/device_handler.go b/internal/pkg/onuadaptercore/device_handler.go
index 4fb0a66..6c34298 100644
--- a/internal/pkg/onuadaptercore/device_handler.go
+++ b/internal/pkg/onuadaptercore/device_handler.go
@@ -46,13 +46,10 @@
"github.com/opencord/voltha-protos/v4/go/voltha"
)
-/*
-// Constants for number of retries and for timeout
+// Constants for timeouts
const (
- MaxRetry = 10
- MaxTimeOutInMs = 500
+ cTimeOutRemoveUpgrade = 1 //for usage in seconds
)
-*/
const (
// events of Device FSM
@@ -225,6 +222,7 @@
UniVlanConfigFsmMap map[uint8]*UniVlanConfigFsm
lockUpgradeFsm sync.RWMutex
pOnuUpradeFsm *OnuUpgradeFsm
+ upgradeCanceled bool
reconciling uint8
mutexReconcilingFlag sync.RWMutex
chReconcilingFinished chan bool //channel to indicate that reconciling has been finished
@@ -235,7 +233,8 @@
readyForOmciConfig bool
deletionInProgress bool
mutexDeletionInProgressFlag sync.RWMutex
- upgradeSuccess bool
+ pLastUpgradeImageState *voltha.ImageState
+ upgradeFsmChan chan struct{}
}
//newDeviceHandler creates a new device handler
@@ -271,6 +270,12 @@
dh.chReconcilingFlowsFinished = make(chan bool)
dh.readyForOmciConfig = false
dh.deletionInProgress = false
+ dh.pLastUpgradeImageState = &voltha.ImageState{
+ DownloadState: voltha.ImageState_DOWNLOAD_UNKNOWN,
+ Reason: voltha.ImageState_UNKNOWN_ERROR,
+ ImageState: voltha.ImageState_IMAGE_UNKNOWN,
+ }
+ dh.upgradeFsmChan = make(chan struct{})
if dh.device.PmConfigs != nil { // can happen after onu adapter restart
dh.pmConfigs = cloned.PmConfigs
@@ -1165,6 +1170,7 @@
}
//doOnuSwUpgrade initiates the SW download transfer to the ONU and on success activates the (inactive) image
+// used only for old - R2.7 style - upgrade API
func (dh *deviceHandler) doOnuSwUpgrade(ctx context.Context, apImageDsc *voltha.ImageDownload,
apDownloadManager *adapterDownloadManager) error {
logger.Debugw(ctx, "onuSwUpgrade requested", log.Fields{
@@ -1181,9 +1187,11 @@
var inactiveImageID uint16
if inactiveImageID, err = pDevEntry.GetInactiveImageMeID(ctx); err == nil {
dh.lockUpgradeFsm.Lock()
- defer dh.lockUpgradeFsm.Unlock()
+ //lockUpgradeFsm must be release before cancelation as this may implicitly request RemoveOnuUpgradeFsm()
+ // but must be still locked at calling createOnuUpgradeFsm
if dh.pOnuUpradeFsm == nil {
err = dh.createOnuUpgradeFsm(ctx, pDevEntry, OmciOnuSwUpgradeDone)
+ dh.lockUpgradeFsm.Unlock()
if err == nil {
if err = dh.pOnuUpradeFsm.SetDownloadParams(ctx, inactiveImageID, apImageDsc, apDownloadManager); err != nil {
logger.Errorw(ctx, "onu upgrade fsm could not set parameters", log.Fields{
@@ -1194,21 +1202,14 @@
"device-id": dh.deviceID, "error": err})
}
} else { //OnuSw upgrade already running - restart (with possible abort of running)
+ dh.lockUpgradeFsm.Unlock()
logger.Debugw(ctx, "Onu SW upgrade already running - abort", log.Fields{"device-id": dh.deviceID})
- pUpgradeStatemachine := dh.pOnuUpradeFsm.pAdaptFsm.pFsm
- if pUpgradeStatemachine != nil {
- if err = pUpgradeStatemachine.Event(upgradeEvAbort); err != nil {
- logger.Errorw(ctx, "onu upgrade fsm could not abort a running processing", log.Fields{
- "device-id": dh.deviceID, "error": err})
- }
- err = fmt.Errorf("aborted Onu SW upgrade but not automatically started, try again, device-id: %s", dh.deviceID)
- //TODO!!!: wait for 'ready' to start and configure - see above SetDownloadParams()
- // for now a second start of download should work again
- } else { //should never occur
- logger.Errorw(ctx, "onu upgrade fsm inconsistent setup", log.Fields{
- "device-id": dh.deviceID})
- err = fmt.Errorf("onu upgrade fsm inconsistent setup, baseFsm invalid for device-id: %s", dh.deviceID)
+ if !dh.upgradeCanceled { //avoid double cancelation in case it is already doing the cancelation
+ dh.upgradeCanceled = true
+ dh.pOnuUpradeFsm.CancelProcessing(ctx, true, voltha.ImageState_CANCELLED_ON_REQUEST) //complete abort
}
+ //no effort spent anymore for the old API to automatically cancel and restart the download
+ // like done for the new API
}
} else {
logger.Errorw(ctx, "start Onu SW upgrade rejected: no inactive image", log.Fields{
@@ -1238,38 +1239,51 @@
logger.Debugw(ctx, "onuSwUpgrade requested", log.Fields{
"device-id": dh.deviceID, "image-version": apImageRequest.Image.Version, "to onu-image": inactiveImageID})
dh.lockUpgradeFsm.Lock()
- defer dh.lockUpgradeFsm.Unlock()
- if dh.pOnuUpradeFsm == nil {
- //OmciOnuSwUpgradeDone could be used to create some Kafka event with information on upgrade completion,
- // but none yet defined
- err = dh.createOnuUpgradeFsm(ctx, pDevEntry, OmciOnuSwUpgradeDone)
- if err == nil {
- if err = dh.pOnuUpradeFsm.SetDownloadParamsAfterDownload(ctx, inactiveImageID,
- apImageRequest, apDownloadManager, aImageIdentifier); err != nil {
- logger.Errorw(ctx, "onu upgrade fsm could not set parameters", log.Fields{
- "device-id": dh.deviceID, "error": err})
- return
- }
- } else {
- logger.Errorw(ctx, "onu upgrade fsm could not be created", log.Fields{
- "device-id": dh.deviceID, "error": err})
+ //lockUpgradeFsm must be release before cancellation as this may implicitly request RemoveOnuUpgradeFsm()
+ // but must be still locked at calling createOnuUpgradeFsm
+ // (and working with a local pointer copy does not work here if asynchronous request are done to fast
+ // [e.g.leaving the local pointer on nil even though a creation is already on the way])
+ if dh.pOnuUpradeFsm != nil {
+ //OnuSw upgrade already running on this device (e.g. with activate/commit not yet set)
+ // abort the current processing, running upgrades are always aborted by newer request
+ logger.Debugw(ctx, "Onu SW upgrade already running - abort previous activity", log.Fields{"device-id": dh.deviceID})
+ //flush the remove upgradeFsmChan channel
+ select {
+ case <-dh.upgradeFsmChan:
+ logger.Debug(ctx, "flushed-upgrade-fsm-channel")
+ default:
}
- return
+ dh.lockUpgradeFsm.Unlock()
+ if !dh.upgradeCanceled { //avoid double cancelation in case it is already doing the cancelation
+ dh.upgradeCanceled = true
+ dh.pOnuUpradeFsm.CancelProcessing(ctx, true, voltha.ImageState_CANCELLED_ON_REQUEST) //complete abort
+ }
+ select {
+ case <-time.After(cTimeOutRemoveUpgrade * time.Second):
+ logger.Errorw(ctx, "could not remove Upgrade FSM in time, aborting", log.Fields{"device-id": dh.deviceID})
+ //should not appear, can't proceed with new upgrade, perhaps operator can retry manually later
+ return
+ case <-dh.upgradeFsmChan:
+ logger.Debugw(ctx, "recent Upgrade FSM removed, proceed with new request", log.Fields{"device-id": dh.deviceID})
+ }
+ dh.lockUpgradeFsm.Lock() //lock again for following creation
}
- //OnuSw upgrade already running - restart (with possible abort of running)
- logger.Debugw(ctx, "Onu SW upgrade already running - abort", log.Fields{"device-id": dh.deviceID})
- pUpgradeStatemachine := dh.pOnuUpradeFsm.pAdaptFsm.pFsm
- if pUpgradeStatemachine != nil {
- if err = pUpgradeStatemachine.Event(upgradeEvAbort); err != nil {
- logger.Errorw(ctx, "onu upgrade fsm could not abort a running processing", log.Fields{
+
+ //here it can be assumed that no running upgrade processing exists (anymore)
+ //OmciOnuSwUpgradeDone could be used to create some event notification with information on upgrade completion,
+ // but none yet defined
+ err = dh.createOnuUpgradeFsm(ctx, pDevEntry, OmciOnuSwUpgradeDone)
+ dh.lockUpgradeFsm.Unlock()
+ if err == nil {
+ if err = dh.pOnuUpradeFsm.SetDownloadParamsAfterDownload(ctx, inactiveImageID,
+ apImageRequest, apDownloadManager, aImageIdentifier); err != nil {
+ logger.Errorw(ctx, "onu upgrade fsm could not set parameters", log.Fields{
"device-id": dh.deviceID, "error": err})
return
}
- //TODO!!!: wait for 'ready' to start and configure - see above SetDownloadParams()
- // for now a second start of download should work again - must still be initiated by user
- } else { //should never occur
- logger.Errorw(ctx, "onu upgrade fsm inconsistent setup", log.Fields{
- "device-id": dh.deviceID})
+ } else {
+ logger.Errorw(ctx, "onu upgrade fsm could not be created", log.Fields{
+ "device-id": dh.deviceID, "error": err})
}
return
}
@@ -1299,9 +1313,13 @@
logger.Errorw(ctx, "Failed to fetch Onu device for image activation", log.Fields{"device-id": dh.deviceID, "err": getErr})
return nil, fmt.Errorf("could not fetch device for device-id: %s", dh.deviceID)
}
+ if dh.upgradeCanceled { //avoid starting some new action in case it is already doing the cancelation
+ logger.Errorw(ctx, "Some upgrade procedure still runs cancelation - abort", log.Fields{"device-id": dh.deviceID})
+ return nil, fmt.Errorf("request collides with some ongoing cancelation for device-id: %s", dh.deviceID)
+ }
// use the OnuVendor identification from this device for the internal unique name
imageIdentifier := onuVolthaDevice.VendorId + aVersion //head on vendor ID of the ONU
- // 1.) check a started upgrade process and rely the activation request to it
+ // 1.) check a started upgrade process and relay the activation request to it
if err = dh.pOnuUpradeFsm.SetActivationParamsRunning(ctx, imageIdentifier, aCommitRequest); err != nil {
//if some ONU upgrade is ongoing we do not accept some explicit ONU image-version related activation
logger.Errorw(ctx, "onu upgrade fsm did not accept activation while running", log.Fields{
@@ -1310,13 +1328,7 @@
}
logger.Debugw(ctx, "image activation acknowledged by onu upgrade processing", log.Fields{
"device-id": dh.deviceID, "image-id": imageIdentifier})
- var pImageStates *voltha.ImageState
- if pImageStates, err = dh.pOnuUpradeFsm.GetImageStates(ctx, "", aVersion); err != nil {
- pImageStates = &voltha.ImageState{}
- pImageStates.DownloadState = voltha.ImageState_DOWNLOAD_UNKNOWN
- pImageStates.Reason = voltha.ImageState_UNKNOWN_ERROR
- pImageStates.ImageState = voltha.ImageState_IMAGE_UNKNOWN
- }
+ pImageStates := dh.pOnuUpradeFsm.GetImageStates(ctx, "", aVersion)
return pImageStates, nil
} //else
dh.lockUpgradeFsm.RUnlock()
@@ -1329,7 +1341,9 @@
"device-id": dh.deviceID, "err": err, "image-id": inactiveImageID})
return nil, fmt.Errorf("no valid inactive image found for device-id: %s", dh.deviceID)
}
+ dh.lockUpgradeFsm.Lock() //lock again for following creation
err = dh.createOnuUpgradeFsm(ctx, pDevEntry, OmciOnuSwUpgradeDone)
+ dh.lockUpgradeFsm.Unlock()
if err == nil {
if err = dh.pOnuUpradeFsm.SetActivationParamsStart(ctx, aVersion,
inactiveImageID, aCommitRequest); err != nil {
@@ -1339,13 +1353,7 @@
}
logger.Debugw(ctx, "inactive image activation acknowledged by onu upgrade", log.Fields{
"device-id": dh.deviceID, "image-version": aVersion})
- var pImageStates *voltha.ImageState
- if pImageStates, err = dh.pOnuUpradeFsm.GetImageStates(ctx, "", aVersion); err != nil {
- pImageStates := &voltha.ImageState{}
- pImageStates.DownloadState = voltha.ImageState_DOWNLOAD_UNKNOWN
- pImageStates.Reason = voltha.ImageState_UNKNOWN_ERROR
- pImageStates.ImageState = voltha.ImageState_IMAGE_UNKNOWN
- }
+ pImageStates := dh.pOnuUpradeFsm.GetImageStates(ctx, "", aVersion)
return pImageStates, nil
} //else
logger.Errorw(ctx, "onu upgrade fsm could not be created", log.Fields{
@@ -1375,24 +1383,24 @@
logger.Errorw(ctx, "Failed to fetch Onu device for image commitment", log.Fields{"device-id": dh.deviceID, "err": getErr})
return nil, fmt.Errorf("could not fetch device for device-id: %s", dh.deviceID)
}
+ if dh.upgradeCanceled { //avoid starting some new action in case it is already doing the cancelation
+ logger.Errorw(ctx, "Some upgrade procedure still runs cancelation - abort", log.Fields{"device-id": dh.deviceID})
+ return nil, fmt.Errorf("request collides with some ongoing cancelation for device-id: %s", dh.deviceID)
+ }
// use the OnuVendor identification from this device for the internal unique name
imageIdentifier := onuVolthaDevice.VendorId + aVersion //head on vendor ID of the ONU
- // 1.) check a started upgrade process and rely the commitment request to it
- if err = dh.pOnuUpradeFsm.SetCommitmentParamsRunning(ctx, imageIdentifier); err != nil {
- //if some ONU upgrade is ongoing we do not accept some explicit ONU image-version related commitment
+ // 1.) check a started upgrade process and relay the commitment request to it
+ // the running upgrade may be based either on the imageIdentifier (started from download)
+ // or on the imageVersion (started from pure activation)
+ if err = dh.pOnuUpradeFsm.SetCommitmentParamsRunning(ctx, imageIdentifier, aVersion); err != nil {
+ //if some ONU upgrade is ongoing we do not accept some explicit different ONU image-version related commitment
logger.Errorw(ctx, "onu upgrade fsm did not accept commitment while running", log.Fields{
"device-id": dh.deviceID, "error": err})
return nil, fmt.Errorf("commitment not accepted for this version for device-id: %s", dh.deviceID)
}
logger.Debugw(ctx, "image commitment acknowledged by onu upgrade processing", log.Fields{
"device-id": dh.deviceID, "image-id": imageIdentifier})
- var pImageStates *voltha.ImageState
- if pImageStates, err = dh.pOnuUpradeFsm.GetImageStates(ctx, "", aVersion); err != nil {
- pImageStates := &voltha.ImageState{}
- pImageStates.DownloadState = voltha.ImageState_DOWNLOAD_UNKNOWN
- pImageStates.Reason = voltha.ImageState_UNKNOWN_ERROR
- pImageStates.ImageState = voltha.ImageState_IMAGE_UNKNOWN
- }
+ pImageStates := dh.pOnuUpradeFsm.GetImageStates(ctx, "", aVersion)
return pImageStates, nil
} //else
dh.lockUpgradeFsm.RUnlock()
@@ -1404,7 +1412,9 @@
"device-id": dh.deviceID, "err": err, "image-id": activeImageID})
return nil, fmt.Errorf("no valid active image found for device-id: %s", dh.deviceID)
}
+ dh.lockUpgradeFsm.Lock() //lock again for following creation
err = dh.createOnuUpgradeFsm(ctx, pDevEntry, OmciOnuSwUpgradeDone)
+ dh.lockUpgradeFsm.Unlock()
if err == nil {
if err = dh.pOnuUpradeFsm.SetCommitmentParamsStart(ctx, aVersion, activeImageID); err != nil {
logger.Errorw(ctx, "onu upgrade fsm did not accept commitment to start", log.Fields{
@@ -1413,13 +1423,7 @@
}
logger.Debugw(ctx, "active image commitment acknowledged by onu upgrade", log.Fields{
"device-id": dh.deviceID, "image-version": aVersion})
- var pImageStates *voltha.ImageState
- if pImageStates, err = dh.pOnuUpradeFsm.GetImageStates(ctx, "", aVersion); err != nil {
- pImageStates := &voltha.ImageState{}
- pImageStates.DownloadState = voltha.ImageState_DOWNLOAD_UNKNOWN
- pImageStates.Reason = voltha.ImageState_UNKNOWN_ERROR
- pImageStates.ImageState = voltha.ImageState_IMAGE_UNKNOWN
- }
+ pImageStates := dh.pOnuUpradeFsm.GetImageStates(ctx, "", aVersion)
return pImageStates, nil
} //else
logger.Errorw(ctx, "onu upgrade fsm could not be created", log.Fields{
@@ -1428,53 +1432,60 @@
}
func (dh *deviceHandler) requestOnuSwUpgradeState(ctx context.Context, aImageIdentifier string,
- aVersion string, pDeviceImageState *voltha.DeviceImageState) {
- pDeviceImageState.DeviceId = dh.deviceID
- pDeviceImageState.ImageState.Version = aVersion
+ aVersion string) *voltha.ImageState {
+ var pImageState *voltha.ImageState
dh.lockUpgradeFsm.RLock()
+ defer dh.lockUpgradeFsm.RUnlock()
if dh.pOnuUpradeFsm != nil {
- dh.lockUpgradeFsm.RUnlock()
- if pImageStates, err := dh.pOnuUpradeFsm.GetImageStates(ctx, aImageIdentifier, aVersion); err == nil {
- pDeviceImageState.ImageState.DownloadState = pImageStates.DownloadState
- pDeviceImageState.ImageState.Reason = pImageStates.Reason
- pDeviceImageState.ImageState.ImageState = pImageStates.ImageState
- } else {
- pDeviceImageState.ImageState.DownloadState = voltha.ImageState_DOWNLOAD_UNKNOWN
- pDeviceImageState.ImageState.Reason = voltha.ImageState_NO_ERROR
- pDeviceImageState.ImageState.ImageState = voltha.ImageState_IMAGE_UNKNOWN
- }
- } else {
- dh.lockUpgradeFsm.RUnlock()
- pDeviceImageState.ImageState.Reason = voltha.ImageState_NO_ERROR
- if dh.upgradeSuccess {
- pDeviceImageState.ImageState.DownloadState = voltha.ImageState_DOWNLOAD_SUCCEEDED
- pDeviceImageState.ImageState.ImageState = voltha.ImageState_IMAGE_COMMITTED
- } else {
- pDeviceImageState.ImageState.DownloadState = voltha.ImageState_DOWNLOAD_UNKNOWN
- pDeviceImageState.ImageState.ImageState = voltha.ImageState_IMAGE_UNKNOWN
+ pImageState = dh.pOnuUpradeFsm.GetImageStates(ctx, aImageIdentifier, aVersion)
+ } else { //use the last stored ImageState (if the requested Imageversion coincides)
+ if aVersion == dh.pLastUpgradeImageState.Version {
+ pImageState = dh.pLastUpgradeImageState
+ } else { //state request for an image version different from last processed image version
+ pImageState = &voltha.ImageState{
+ Version: aVersion,
+ //we cannot state something concerning this version
+ DownloadState: voltha.ImageState_DOWNLOAD_UNKNOWN,
+ Reason: voltha.ImageState_NO_ERROR,
+ ImageState: voltha.ImageState_IMAGE_UNKNOWN,
+ }
}
}
+ return pImageState
}
func (dh *deviceHandler) cancelOnuSwUpgrade(ctx context.Context, aImageIdentifier string,
aVersion string, pDeviceImageState *voltha.DeviceImageState) {
pDeviceImageState.DeviceId = dh.deviceID
pDeviceImageState.ImageState.Version = aVersion
- pDeviceImageState.ImageState.ImageState = voltha.ImageState_IMAGE_UNKNOWN
dh.lockUpgradeFsm.RLock()
if dh.pOnuUpradeFsm != nil {
+ // so then we cancel the upgrade operation
+ // but before we still request the actual upgrade states for the direct response
+ pImageState := dh.pOnuUpradeFsm.GetImageStates(ctx, aImageIdentifier, aVersion)
dh.lockUpgradeFsm.RUnlock()
- //option: it could be also checked if the upgrade FSM is running on the given imageIdentifier or version
- // by now just straightforward assume this to be true
- dh.pOnuUpradeFsm.CancelProcessing(ctx)
- //nolint:misspell
- pDeviceImageState.ImageState.DownloadState = voltha.ImageState_DOWNLOAD_CANCELLED
- //nolint:misspell
+ pDeviceImageState.ImageState.DownloadState = pImageState.DownloadState
pDeviceImageState.ImageState.Reason = voltha.ImageState_CANCELLED_ON_REQUEST
+ pDeviceImageState.ImageState.ImageState = pImageState.ImageState
+ if pImageState.DownloadState != voltha.ImageState_DOWNLOAD_UNKNOWN {
+ //so here the imageIdentifier or version equals to what is used in the upgrade FSM
+ if !dh.upgradeCanceled { //avoid double cancelation in case it is already doing the cancelation
+ dh.upgradeCanceled = true
+ dh.pOnuUpradeFsm.CancelProcessing(ctx, true, voltha.ImageState_CANCELLED_ON_REQUEST) //complete abort
+ }
+ } //nothing to cancel (upgrade FSM for different image stays alive)
} else {
dh.lockUpgradeFsm.RUnlock()
+ // if no upgrade is ongoing, nothing is canceled and accordingly the states of the requested image are unknown
+ // reset also the dh handler LastUpgradeImageState (not relevant anymore/cleared)
+ (*dh.pLastUpgradeImageState).DownloadState = voltha.ImageState_DOWNLOAD_UNKNOWN
+ (*dh.pLastUpgradeImageState).Reason = voltha.ImageState_NO_ERROR
+ (*dh.pLastUpgradeImageState).ImageState = voltha.ImageState_IMAGE_UNKNOWN
+ (*dh.pLastUpgradeImageState).Version = "" //reset to 'no (relevant) upgrade done' (like initial state)
pDeviceImageState.ImageState.DownloadState = voltha.ImageState_DOWNLOAD_UNKNOWN
pDeviceImageState.ImageState.Reason = voltha.ImageState_NO_ERROR
+ pDeviceImageState.ImageState.ImageState = voltha.ImageState_IMAGE_UNKNOWN
+ //an abort request to a not active upgrade processing can be used to reset the device upgrade states completely
}
}
@@ -2130,10 +2141,17 @@
//reset a possibly running upgrade FSM
// (note the Upgrade FSM may stay alive e.g. in state upgradeStWaitForCommit to endure the ONU reboot)
dh.lockUpgradeFsm.RLock()
- if dh.pOnuUpradeFsm != nil {
- dh.pOnuUpradeFsm.CancelProcessing(ctx)
- }
+ lopOnuUpradeFsm := dh.pOnuUpradeFsm
+ //lockUpgradeFsm must be release before cancellation as this may implicitly request RemoveOnuUpgradeFsm()
dh.lockUpgradeFsm.RUnlock()
+ if lopOnuUpradeFsm != nil {
+ if !dh.upgradeCanceled { //avoid double cancelation in case it is already doing the cancelation
+ //here we do not expect intermediate cancelation, we still allow for other commands on this FSM
+ // (even though it may also run into direct cancellation, a bit hard to verify here)
+ // so don't set 'dh.upgradeCanceled = true' here!
+ lopOnuUpradeFsm.CancelProcessing(ctx, false, voltha.ImageState_CANCELLED_ON_ONU_STATE) //conditional cancel
+ }
+ }
logger.Infow(ctx, "resetFsms done", log.Fields{"device-id": dh.deviceID})
return nil
@@ -2451,10 +2469,6 @@
{
dh.processOmciVlanFilterDoneEvent(ctx, devEvent)
}
- case OmciOnuSwUpgradeDone:
- {
- dh.upgradeSuccess = true
- }
default:
{
logger.Debugw(ctx, "unhandled-device-event", log.Fields{"device-id": dh.deviceID, "event": devEvent})
@@ -2702,6 +2716,7 @@
}
// createOnuUpgradeFsm initializes and runs the Onu Software upgrade FSM
+// precondition: lockUpgradeFsm is already locked from caller of this function
func (dh *deviceHandler) createOnuUpgradeFsm(ctx context.Context, apDevEntry *OnuDeviceEntry, aDevEvent OnuDeviceEvent) error {
//in here lockUpgradeFsm is already locked
chUpgradeFsm := make(chan Message, 2048)
@@ -2717,13 +2732,16 @@
pUpgradeStatemachine := dh.pOnuUpradeFsm.pAdaptFsm.pFsm
if pUpgradeStatemachine != nil {
if pUpgradeStatemachine.Is(upgradeStDisabled) {
- dh.upgradeSuccess = false //for start of upgrade processing reset the last indication
if err := pUpgradeStatemachine.Event(upgradeEvStart); err != nil {
logger.Errorw(ctx, "OnuSwUpgradeFSM: can't start", log.Fields{"err": err})
// maybe try a FSM reset and then again ... - TODO!!!
return fmt.Errorf(fmt.Sprintf("OnuSwUpgradeFSM could not be started for device-id: %s", dh.device.Id))
}
- /***** LockStateFSM started */
+ /***** Upgrade FSM started */
+ //reset the last stored upgrade states (which anyway should be don't care as long as the newly created FSM exists)
+ (*dh.pLastUpgradeImageState).DownloadState = voltha.ImageState_DOWNLOAD_UNKNOWN
+ (*dh.pLastUpgradeImageState).Reason = voltha.ImageState_NO_ERROR
+ (*dh.pLastUpgradeImageState).ImageState = voltha.ImageState_IMAGE_UNKNOWN
logger.Debugw(ctx, "OnuSwUpgradeFSM started", log.Fields{
"state": pUpgradeStatemachine.Current(), "device-id": dh.deviceID})
} else {
@@ -2745,12 +2763,21 @@
}
// removeOnuUpgradeFsm clears the Onu Software upgrade FSM
-func (dh *deviceHandler) removeOnuUpgradeFsm(ctx context.Context) {
+func (dh *deviceHandler) RemoveOnuUpgradeFsm(ctx context.Context, apImageState *voltha.ImageState) {
logger.Debugw(ctx, "remove OnuSwUpgradeFSM StateMachine", log.Fields{
"device-id": dh.deviceID})
dh.lockUpgradeFsm.Lock()
- defer dh.lockUpgradeFsm.Unlock()
- dh.pOnuUpradeFsm = nil //resource clearing is left to garbage collector
+ dh.pOnuUpradeFsm = nil //resource clearing is left to garbage collector
+ dh.upgradeCanceled = false //cancelation done
+ dh.pLastUpgradeImageState = apImageState
+ dh.lockUpgradeFsm.Unlock()
+ //signal upgradeFsm removed using non-blocking channel send
+ select {
+ case dh.upgradeFsmChan <- struct{}{}:
+ default:
+ logger.Debugw(ctx, "removed-UpgradeFsm signal not send on upgradeFsmChan (no receiver)", log.Fields{
+ "device-id": dh.deviceID})
+ }
}
// checkOnOnuImageCommit verifies if the ONU is in some upgrade state that allows for image commit and if tries to commit
@@ -2762,8 +2789,13 @@
}
dh.lockUpgradeFsm.RLock()
- defer dh.lockUpgradeFsm.RUnlock()
+ //lockUpgradeFsm must be release before cancellation as this may implicitly request RemoveOnuUpgradeFsm()
if dh.pOnuUpradeFsm != nil {
+ if dh.upgradeCanceled { //avoid starting some new action in case it is already doing the cancelation
+ dh.lockUpgradeFsm.RUnlock()
+ logger.Errorw(ctx, "Some upgrade procedure still runs cancelation - abort", log.Fields{"device-id": dh.deviceID})
+ return
+ }
pUpgradeStatemachine := dh.pOnuUpradeFsm.pAdaptFsm.pFsm
if pUpgradeStatemachine != nil {
// commit is only processed in case out upgrade FSM indicates the according state (for automatic commit)
@@ -2776,11 +2808,16 @@
if pDevEntry.IsImageToBeCommitted(ctx, dh.pOnuUpradeFsm.inactiveImageMeID) {
activeImageID, errImg := pDevEntry.GetActiveImageMeID(ctx)
if errImg != nil {
+ dh.lockUpgradeFsm.RUnlock()
logger.Errorw(ctx, "OnuSwUpgradeFSM abort - could not get active image after reboot",
log.Fields{"device-id": dh.deviceID})
- _ = pUpgradeStatemachine.Event(upgradeEvAbort)
+ if !dh.upgradeCanceled { //avoid double cancelation in case it is already doing the cancelation
+ dh.upgradeCanceled = true
+ dh.pOnuUpradeFsm.CancelProcessing(ctx, true, voltha.ImageState_CANCELLED_ON_ONU_STATE) //complete abort
+ }
return
}
+ dh.lockUpgradeFsm.RUnlock()
if activeImageID == dh.pOnuUpradeFsm.inactiveImageMeID {
if (upgradeState == upgradeStRequestingActivate) && !dh.pOnuUpradeFsm.GetCommitFlag(ctx) {
// if FSM was waiting on activateResponse, new image is active, but FSM shall not commit, then:
@@ -2802,30 +2839,36 @@
} else {
logger.Errorw(ctx, "OnuSwUpgradeFSM waiting to commit/on ActivateResponse, but load did not start with expected image Id",
log.Fields{"device-id": dh.deviceID})
- _ = pUpgradeStatemachine.Event(upgradeEvAbort)
- return
+ if !dh.upgradeCanceled { //avoid double cancelation in case it is already doing the cancelation
+ dh.upgradeCanceled = true
+ dh.pOnuUpradeFsm.CancelProcessing(ctx, true, voltha.ImageState_CANCELLED_ON_ONU_STATE) //complete abort
+ }
}
- } else {
- logger.Errorw(ctx, "OnuSwUpgradeFSM waiting to commit, but nothing to commit on ONU - abort upgrade",
- log.Fields{"device-id": dh.deviceID})
- _ = pUpgradeStatemachine.Event(upgradeEvAbort)
return
}
- } else {
- //upgrade FSM is active but not waiting for commit: maybe because commit flag is not set
- // upgrade FSM is to be informed if the current active image is the one that was used in upgrade for the download
- if activeImageID, err := pDevEntry.GetActiveImageMeID(ctx); err == nil {
- if dh.pOnuUpradeFsm.inactiveImageMeID == activeImageID {
- logger.Debugw(ctx, "OnuSwUpgradeFSM image state set to activated", log.Fields{
- "state": pUpgradeStatemachine.Current(), "device-id": dh.deviceID})
- dh.pOnuUpradeFsm.SetImageState(ctx, voltha.ImageState_IMAGE_ACTIVE)
- }
+ dh.lockUpgradeFsm.RUnlock()
+ logger.Errorw(ctx, "OnuSwUpgradeFSM waiting to commit, but nothing to commit on ONU - abort upgrade",
+ log.Fields{"device-id": dh.deviceID})
+ if !dh.upgradeCanceled { //avoid double cancelation in case it is already doing the cancelation
+ dh.upgradeCanceled = true
+ dh.pOnuUpradeFsm.CancelProcessing(ctx, true, voltha.ImageState_CANCELLED_ON_ONU_STATE) //complete abort
+ }
+ return
+ }
+ //upgrade FSM is active but not waiting for commit: maybe because commit flag is not set
+ // upgrade FSM is to be informed if the current active image is the one that was used in upgrade for the download
+ if activeImageID, err := pDevEntry.GetActiveImageMeID(ctx); err == nil {
+ if dh.pOnuUpradeFsm.inactiveImageMeID == activeImageID {
+ logger.Debugw(ctx, "OnuSwUpgradeFSM image state set to activated", log.Fields{
+ "state": pUpgradeStatemachine.Current(), "device-id": dh.deviceID})
+ dh.pOnuUpradeFsm.SetImageStateActive(ctx)
}
}
}
} else {
logger.Debugw(ctx, "no ONU image to be committed", log.Fields{"device-id": dh.deviceID})
}
+ dh.lockUpgradeFsm.RUnlock()
}
//setBackend provides a DB backend for the specified path on the existing KV client
@@ -3525,29 +3568,45 @@
}
func (dh *deviceHandler) isFsmInOmciIdleStateDefault(ctx context.Context, omciFsm usedOmciConfigFsms, wantedState string) bool {
- var pFsm *fsm.FSM
- //note/TODO!!: might be that access to all these specific FSM; pointers need a semaphore protection as well, cmp lockUpgradeFsm
+ var pAdapterFsm *AdapterFsm
+ //note/TODO!!: might be that access to all these specific FSM pointers need a semaphore protection as well, cmp lockUpgradeFsm
switch omciFsm {
case cUploadFsm:
{
- pFsm = dh.pOnuOmciDevice.pMibUploadFsm.pFsm
+ if dh.pOnuOmciDevice != nil {
+ pAdapterFsm = dh.pOnuOmciDevice.pMibUploadFsm
+ } else {
+ return true //FSM not active - so there is no activity on omci
+ }
}
case cDownloadFsm:
{
- pFsm = dh.pOnuOmciDevice.pMibDownloadFsm.pFsm
+ if dh.pOnuOmciDevice != nil {
+ pAdapterFsm = dh.pOnuOmciDevice.pMibDownloadFsm
+ } else {
+ return true //FSM not active - so there is no activity on omci
+ }
}
case cUniLockFsm:
{
- pFsm = dh.pLockStateFsm.pAdaptFsm.pFsm
+ if dh.pLockStateFsm != nil {
+ pAdapterFsm = dh.pLockStateFsm.pAdaptFsm
+ } else {
+ return true //FSM not active - so there is no activity on omci
+ }
}
case cUniUnLockFsm:
{
- pFsm = dh.pUnlockStateFsm.pAdaptFsm.pFsm
+ if dh.pUnlockStateFsm != nil {
+ pAdapterFsm = dh.pUnlockStateFsm.pAdaptFsm
+ } else {
+ return true //FSM not active - so there is no activity on omci
+ }
}
case cL2PmFsm:
{
- if dh.pOnuMetricsMgr != nil && dh.pOnuMetricsMgr.pAdaptFsm != nil {
- pFsm = dh.pOnuMetricsMgr.pAdaptFsm.pFsm
+ if dh.pOnuMetricsMgr != nil {
+ pAdapterFsm = dh.pOnuMetricsMgr.pAdaptFsm
} else {
return true //FSM not active - so there is no activity on omci
}
@@ -3556,7 +3615,11 @@
{
dh.lockUpgradeFsm.RLock()
defer dh.lockUpgradeFsm.RUnlock()
- pFsm = dh.pOnuUpradeFsm.pAdaptFsm.pFsm
+ if dh.pOnuUpradeFsm != nil {
+ pAdapterFsm = dh.pOnuUpradeFsm.pAdaptFsm
+ } else {
+ return true //FSM not active - so there is no activity on omci
+ }
}
default:
{
@@ -3565,7 +3628,10 @@
return false //logical error in FSM check, do not not indicate 'idle' - we can't be sure
}
}
- return dh.isFsmInOmciIdleState(ctx, pFsm, wantedState)
+ if pAdapterFsm != nil && pAdapterFsm.pFsm != nil {
+ return dh.isFsmInOmciIdleState(ctx, pAdapterFsm.pFsm, wantedState)
+ }
+ return true //FSM not active - so there is no activity on omci
}
func (dh *deviceHandler) isAniConfigFsmInOmciIdleState(ctx context.Context, omciFsm usedOmciConfigFsms, idleState string) bool {