[VOL-4450] ONU upgrade to multiple ONU devices control insufficient, possibly leads to adapter crash
Change-Id: I5c56438c8bb7269e886f26035d251c961b00faf2
diff --git a/internal/pkg/onuadaptercore/file_download_manager.go b/internal/pkg/onuadaptercore/file_download_manager.go
index 4793e69..4948323 100644
--- a/internal/pkg/onuadaptercore/file_download_manager.go
+++ b/internal/pkg/onuadaptercore/file_download_manager.go
@@ -43,14 +43,14 @@
cFileStateDlSucceeded
cFileStateDlFailed
cFileStateDlAborted
- cFileStateDlInvalid
)
type downloadImageParams struct {
downloadImageName string
+ downloadImageURL string
downloadImageState fileState
downloadImageLen int64
- downloadImageCrc uint32
+ downloadImageCRC uint32
downloadActive bool
downloadContextCancelFn context.CancelFunc
}
@@ -59,6 +59,7 @@
//fileDownloadManager structure holds information needed for downloading to and storing images within the adapter
type fileDownloadManager struct {
+ mutexFileState sync.RWMutex
mutexDownloadImageDsc sync.RWMutex
downloadImageDscSlice []downloadImageParams
dnldImgReadyWaiting map[string]requesterChannelMap
@@ -91,38 +92,45 @@
return dm.dlToAdapterTimeout
}
-//ImageExists returns true if the requested image already exists within the adapter
-func (dm *fileDownloadManager) ImageExists(ctx context.Context, aImageName string) bool {
- logger.Debugw(ctx, "checking on existence of the image", log.Fields{"image-name": aImageName})
- dm.mutexDownloadImageDsc.RLock()
- defer dm.mutexDownloadImageDsc.RUnlock()
-
- for _, dnldImgDsc := range dm.downloadImageDscSlice {
- if dnldImgDsc.downloadImageName == aImageName {
- //image found (by name)
- return true
- }
- }
- //image not found (by name)
- return false
-}
-
-//StartDownload returns true if the download of the requested image could be started for the given file name and URL
-func (dm *fileDownloadManager) StartDownload(ctx context.Context, aImageName string, aURLCommand string) error {
+//StartDownload returns FileState and error code from download request for the given file name and URL
+func (dm *fileDownloadManager) StartDownload(ctx context.Context, aImageName string, aURLCommand string) (fileState, error) {
logger.Infow(ctx, "image download-to-adapter requested", log.Fields{
"image-name": aImageName, "url-command": aURLCommand})
- loDownloadImageParams := downloadImageParams{
- downloadImageName: aImageName, downloadImageState: cFileStateDlStarted,
- downloadImageLen: 0, downloadImageCrc: 0}
- //try to download from http
- var err error
- if err = dm.downloadFile(ctx, aURLCommand, cDefaultLocalDir, aImageName); err == nil {
- dm.mutexDownloadImageDsc.Lock()
+ // keep a semaphore over the complete method in order to avoid parallel entrance to this method
+ // otherwise a temporary file state 'Started' could be indicated allowing start of ONU upgrade handling
+ // even though the download-start to adapter may fail (e.g on wrong URL) (delivering inconsistent download results)
+ // so once called the download-start of the first call must have been completely checked before another execution
+ // could still be limited to the same imageName, but that should be no real gain
+ dm.mutexFileState.Lock()
+ defer dm.mutexFileState.Unlock()
+ dm.mutexDownloadImageDsc.Lock()
+ var fileState fileState
+ var exists bool
+ if fileState, exists = dm.imageExists(ctx, aImageName, aURLCommand); !exists {
+ loDownloadImageParams := downloadImageParams{
+ downloadImageName: aImageName, downloadImageURL: aURLCommand, downloadImageState: cFileStateDlStarted,
+ downloadImageLen: 0, downloadImageCRC: 0}
dm.downloadImageDscSlice = append(dm.downloadImageDscSlice, loDownloadImageParams)
dm.mutexDownloadImageDsc.Unlock()
+ //start downloading from server
+ var err error
+ if err = dm.downloadFile(ctx, aURLCommand, cDefaultLocalDir, aImageName); err == nil {
+ //indicate download started correctly, complete download may run in background
+ return cFileStateDlStarted, nil
+ }
+ //return the error result of the download-request
+ return cFileStateUnknown, err
}
- //return the result of the start-request to comfort the core processing even though the complete download may go on in background
- return err
+ dm.mutexDownloadImageDsc.Unlock()
+ if fileState == cFileStateUnknown {
+ //cannot simply remove the existing file here - might still be used for running upgrades on different devices!
+ // (has to be removed by operator - cancel API)
+ logger.Errorw(ctx, "image download requested for existing file with different URL",
+ log.Fields{"image-description": aImageName, "url": aURLCommand})
+ return fileState, fmt.Errorf("existing file is based on different URL, requested URL: %s", aURLCommand)
+ }
+ logger.Debugw(ctx, "image download already started or done", log.Fields{"image-description": aImageName})
+ return fileState, nil
}
//GetImageBufferLen returns the length of the specified file in bytes (file size) - as detected after download
@@ -139,6 +147,8 @@
}
//GetDownloadImageBuffer returns the content of the requested file as byte slice
+//precondition: it is assumed that a check is done immediately before if the file was downloaded to adapter correctly from caller
+// straightforward approach is here to e.g. immediately call and verify GetImageBufferLen() before this
func (dm *fileDownloadManager) GetDownloadImageBuffer(ctx context.Context, aFileName string) ([]byte, error) {
file, err := os.Open(filepath.Clean(cDefaultLocalDir + "/" + aFileName))
if err != nil {
@@ -221,6 +231,24 @@
// FileDownloadManager private (unexported) methods -- start
+//imageExists returns current ImageState and if the requested image already exists within the adapter
+//precondition: at calling this method mutexDownloadImageDsc must already be at least RLocked by the caller
+func (dm *fileDownloadManager) imageExists(ctx context.Context, aImageName string, aURL string) (fileState, bool) {
+ logger.Debugw(ctx, "checking on existence of the image", log.Fields{"image-name": aImageName})
+ for _, dnldImgDsc := range dm.downloadImageDscSlice {
+ if dnldImgDsc.downloadImageName == aImageName {
+ //image found (by name)
+ if dnldImgDsc.downloadImageURL == aURL {
+ //image found (by name and URL)
+ return dnldImgDsc.downloadImageState, true
+ }
+ return cFileStateUnknown, true //use fileState to indicate URL mismatch for existing file
+ }
+ }
+ //image not found (by name)
+ return cFileStateUnknown, false
+}
+
//imageLocallyDownloaded returns true if the requested image already exists within the adapter
// requires mutexDownloadImageDsc to be locked (at least RLocked)
func (dm *fileDownloadManager) imageLocallyDownloaded(ctx context.Context, aImageName string) bool {
@@ -294,6 +322,7 @@
urlBase, err1 := url.Parse(aURLCommand)
if err1 != nil {
logger.Errorw(ctx, "could not set base url command", log.Fields{"url": aURLCommand, "error": err1})
+ dm.removeImage(ctx, aFileName, false) //wo FileSystem access
return fmt.Errorf("could not set base url command: %s, error: %s", aURLCommand, err1)
}
urlParams := url.Values{}
@@ -303,6 +332,7 @@
reqExist, errExist2 := http.NewRequest("HEAD", urlBase.String(), nil)
if errExist2 != nil {
logger.Errorw(ctx, "could not generate http head request", log.Fields{"url": urlBase.String(), "error": errExist2})
+ dm.removeImage(ctx, aFileName, false) //wo FileSystem access
return fmt.Errorf("could not generate http head request: %s, error: %s", aURLCommand, errExist2)
}
ctxExist, cancelExist := context.WithDeadline(ctx, time.Now().Add(3*time.Second)) //waiting for some fast answer
@@ -313,6 +343,7 @@
if respExist == nil {
logger.Errorw(ctx, "http head from url error - no status, aborting", log.Fields{"url": urlBase.String(),
"error": errExist3})
+ dm.removeImage(ctx, aFileName, false) //wo FileSystem access
return fmt.Errorf("http head from url error - no status, aborting: %s, error: %s",
aURLCommand, errExist3)
}
@@ -322,6 +353,7 @@
if respExist.StatusCode != http.StatusMethodNotAllowed {
logger.Errorw(ctx, "http head from url: file does not exist here, aborting", log.Fields{"url": urlBase.String(),
"error": errExist3, "status": respExist.StatusCode})
+ dm.removeImage(ctx, aFileName, false) //wo FileSystem access
return fmt.Errorf("http head from url: file does not exist here, aborting: %s, error: %s, status: %d",
aURLCommand, errExist3, respExist.StatusCode)
}
@@ -403,7 +435,7 @@
//removeImage deletes the given image according to the Image name from filesystem and downloadImageDscSlice
func (dm *fileDownloadManager) removeImage(ctx context.Context, aImageName string, aDelFs bool) {
- logger.Debugw(ctx, "remove the image from Adapter", log.Fields{"image-name": aImageName})
+ logger.Debugw(ctx, "remove the image from Adapter", log.Fields{"image-name": aImageName, "del-fs": aDelFs})
dm.mutexDownloadImageDsc.RLock()
defer dm.mutexDownloadImageDsc.RUnlock()
diff --git a/internal/pkg/onuadaptercore/omci_onu_upgrade.go b/internal/pkg/onuadaptercore/omci_onu_upgrade.go
index 49b320d..657c857 100644
--- a/internal/pkg/onuadaptercore/omci_onu_upgrade.go
+++ b/internal/pkg/onuadaptercore/omci_onu_upgrade.go
@@ -624,7 +624,7 @@
} else {
fileLen, err = oFsm.pDownloadManager.getImageBufferLen(ctx, oFsm.pImageDsc.Name, oFsm.pImageDsc.LocalDir)
}
- if err != nil || fileLen > int64(cMaxUint32) {
+ if err != nil || fileLen == 0 || fileLen > int64(cMaxUint32) {
oFsm.volthaDownloadReason = voltha.ImageState_UNKNOWN_ERROR //something like 'LOCAL_FILE_ERROR' would be better (proto)
oFsm.mutexUpgradeParams.Unlock()
logger.Errorw(ctx, "OnuUpgradeFsm abort: problems getting image buffer length", log.Fields{
@@ -638,11 +638,11 @@
}
//copy file content to buffer
- oFsm.imageBuffer = make([]byte, fileLen)
+ var imageBuffer []byte
if oFsm.useAPIVersion43 {
- oFsm.imageBuffer, err = oFsm.pFileManager.GetDownloadImageBuffer(ctx, oFsm.imageIdentifier)
+ imageBuffer, err = oFsm.pFileManager.GetDownloadImageBuffer(ctx, oFsm.imageIdentifier)
} else {
- oFsm.imageBuffer, err = oFsm.pDownloadManager.getDownloadImageBuffer(ctx, oFsm.pImageDsc.Name, oFsm.pImageDsc.LocalDir)
+ imageBuffer, err = oFsm.pDownloadManager.getDownloadImageBuffer(ctx, oFsm.pImageDsc.Name, oFsm.pImageDsc.LocalDir)
}
if err != nil {
oFsm.volthaDownloadReason = voltha.ImageState_UNKNOWN_ERROR //something like 'LOCAL_FILE_ERROR' would be better (proto)
@@ -656,12 +656,17 @@
}(pBaseFsm)
return
}
+ //provide slice capacity already with the reserve of one section to avoid inflation of the slice to double size at append
+ oFsm.imageBuffer = make([]byte, fileLen, fileLen+cOmciDownloadSectionSize)
+ //better use a copy of the read image buffer in case the buffer/file is modified from outside,
+ // this also limits the slice len to the expected maximum fileLen
+ copy(oFsm.imageBuffer, imageBuffer)
oFsm.noOfSections = uint32(fileLen / cOmciDownloadSectionSize)
if fileLen%cOmciDownloadSectionSize > 0 {
bufferPadding := make([]byte, cOmciDownloadSectionSize-uint32((fileLen)%cOmciDownloadSectionSize))
//expand the imageBuffer to exactly fit multiples of cOmciDownloadSectionSize with padding
- oFsm.imageBuffer = append(oFsm.imageBuffer[:(fileLen)], bufferPadding...)
+ oFsm.imageBuffer = append(oFsm.imageBuffer, bufferPadding...)
oFsm.noOfSections++
}
oFsm.origImageLength = uint32(fileLen)
diff --git a/internal/pkg/onuadaptercore/openonu.go b/internal/pkg/onuadaptercore/openonu.go
index ef74c68..61f3c03 100644
--- a/internal/pkg/onuadaptercore/openonu.go
+++ b/internal/pkg/onuadaptercore/openonu.go
@@ -633,7 +633,7 @@
if request != nil && len((*request).DeviceId) > 0 && (*request).Image.Version != "" {
loResponse := voltha.DeviceImageResponse{}
imageIdentifier := (*request).Image.Version
- downloadedToAdapter := false
+ downloadStartDone := false
firstDevice := true
var vendorID string
var onuVolthaDevice *voltha.Device
@@ -666,28 +666,24 @@
if firstDevice {
//start/verify download of the image to the adapter based on first found device only
// use the OnuVendor identification from first given device
+ // note: if the request was done for a list of devices on the Voltha interface, rwCore
+ // translates that into a new rpc for each device, hence each device will be the first device in parallel requests!
firstDevice = false
vendorID = onuVolthaDevice.VendorId
imageIdentifier = vendorID + imageIdentifier //head on vendor ID of the ONU
- logger.Debugw(ctx, "download request for file", log.Fields{"image-id": imageIdentifier})
-
- if !oo.pFileManager.ImageExists(ctx, imageIdentifier) {
- logger.Debugw(ctx, "start image download", log.Fields{"image-description": request})
- // Download_image is not supposed to be blocking, anyway let's call the DownloadManager still synchronously to detect 'fast' problems
- // the download itself is later done in background
- if err := oo.pFileManager.StartDownload(ctx, imageIdentifier, (*request).Image.Url); err == nil {
- downloadedToAdapter = true
- }
- //else: treat any error here as 'INVALID_URL' (even though it might as well be some issue on local FS, eg. 'INSUFFICIENT_SPACE')
- // otherwise a more sophisticated error evaluation is needed
- } else {
- // image already exists
- downloadedToAdapter = true
- logger.Debugw(ctx, "image already downloaded", log.Fields{"image-description": imageIdentifier})
+ logger.Infow(ctx, "download request for file",
+ log.Fields{"device-id": loDeviceID, "image-id": imageIdentifier})
+ // call the StartDownload synchronously to detect 'immediate' download problems
+ // the real download itself is later done in background
+ if fileState, err := oo.pFileManager.StartDownload(ctx, imageIdentifier, (*request).Image.Url); err == nil {
// note: If the image (with vendorId+name) has already been downloaded before from some other
- // valid URL, the current URL is just ignored. If the operators want to ensure that the new URL
+ // valid URL, the current download request is not executed (current code delivers URL error).
+ // If the operators want to ensure that the new URL
// is really used, then they first have to use the 'abort' API to remove the existing image!
// (abort API can be used also after some successful download to just remove the image from adapter)
+ if fileState == cFileStateDlSucceeded || fileState == cFileStateDlStarted {
+ downloadStartDone = true
+ } //else fileState may also indicate error situation, where the requested image is not ready to be used for other devices
}
} else {
//for all following devices verify the matching vendorID
@@ -697,11 +693,11 @@
vendorIDMatch = false
}
}
- if downloadedToAdapter && vendorIDMatch {
+ if downloadStartDone && vendorIDMatch {
// start the ONU download activity for each possible device
- logger.Debugw(ctx, "image download on omci requested", log.Fields{
+ logger.Infow(ctx, "request image download to ONU on omci ", log.Fields{
"image-id": imageIdentifier, "device-id": loDeviceID})
- //onu upgrade handling called in background without immediate error evaluation here
+ // onu upgrade handling called in background without immediate error evaluation here
// as the processing can be done for multiple ONU's and an error on one ONU should not stop processing for others
// state/progress/success of the request has to be verified using the Get_onu_image_status() API
go handler.onuSwUpgradeAfterDownload(ctx, request, oo.pFileManager, imageIdentifier)
@@ -710,7 +706,9 @@
loDeviceImageState.ImageState.ImageState = voltha.ImageState_IMAGE_UNKNOWN
} else {
loDeviceImageState.ImageState.DownloadState = voltha.ImageState_DOWNLOAD_FAILED
- if !downloadedToAdapter {
+ if !downloadStartDone {
+ // based on above fileState more descriptive error codes would be possible, e.g
+ // IMAGE_EXISTS_WITH_DIFFERENT_URL - would require proto buf update
loDeviceImageState.ImageState.Reason = voltha.ImageState_INVALID_URL
} else { //only logical option is !vendorIDMatch
loDeviceImageState.ImageState.Reason = voltha.ImageState_VENDOR_DEVICE_MISMATCH