[VOL-3956] openonuAdapterGo - crash at mibUpload processing on unexpected GetResponse from ONU

Signed-off-by: mpagenko <michael.pagenkopf@adtran.com>
Change-Id: I88abe4707bc2467cc9f72e37fa9a621e511c0c1c
diff --git a/VERSION b/VERSION
index 46f5106..08a1262 100755
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.2.7-dev187
+1.2.7-dev188
diff --git a/internal/pkg/onuadaptercore/mib_sync.go b/internal/pkg/onuadaptercore/mib_sync.go
index a9f095e..c4f8d62 100644
--- a/internal/pkg/onuadaptercore/mib_sync.go
+++ b/internal/pkg/onuadaptercore/mib_sync.go
@@ -42,6 +42,12 @@
 	//"github.com/opencord/voltha-protos/v4/go/voltha"
 )
 
+type sLastTxMeParameter struct {
+	lastTxMessageType omci.MessageType
+	pLastTxMeInstance *me.ManagedEntity
+	repeatCount       uint8
+}
+
 var supportedClassIds = []me.ClassID{
 	me.CardholderClassID,                              // 5
 	me.CircuitPackClassID,                             // 6
@@ -81,6 +87,10 @@
 	logger.Debugw(ctx, "MibSync FSM", log.Fields{"send mibReset in State": e.FSM.Current(), "device-id": oo.deviceID})
 	_ = oo.PDevOmciCC.sendMibReset(log.WithSpanFromContext(context.TODO(), ctx), oo.pOpenOnuAc.omciTimeout, true)
 	//TODO: needs to handle timeouts
+	//even though lastTxParameters are currently not used for checking the ResetResponse message we have to ensure
+	//  that the lastTxMessageType is correctly set to avoid misinterpreting other responses
+	oo.lastTxParamStruct.lastTxMessageType = omci.MibResetRequestType
+	oo.lastTxParamStruct.repeatCount = 0
 }
 
 func (oo *OnuDeviceEntry) enterGettingVendorAndSerialState(ctx context.Context, e *fsm.Event) {
@@ -89,7 +99,8 @@
 	meInstance := oo.PDevOmciCC.sendGetMe(log.WithSpanFromContext(context.TODO(), ctx), me.OnuGClassID, onugMeID, requestedAttributes, oo.pOpenOnuAc.omciTimeout, true, oo.pMibUploadFsm.commChan)
 	//accept also nil as (error) return value for writing to LastTx
 	//  - this avoids misinterpretation of new received OMCI messages
-	oo.PDevOmciCC.pLastTxMeInstance = meInstance
+	oo.lastTxParamStruct.lastTxMessageType = omci.GetRequestType
+	oo.lastTxParamStruct.pLastTxMeInstance = meInstance
 }
 
 func (oo *OnuDeviceEntry) enterGettingEquipmentIDState(ctx context.Context, e *fsm.Event) {
@@ -98,7 +109,8 @@
 	meInstance := oo.PDevOmciCC.sendGetMe(log.WithSpanFromContext(context.TODO(), ctx), me.Onu2GClassID, onu2gMeID, requestedAttributes, oo.pOpenOnuAc.omciTimeout, true, oo.pMibUploadFsm.commChan)
 	//accept also nil as (error) return value for writing to LastTx
 	//  - this avoids misinterpretation of new received OMCI messages
-	oo.PDevOmciCC.pLastTxMeInstance = meInstance
+	oo.lastTxParamStruct.lastTxMessageType = omci.GetRequestType
+	oo.lastTxParamStruct.pLastTxMeInstance = meInstance
 }
 
 func (oo *OnuDeviceEntry) enterGettingFirstSwVersionState(ctx context.Context, e *fsm.Event) {
@@ -107,7 +119,8 @@
 	meInstance := oo.PDevOmciCC.sendGetMe(log.WithSpanFromContext(context.TODO(), ctx), me.SoftwareImageClassID, firstSwImageMeID, requestedAttributes, oo.pOpenOnuAc.omciTimeout, true, oo.pMibUploadFsm.commChan)
 	//accept also nil as (error) return value for writing to LastTx
 	//  - this avoids misinterpretation of new received OMCI messages
-	oo.PDevOmciCC.pLastTxMeInstance = meInstance
+	oo.lastTxParamStruct.lastTxMessageType = omci.GetRequestType
+	oo.lastTxParamStruct.pLastTxMeInstance = meInstance
 }
 
 func (oo *OnuDeviceEntry) enterGettingSecondSwVersionState(ctx context.Context, e *fsm.Event) {
@@ -116,7 +129,8 @@
 	meInstance := oo.PDevOmciCC.sendGetMe(log.WithSpanFromContext(context.TODO(), ctx), me.SoftwareImageClassID, secondSwImageMeID, requestedAttributes, oo.pOpenOnuAc.omciTimeout, true, oo.pMibUploadFsm.commChan)
 	//accept also nil as (error) return value for writing to LastTx
 	//  - this avoids misinterpretation of new received OMCI messages
-	oo.PDevOmciCC.pLastTxMeInstance = meInstance
+	oo.lastTxParamStruct.lastTxMessageType = omci.GetRequestType
+	oo.lastTxParamStruct.pLastTxMeInstance = meInstance
 }
 
 func (oo *OnuDeviceEntry) enterGettingMacAddressState(ctx context.Context, e *fsm.Event) {
@@ -125,7 +139,8 @@
 	meInstance := oo.PDevOmciCC.sendGetMe(log.WithSpanFromContext(context.TODO(), ctx), me.IpHostConfigDataClassID, ipHostConfigDataMeID, requestedAttributes, oo.pOpenOnuAc.omciTimeout, true, oo.pMibUploadFsm.commChan)
 	//accept also nil as (error) return value for writing to LastTx
 	//  - this avoids misinterpretation of new received OMCI messages
-	oo.PDevOmciCC.pLastTxMeInstance = meInstance
+	oo.lastTxParamStruct.lastTxMessageType = omci.GetRequestType
+	oo.lastTxParamStruct.pLastTxMeInstance = meInstance
 }
 
 func (oo *OnuDeviceEntry) enterGettingMibTemplateState(ctx context.Context, e *fsm.Event) {
@@ -167,6 +182,9 @@
 func (oo *OnuDeviceEntry) enterUploadingState(ctx context.Context, e *fsm.Event) {
 	logger.Debugw(ctx, "MibSync FSM", log.Fields{"send MibUpload in State": e.FSM.Current(), "device-id": oo.deviceID})
 	_ = oo.PDevOmciCC.sendMibUpload(log.WithSpanFromContext(context.TODO(), ctx), oo.pOpenOnuAc.omciTimeout, true)
+	//even though lastTxParameters are currently not used for checking the ResetResponse message we have to ensure
+	//  that the lastTxMessageType is correctly set to avoid misinterpreting other responses
+	oo.lastTxParamStruct.lastTxMessageType = omci.MibUploadRequestType
 }
 
 func (oo *OnuDeviceEntry) enterUploadDoneState(ctx context.Context, e *fsm.Event) {
@@ -338,12 +356,24 @@
 			logger.Errorw(ctx, "Omci Msg layer could not be detected", log.Fields{"device-id": oo.deviceID})
 		}
 	} else {
-		logger.Errorw(ctx, "Wrong Omci MibResetResponse received", log.Fields{"in state ": oo.pMibUploadFsm.pFsm.Current,
-			"device-id": oo.deviceID})
+		//in case the last request was MdsGetRequest this issue may appear if the ONU was online before and has received the MIB reset
+		//  with Sequence number 0x8000 as last request before - so it may still respond to that
+		//  then we may force the ONU to react on the MdsGetRequest with a new message that uses an increased Sequence number
+		if oo.lastTxParamStruct.lastTxMessageType == omci.GetRequestType && oo.lastTxParamStruct.repeatCount == 0 {
+			logger.Debugw(ctx, "MibSync FSM - repeat MdsGetRequest (updated SequenceNumber)", log.Fields{"device-id": oo.deviceID})
+			requestedAttributes := me.AttributeValueMap{"MibDataSync": ""}
+			_ = oo.PDevOmciCC.sendGetMe(log.WithSpanFromContext(context.TODO(), ctx),
+				me.OnuDataClassID, onuDataMeID, requestedAttributes, oo.pOpenOnuAc.omciTimeout, true, oo.pMibUploadFsm.commChan)
+			//TODO: needs extra handling of timeouts
+			oo.lastTxParamStruct.repeatCount = 1
+			return
+		}
+		logger.Errorw(ctx, "unexpected MibResetResponse - ignoring", log.Fields{"device-id": oo.deviceID})
+		//perhaps some still lingering message from some prior activity, let's wait for the real response
+		return
 	}
 	logger.Info(ctx, "MibSync Msg", log.Fields{"Stopped handling of MibSyncChan for device-id": oo.deviceID})
 	_ = oo.pMibUploadFsm.pFsm.Event(ulEvStop)
-
 }
 
 func (oo *OnuDeviceEntry) handleOmciMibUploadResponseMessage(ctx context.Context, msg OmciMessage) {
@@ -362,6 +392,9 @@
 	oo.PDevOmciCC.uploadNoOfCmds = msgObj.NumberOfCommands
 	if oo.PDevOmciCC.uploadSequNo < oo.PDevOmciCC.uploadNoOfCmds {
 		_ = oo.PDevOmciCC.sendMibUploadNext(log.WithSpanFromContext(context.TODO(), ctx), oo.pOpenOnuAc.omciTimeout, true)
+		//even though lastTxParameters are currently not used for checking the ResetResponse message we have to ensure
+		//  that the lastTxMessageType is correctly set to avoid misinterpreting other responses
+		oo.lastTxParamStruct.lastTxMessageType = omci.MibUploadNextRequestType
 	} else {
 		logger.Errorw(ctx, "Invalid number of commands received for:", log.Fields{"device-id": oo.deviceID, "uploadNoOfCmds": oo.PDevOmciCC.uploadNoOfCmds})
 		//TODO right action?
@@ -395,6 +428,9 @@
 	}
 	if oo.PDevOmciCC.uploadSequNo < oo.PDevOmciCC.uploadNoOfCmds {
 		_ = oo.PDevOmciCC.sendMibUploadNext(log.WithSpanFromContext(context.TODO(), ctx), oo.pOpenOnuAc.omciTimeout, true)
+		//even though lastTxParameters are currently not used for checking the ResetResponse message we have to ensure
+		//  that the lastTxMessageType is correctly set to avoid misinterpreting other responses
+		oo.lastTxParamStruct.lastTxMessageType = omci.MibUploadNextRequestType
 	} else {
 		oo.pOnuDB.logMeDb(ctx)
 		err := oo.createAndPersistMibTemplate(ctx)
@@ -408,6 +444,23 @@
 
 func (oo *OnuDeviceEntry) handleOmciGetResponseMessage(ctx context.Context, msg OmciMessage) error {
 	var err error = nil
+
+	if oo.lastTxParamStruct.lastTxMessageType != omci.GetRequestType ||
+		oo.lastTxParamStruct.pLastTxMeInstance == nil {
+		//in case the last request was MibReset this issue may appear if the ONU was online before and has received the MDS GetRequest
+		//  with Sequence number 0x8000 as last request before - so it may still respond to that
+		//  then we may force the ONU to react on the MIB reset with a new message that uses an increased Sequence number
+		if oo.lastTxParamStruct.lastTxMessageType == omci.MibResetRequestType && oo.lastTxParamStruct.repeatCount == 0 {
+			logger.Debugw(ctx, "MibSync FSM - repeat mibReset (updated SequenceNumber)", log.Fields{"device-id": oo.deviceID})
+			_ = oo.PDevOmciCC.sendMibReset(log.WithSpanFromContext(context.TODO(), ctx), oo.pOpenOnuAc.omciTimeout, true)
+			//TODO: needs extra handling of timeouts
+			oo.lastTxParamStruct.repeatCount = 1
+			return nil
+		}
+		logger.Warnw(ctx, "unexpected GetResponse - ignoring", log.Fields{"device-id": oo.deviceID})
+		//perhaps some still lingering message from some prior activity, let's wait for the real response
+		return nil
+	}
 	msgLayer := (*msg.OmciPacket).Layer(omci.LayerTypeGetResponse)
 	if msgLayer == nil {
 		logger.Errorw(ctx, "omci Msg layer could not be detected for GetResponse - handling of MibSyncChan stopped", log.Fields{"device-id": oo.deviceID})
@@ -422,10 +475,10 @@
 	}
 	logger.Debugw(ctx, "MibSync FSM - GetResponse Data", log.Fields{"device-id": oo.deviceID, "data-fields": msgObj})
 	if msgObj.Result == me.Success {
-		entityID := oo.PDevOmciCC.pLastTxMeInstance.GetEntityID()
-		if msgObj.EntityClass == oo.PDevOmciCC.pLastTxMeInstance.GetClassID() && msgObj.EntityInstance == entityID {
+		entityID := oo.lastTxParamStruct.pLastTxMeInstance.GetEntityID()
+		if msgObj.EntityClass == oo.lastTxParamStruct.pLastTxMeInstance.GetClassID() && msgObj.EntityInstance == entityID {
 			meAttributes := msgObj.Attributes
-			meInstance := oo.PDevOmciCC.pLastTxMeInstance.GetName()
+			meInstance := oo.lastTxParamStruct.pLastTxMeInstance.GetName()
 			logger.Debugf(ctx, "MibSync FSM - GetResponse Data for %s", log.Fields{"device-id": oo.deviceID, "data-fields": msgObj}, meInstance)
 			switch meInstance {
 			case "OnuG":
@@ -589,9 +642,9 @@
 	logger.Debugf(ctx, "MibSync FSM - erroneous result in GetResponse Data: %s", log.Fields{"device-id": oo.deviceID, "data-fields": msgObj}, msgObj.Result)
 	// Up to now the following erroneous results have been seen for different ONU-types to indicate an unsupported ME
 	if msgObj.Result == me.UnknownInstance || msgObj.Result == me.UnknownEntity || msgObj.Result == me.ProcessingError || msgObj.Result == me.NotSupported {
-		entityID := oo.PDevOmciCC.pLastTxMeInstance.GetEntityID()
-		if msgObj.EntityClass == oo.PDevOmciCC.pLastTxMeInstance.GetClassID() && msgObj.EntityInstance == entityID {
-			meInstance := oo.PDevOmciCC.pLastTxMeInstance.GetName()
+		entityID := oo.lastTxParamStruct.pLastTxMeInstance.GetEntityID()
+		if msgObj.EntityClass == oo.lastTxParamStruct.pLastTxMeInstance.GetClassID() && msgObj.EntityInstance == entityID {
+			meInstance := oo.lastTxParamStruct.pLastTxMeInstance.GetName()
 			switch meInstance {
 			case "IpHostConfigData":
 				logger.Debugw(ctx, "MibSync FSM - erroneous result for IpHostConfigData received - ONU doesn't support ME - fill macAddress with zeros",
@@ -718,7 +771,9 @@
 		me.OnuDataClassID, onuDataMeID, requestedAttributes, oo.pOpenOnuAc.omciTimeout, true, oo.pMibUploadFsm.commChan)
 	//accept also nil as (error) return value for writing to LastTx
 	//  - this avoids misinterpretation of new received OMCI messages
-	oo.PDevOmciCC.pLastTxMeInstance = meInstance
+	oo.lastTxParamStruct.lastTxMessageType = omci.GetRequestType
+	oo.lastTxParamStruct.pLastTxMeInstance = meInstance
+	oo.lastTxParamStruct.repeatCount = 0
 }
 
 func (oo *OnuDeviceEntry) checkMdsValue(ctx context.Context, mibDataSyncOnu uint8) {
diff --git a/internal/pkg/onuadaptercore/onu_device_entry.go b/internal/pkg/onuadaptercore/onu_device_entry.go
index a494418..7f9cfda 100644
--- a/internal/pkg/onuadaptercore/onu_device_entry.go
+++ b/internal/pkg/onuadaptercore/onu_device_entry.go
@@ -289,7 +289,8 @@
 	//mibNextDbResync uint32
 
 	// for mibUpload
-	pMibUploadFsm *AdapterFsm //could be handled dynamically and more general as pAdapterFsm - perhaps later
+	pMibUploadFsm     *AdapterFsm //could be handled dynamically and more general as pAdapterFsm - perhaps later
+	lastTxParamStruct sLastTxMeParameter
 	// for mibDownload
 	pMibDownloadFsm *AdapterFsm //could be handled dynamically and more general as pAdapterFsm - perhaps later
 	//remark: general usage of pAdapterFsm would require generalization of commChan  usage and internal event setting