[VOL-4771] Error deleting device via voltctl when OLT is unreachable

Change-Id: I4cae0625a78a61b470db47b0339e078108d108c9
diff --git a/internal/pkg/core/device_handler.go b/internal/pkg/core/device_handler.go
index dadd316..0686fa3 100644
--- a/internal/pkg/core/device_handler.go
+++ b/internal/pkg/core/device_handler.go
@@ -213,8 +213,7 @@
 	dh.perPonOnuIndicationChannel = make(map[uint32]onuIndicationChannels)
 	dh.childAdapterClients = make(map[string]*vgrpc.Client)
 	dh.cfg = cfg
-	kvStoreDevicePath := fmt.Sprintf(dh.cm.Backend.PathPrefix, "/%s/", dh.device.Id)
-	dh.kvStore = SetKVClient(ctx, dh.openOLT.KVStoreType, dh.openOLT.KVStoreAddress, dh.device.Id, kvStoreDevicePath)
+	dh.kvStore = SetKVClient(ctx, dh.openOLT.KVStoreType, dh.openOLT.KVStoreAddress, dh.device.Id, dh.cm.Backend.PathPrefix)
 	if dh.kvStore == nil {
 		logger.Error(ctx, "Failed to setup KV store")
 		return nil
@@ -254,7 +253,6 @@
 		logger.Fatalw(ctx, "Failed to init KV client\n", log.Fields{"err": err})
 		return nil
 	}
-
 	kvbackend := &db.Backend{
 		Client:     kvClient,
 		StoreType:  backend,
@@ -2170,6 +2168,7 @@
 func (dh *DeviceHandler) clearUNIData(ctx context.Context, onu *rsrcMgr.OnuGemInfo) error {
 	var uniID uint32
 	var err error
+	var errs []error
 	for _, port := range onu.UniPorts {
 		uniID = plt.UniIDFromPortNum(port)
 		logger.Debugw(ctx, "clearing-resource-data-for-uni-port", log.Fields{"port": port, "uni-id": uniID})
@@ -2179,6 +2178,7 @@
 		} else {
 			if err = dh.flowMgr[onu.IntfID].DeleteTechProfileInstances(ctx, onu.IntfID, onu.OnuID, uniID); err != nil {
 				logger.Debugw(ctx, "failed-to-remove-tech-profile-instance-for-onu", log.Fields{"onu-id": onu.OnuID})
+				errs = append(errs, err)
 			}
 		}
 		logger.Debugw(ctx, "deleted-tech-profile-instance-for-onu", log.Fields{"onu-id": onu.OnuID})
@@ -2186,22 +2186,30 @@
 		for _, tpID := range tpIDList {
 			if err = dh.resourceMgr[onu.IntfID].RemoveMeterInfoForOnu(ctx, "upstream", onu.OnuID, uniID, tpID); err != nil {
 				logger.Debugw(ctx, "failed-to-remove-meter-id-for-onu-upstream", log.Fields{"onu-id": onu.OnuID})
+				errs = append(errs, err)
 			}
 			logger.Debugw(ctx, "removed-meter-id-for-onu-upstream", log.Fields{"onu-id": onu.OnuID})
 			if err = dh.resourceMgr[onu.IntfID].RemoveMeterInfoForOnu(ctx, "downstream", onu.OnuID, uniID, tpID); err != nil {
 				logger.Debugw(ctx, "failed-to-remove-meter-id-for-onu-downstream", log.Fields{"onu-id": onu.OnuID})
+				errs = append(errs, err)
 			}
 			logger.Debugw(ctx, "removed-meter-id-for-onu-downstream", log.Fields{"onu-id": onu.OnuID})
 		}
 		dh.resourceMgr[onu.IntfID].FreePONResourcesForONU(ctx, onu.OnuID, uniID)
 		if err = dh.resourceMgr[onu.IntfID].RemoveTechProfileIDsForOnu(ctx, onu.OnuID, uniID); err != nil {
 			logger.Debugw(ctx, "failed-to-remove-tech-profile-id-for-onu", log.Fields{"onu-id": onu.OnuID})
+			errs = append(errs, err)
 		}
 		logger.Debugw(ctx, "removed-tech-profile-id-for-onu", log.Fields{"onu-id": onu.OnuID})
 		if err = dh.resourceMgr[onu.IntfID].DeletePacketInGemPortForOnu(ctx, onu.OnuID, port); err != nil {
 			logger.Debugw(ctx, "failed-to-remove-gemport-pkt-in", log.Fields{"intfid": onu.IntfID, "onuid": onu.OnuID, "uniId": uniID})
+			errs = append(errs, err)
 		}
 	}
+	if len(errs) > 0 {
+		return olterrors.NewErrAdapter(fmt.Errorf("one-or-more-error-during-clear-uni-data, errors:%v",
+			errs).Error(), log.Fields{"device-id": dh.device.Id}, nil)
+	}
 	return nil
 }
 
@@ -2217,8 +2225,12 @@
 
 	dh.StopAllFlowRoutines(ctx)
 
-	dh.cleanupDeviceResources(ctx)
-	logger.Debugw(ctx, "removed-device-from-Resource-manager-KV-store", log.Fields{"device-id": dh.device.Id})
+	err := dh.cleanupDeviceResources(ctx)
+	if err != nil {
+		logger.Errorw(ctx, "could-not-remove-device-from-KV-store", log.Fields{"device-id": dh.device.Id, "err": err})
+	} else {
+		logger.Debugw(ctx, "successfully-removed-device-from-Resource-manager-KV-store", log.Fields{"device-id": dh.device.Id})
+	}
 
 	dh.lockDevice.RLock()
 	// Stop the Stats collector
@@ -2238,7 +2250,13 @@
 	//Reset the state
 	if dh.Client != nil {
 		if _, err := dh.Client.Reboot(ctx, new(oop.Empty)); err != nil {
-			return olterrors.NewErrAdapter("olt-reboot-failed", log.Fields{"device-id": dh.device.Id}, err).Log()
+			go func(rebootErr error) {
+				failureReason := fmt.Sprintf("Failed to reboot during device delete request with error: %s", rebootErr.Error())
+				if evtErr := dh.eventMgr.oltRebootFailedEvent(ctx, dh.device.Id, failureReason, time.Now().Unix()); evtErr != nil {
+					logger.Errorw(ctx, "on-olt-reboot-failed", log.Fields{"device-id": dh.device.Id, "err": evtErr})
+				}
+			}(err) // pass the reboot error by value so the goroutine never reads or writes the err logged below
+			logger.Errorw(ctx, "olt-reboot-failed", log.Fields{"device-id": dh.device.Id, "err": err})
 		}
 	}
 	// There is no need to update the core about operation status and connection status of the OLT.
@@ -2248,7 +2266,7 @@
 
 	// Stop the adapter grpc clients for that parent device
 	dh.deleteAdapterClients(ctx)
-	return nil
+	return err
 }
 
 // StopAllFlowRoutines stops all flow routines
@@ -2269,44 +2287,43 @@
 	}
 }
 
-func (dh *DeviceHandler) cleanupDeviceResources(ctx context.Context) {
-
+func (dh *DeviceHandler) cleanupDeviceResources(ctx context.Context) error {
+	var errs []error
 	if dh.resourceMgr != nil {
 		var ponPort uint32
 		for ponPort = 0; ponPort < dh.totalPonPorts; ponPort++ {
-			var err error
 			onuGemData := dh.resourceMgr[ponPort].GetOnuGemInfoList(ctx)
 			for i, onu := range onuGemData {
 				logger.Debugw(ctx, "onu-data", log.Fields{"onu": onu})
-				if err = dh.clearUNIData(ctx, &onuGemData[i]); err != nil {
-					logger.Errorw(ctx, "failed-to-clear-data-for-onu", log.Fields{"onu-device": onu})
+				if err := dh.clearUNIData(ctx, &onuGemData[i]); err != nil {
+					errs = append(errs, err)
 				}
 			}
-			_ = dh.resourceMgr[ponPort].DeleteAllFlowIDsForGemForIntf(ctx)
-			_ = dh.resourceMgr[ponPort].DeleteAllOnuGemInfoForIntf(ctx)
-			dh.resourceMgr[ponPort].DeleteMcastQueueForIntf(ctx)
+			if err := dh.resourceMgr[ponPort].DeleteAllFlowIDsForGemForIntf(ctx); err != nil {
+				errs = append(errs, err)
+			}
+			if err := dh.resourceMgr[ponPort].DeleteAllOnuGemInfoForIntf(ctx); err != nil {
+				errs = append(errs, err)
+			}
+			if err := dh.resourceMgr[ponPort].DeleteMcastQueueForIntf(ctx); err != nil {
+				errs = append(errs, err)
+			}
 			if err := dh.resourceMgr[ponPort].Delete(ctx, ponPort); err != nil {
-				logger.Debug(ctx, err)
+				errs = append(errs, err)
 			}
 		}
-		// Clean up NNI manager's data
-		_ = dh.resourceMgr[dh.totalPonPorts].DeleteAllFlowIDsForGemForIntf(ctx)
+		// Clean up NNI manager's data; stays inside the nil check above so a nil resourceMgr is never indexed
+		if err := dh.resourceMgr[dh.totalPonPorts].DeleteAllFlowIDsForGemForIntf(ctx); err != nil {
+			errs = append(errs, err)
+		}
 	}
 
 	dh.CloseKVClient(ctx)
 
 	// Take one final sweep at cleaning up KV store for the OLT device
 	// Clean everything at <base-path-prefix>/openolt/<device-id>
-	kvClient, err := kvstore.NewEtcdClient(ctx, dh.openOLT.KVStoreAddress, rsrcMgr.KvstoreTimeout, log.FatalLevel)
-	if err == nil {
-		kvBackend := &db.Backend{
-			Client:     kvClient,
-			StoreType:  dh.openOLT.KVStoreType,
-			Address:    dh.openOLT.KVStoreAddress,
-			Timeout:    rsrcMgr.KvstoreTimeout,
-			PathPrefix: fmt.Sprintf(rsrcMgr.BasePathKvStore, dh.cm.Backend.PathPrefix, dh.device.Id)}
-		_ = kvBackend.DeleteWithPrefix(ctx, "")
-		kvBackend.Client.Close(ctx)
+	if err := dh.kvStore.DeleteWithPrefix(ctx, ""); err != nil {
+		errs = append(errs, err)
 	}
 
 	/*Delete ONU map for the device*/
@@ -2320,6 +2337,11 @@
 		dh.discOnus.Delete(key)
 		return true
 	})
+	if len(errs) > 0 {
+		return olterrors.NewErrAdapter(fmt.Errorf("one-or-more-error-during-device-delete, errors:%v",
+			errs).Error(), log.Fields{"device-id": dh.device.Id}, nil)
+	}
+	return nil
 }
 
 // RebootDevice reboots the given device
@@ -2691,8 +2713,11 @@
 	dh.device = cloned // update local copy of the device
 	go dh.eventMgr.oltCommunicationEvent(ctx, cloned, raisedTs)
 
-	dh.cleanupDeviceResources(ctx)
-	logger.Debugw(ctx, "removed-device-from-Resource-manager-KV-store", log.Fields{"device-id": dh.device.Id})
+	if err := dh.cleanupDeviceResources(ctx); err != nil {
+		logger.Errorw(ctx, "failure-in-cleanup-device-resources", log.Fields{"device-id": dh.device.Id, "err": err})
+	} else {
+		logger.Debugw(ctx, "removed-device-from-Resource-manager-KV-store", log.Fields{"device-id": dh.device.Id})
+	}
 
 	dh.lockDevice.RLock()
 	// Stop the Stats collector