[VOL-4771] Error deleting device via voltctl when OLT is unreachable

Change-Id: I4cae0625a78a61b470db47b0339e078108d108c9
diff --git a/internal/pkg/core/device_handler.go b/internal/pkg/core/device_handler.go
index dadd316..0686fa3 100644
--- a/internal/pkg/core/device_handler.go
+++ b/internal/pkg/core/device_handler.go
@@ -213,8 +213,7 @@
 	dh.perPonOnuIndicationChannel = make(map[uint32]onuIndicationChannels)
 	dh.childAdapterClients = make(map[string]*vgrpc.Client)
 	dh.cfg = cfg
-	kvStoreDevicePath := fmt.Sprintf(dh.cm.Backend.PathPrefix, "/%s/", dh.device.Id)
-	dh.kvStore = SetKVClient(ctx, dh.openOLT.KVStoreType, dh.openOLT.KVStoreAddress, dh.device.Id, kvStoreDevicePath)
+	dh.kvStore = SetKVClient(ctx, dh.openOLT.KVStoreType, dh.openOLT.KVStoreAddress, dh.device.Id, dh.cm.Backend.PathPrefix)
 	if dh.kvStore == nil {
 		logger.Error(ctx, "Failed to setup KV store")
 		return nil
@@ -254,7 +253,6 @@
 		logger.Fatalw(ctx, "Failed to init KV client\n", log.Fields{"err": err})
 		return nil
 	}
-
 	kvbackend := &db.Backend{
 		Client:     kvClient,
 		StoreType:  backend,
@@ -2170,6 +2168,7 @@
 func (dh *DeviceHandler) clearUNIData(ctx context.Context, onu *rsrcMgr.OnuGemInfo) error {
 	var uniID uint32
 	var err error
+	var errs []error
 	for _, port := range onu.UniPorts {
 		uniID = plt.UniIDFromPortNum(port)
 		logger.Debugw(ctx, "clearing-resource-data-for-uni-port", log.Fields{"port": port, "uni-id": uniID})
@@ -2179,6 +2178,7 @@
 		} else {
 			if err = dh.flowMgr[onu.IntfID].DeleteTechProfileInstances(ctx, onu.IntfID, onu.OnuID, uniID); err != nil {
 				logger.Debugw(ctx, "failed-to-remove-tech-profile-instance-for-onu", log.Fields{"onu-id": onu.OnuID})
+				errs = append(errs, err)
 			}
 		}
 		logger.Debugw(ctx, "deleted-tech-profile-instance-for-onu", log.Fields{"onu-id": onu.OnuID})
@@ -2186,22 +2186,30 @@
 		for _, tpID := range tpIDList {
 			if err = dh.resourceMgr[onu.IntfID].RemoveMeterInfoForOnu(ctx, "upstream", onu.OnuID, uniID, tpID); err != nil {
 				logger.Debugw(ctx, "failed-to-remove-meter-id-for-onu-upstream", log.Fields{"onu-id": onu.OnuID})
+				errs = append(errs, err)
 			}
 			logger.Debugw(ctx, "removed-meter-id-for-onu-upstream", log.Fields{"onu-id": onu.OnuID})
 			if err = dh.resourceMgr[onu.IntfID].RemoveMeterInfoForOnu(ctx, "downstream", onu.OnuID, uniID, tpID); err != nil {
 				logger.Debugw(ctx, "failed-to-remove-meter-id-for-onu-downstream", log.Fields{"onu-id": onu.OnuID})
+				errs = append(errs, err)
 			}
 			logger.Debugw(ctx, "removed-meter-id-for-onu-downstream", log.Fields{"onu-id": onu.OnuID})
 		}
 		dh.resourceMgr[onu.IntfID].FreePONResourcesForONU(ctx, onu.OnuID, uniID)
 		if err = dh.resourceMgr[onu.IntfID].RemoveTechProfileIDsForOnu(ctx, onu.OnuID, uniID); err != nil {
 			logger.Debugw(ctx, "failed-to-remove-tech-profile-id-for-onu", log.Fields{"onu-id": onu.OnuID})
+			errs = append(errs, err)
 		}
 		logger.Debugw(ctx, "removed-tech-profile-id-for-onu", log.Fields{"onu-id": onu.OnuID})
 		if err = dh.resourceMgr[onu.IntfID].DeletePacketInGemPortForOnu(ctx, onu.OnuID, port); err != nil {
 			logger.Debugw(ctx, "failed-to-remove-gemport-pkt-in", log.Fields{"intfid": onu.IntfID, "onuid": onu.OnuID, "uniId": uniID})
+			errs = append(errs, err)
 		}
 	}
+	if len(errs) > 0 {
+		return olterrors.NewErrAdapter(fmt.Errorf("one-or-more-error-during-clear-uni-data, errors:%v",
+			errs).Error(), log.Fields{"device-id": dh.device.Id}, nil)
+	}
 	return nil
 }
 
@@ -2217,8 +2225,12 @@
 
 	dh.StopAllFlowRoutines(ctx)
 
-	dh.cleanupDeviceResources(ctx)
-	logger.Debugw(ctx, "removed-device-from-Resource-manager-KV-store", log.Fields{"device-id": dh.device.Id})
+	err := dh.cleanupDeviceResources(ctx)
+	if err != nil {
+		logger.Errorw(ctx, "could-not-remove-device-from-KV-store", log.Fields{"device-id": dh.device.Id, "err": err})
+	} else {
+		logger.Debugw(ctx, "successfully-removed-device-from-Resource-manager-KV-store", log.Fields{"device-id": dh.device.Id})
+	}
 
 	dh.lockDevice.RLock()
 	// Stop the Stats collector
@@ -2238,7 +2250,13 @@
 	//Reset the state
 	if dh.Client != nil {
 		if _, err := dh.Client.Reboot(ctx, new(oop.Empty)); err != nil {
-			return olterrors.NewErrAdapter("olt-reboot-failed", log.Fields{"device-id": dh.device.Id}, err).Log()
+			go func() {
+				failureReason := fmt.Sprintf("Failed to reboot during device delete request with error: %s", err.Error())
+				if err = dh.eventMgr.oltRebootFailedEvent(ctx, dh.device.Id, failureReason, time.Now().Unix()); err != nil {
+					logger.Errorw(ctx, "on-olt-reboot-failed", log.Fields{"device-id": dh.device.Id, "err": err})
+				}
+			}()
+			logger.Errorw(ctx, "olt-reboot-failed", log.Fields{"device-id": dh.device.Id, "err": err})
 		}
 	}
 	// There is no need to update the core about operation status and connection status of the OLT.
@@ -2248,7 +2266,7 @@
 
 	// Stop the adapter grpc clients for that parent device
 	dh.deleteAdapterClients(ctx)
-	return nil
+	return err
 }
 
 // StopAllFlowRoutines stops all flow routines
@@ -2269,44 +2287,43 @@
 	}
 }
 
-func (dh *DeviceHandler) cleanupDeviceResources(ctx context.Context) {
-
+func (dh *DeviceHandler) cleanupDeviceResources(ctx context.Context) error {
+	var errs []error
 	if dh.resourceMgr != nil {
 		var ponPort uint32
 		for ponPort = 0; ponPort < dh.totalPonPorts; ponPort++ {
-			var err error
 			onuGemData := dh.resourceMgr[ponPort].GetOnuGemInfoList(ctx)
 			for i, onu := range onuGemData {
 				logger.Debugw(ctx, "onu-data", log.Fields{"onu": onu})
-				if err = dh.clearUNIData(ctx, &onuGemData[i]); err != nil {
-					logger.Errorw(ctx, "failed-to-clear-data-for-onu", log.Fields{"onu-device": onu})
+				if err := dh.clearUNIData(ctx, &onuGemData[i]); err != nil {
+					errs = append(errs, err)
 				}
 			}
-			_ = dh.resourceMgr[ponPort].DeleteAllFlowIDsForGemForIntf(ctx)
-			_ = dh.resourceMgr[ponPort].DeleteAllOnuGemInfoForIntf(ctx)
-			dh.resourceMgr[ponPort].DeleteMcastQueueForIntf(ctx)
+			if err := dh.resourceMgr[ponPort].DeleteAllFlowIDsForGemForIntf(ctx); err != nil {
+				errs = append(errs, err)
+			}
+			if err := dh.resourceMgr[ponPort].DeleteAllOnuGemInfoForIntf(ctx); err != nil {
+				errs = append(errs, err)
+			}
+			if err := dh.resourceMgr[ponPort].DeleteMcastQueueForIntf(ctx); err != nil {
+				errs = append(errs, err)
+			}
 			if err := dh.resourceMgr[ponPort].Delete(ctx, ponPort); err != nil {
-				logger.Debug(ctx, err)
+				errs = append(errs, err)
 			}
 		}
-		// Clean up NNI manager's data
-		_ = dh.resourceMgr[dh.totalPonPorts].DeleteAllFlowIDsForGemForIntf(ctx)
+	}
+	// Clean up NNI manager's data
+	if err := dh.resourceMgr[dh.totalPonPorts].DeleteAllFlowIDsForGemForIntf(ctx); err != nil {
+		errs = append(errs, err)
 	}
 
 	dh.CloseKVClient(ctx)
 
 	// Take one final sweep at cleaning up KV store for the OLT device
 	// Clean everything at <base-path-prefix>/openolt/<device-id>
-	kvClient, err := kvstore.NewEtcdClient(ctx, dh.openOLT.KVStoreAddress, rsrcMgr.KvstoreTimeout, log.FatalLevel)
-	if err == nil {
-		kvBackend := &db.Backend{
-			Client:     kvClient,
-			StoreType:  dh.openOLT.KVStoreType,
-			Address:    dh.openOLT.KVStoreAddress,
-			Timeout:    rsrcMgr.KvstoreTimeout,
-			PathPrefix: fmt.Sprintf(rsrcMgr.BasePathKvStore, dh.cm.Backend.PathPrefix, dh.device.Id)}
-		_ = kvBackend.DeleteWithPrefix(ctx, "")
-		kvBackend.Client.Close(ctx)
+	if err := dh.kvStore.DeleteWithPrefix(ctx, ""); err != nil {
+		errs = append(errs, err)
 	}
 
 	/*Delete ONU map for the device*/
@@ -2320,6 +2337,11 @@
 		dh.discOnus.Delete(key)
 		return true
 	})
+	if len(errs) > 0 {
+		return olterrors.NewErrAdapter(fmt.Errorf("one-or-more-error-during-device-delete, errors:%v",
+			errs).Error(), log.Fields{"device-id": dh.device.Id}, nil)
+	}
+	return nil
 }
 
 // RebootDevice reboots the given device
@@ -2691,8 +2713,11 @@
 	dh.device = cloned // update local copy of the device
 	go dh.eventMgr.oltCommunicationEvent(ctx, cloned, raisedTs)
 
-	dh.cleanupDeviceResources(ctx)
-	logger.Debugw(ctx, "removed-device-from-Resource-manager-KV-store", log.Fields{"device-id": dh.device.Id})
+	if err := dh.cleanupDeviceResources(ctx); err != nil {
+		logger.Errorw(ctx, "failure-in-cleanup-device-resources", log.Fields{"device-id": dh.device.Id, "err": err})
+	} else {
+		logger.Debugw(ctx, "removed-device-from-Resource-manager-KV-store", log.Fields{"device-id": dh.device.Id})
+	}
 
 	dh.lockDevice.RLock()
 	// Stop the Stats collector
diff --git a/internal/pkg/core/openolt.go b/internal/pkg/core/openolt.go
index 914b27d..8a41a01 100644
--- a/internal/pkg/core/openolt.go
+++ b/internal/pkg/core/openolt.go
@@ -242,7 +242,7 @@
 	logger.Infow(ctx, "delete-device", log.Fields{"device-id": device.Id})
 	if handler := oo.getDeviceHandler(device.Id); handler != nil {
 		if err := handler.DeleteDevice(log.WithSpanFromContext(context.Background(), ctx), device); err != nil {
-			logger.Errorw(ctx, "failed-to-handle-delete-device", log.Fields{"device-id": device.Id})
+			return nil, err
 		}
 		oo.deleteDeviceHandlerToMap(handler)
 		return &empty.Empty{}, nil
diff --git a/internal/pkg/core/openolt_eventmgr.go b/internal/pkg/core/openolt_eventmgr.go
index f39dcf7..6ee538b 100644
--- a/internal/pkg/core/openolt_eventmgr.go
+++ b/internal/pkg/core/openolt_eventmgr.go
@@ -41,6 +41,7 @@
 	onuLopcMissEvent                    = "ONU_LOPC_MISS"
 	onuLopcMicErrorEvent                = "ONU_LOPC_MIC_ERROR"
 	oltLosEvent                         = "OLT_LOSS_OF_SIGNAL"
+	oltRebootFailedEvent                = "OLT_REBOOT_FAILED"
 	oltCommFailure                      = "OLT_COMMUNICATION_FAILURE"
 	oltIndicationDown                   = "OLT_DOWN_INDICATION"
 	onuDyingGaspEvent                   = "ONU_DYING_GASP"
@@ -80,6 +81,8 @@
 	ContextOltAdminState = "admin-state"
 	// ContextOltConnectState is for the connect state of the Olt in the context of the event
 	ContextOltConnectState = "connect-state"
+	// ContextOltFailureReason is to report the reason of an operation failure in the Olt
+	ContextOltFailureReason = "failure-reason"
 	// ContextOltOperState is for the operational state of the Olt in the context of the event
 	ContextOltOperState = "oper-state"
 	// ContextOltVendor is for the Olt vendor in the context of the event
@@ -339,6 +342,20 @@
 	logger.Debugw(ctx, "olt-los-event-sent-to-kafka", log.Fields{"intf-id": ponIntdID})
 	return nil
 }
+func (em *OpenOltEventMgr) oltRebootFailedEvent(ctx context.Context, deviceID string, reason string, raisedTs int64) error {
+	de := voltha.DeviceEvent{
+		Context:         map[string]string{ContextOltFailureReason: "olt-reboot-failed"},
+		ResourceId:      deviceID,
+		DeviceEventName: fmt.Sprintf("%s_%s", oltRebootFailedEvent, "RAISE_EVENT")}
+	if err := em.eventProxy.SendDeviceEvent(ctx, &de, voltha.EventCategory_COMMUNICATION, voltha.EventSubCategory_OLT,
+		raisedTs); err != nil {
+		return olterrors.NewErrCommunication("send-olt-reboot-failed-event", log.Fields{
+			"device-id": deviceID, "raised-ts": raisedTs}, err)
+	}
+	logger.Debugw(ctx, "olt-reboot-failed-event-sent-to-kafka", log.Fields{
+		"device-id": deviceID, "raised-ts": raisedTs})
+	return nil
+}
 
 func (em *OpenOltEventMgr) populateContextWithSerialDeviceID(context map[string]string, intfID, onuID uint32) string {
 	var serialNumber = ""
diff --git a/internal/pkg/core/openolt_test.go b/internal/pkg/core/openolt_test.go
index 575ce64..776fef4 100644
--- a/internal/pkg/core/openolt_test.go
+++ b/internal/pkg/core/openolt_test.go
@@ -30,6 +30,8 @@
 
 	conf "github.com/opencord/voltha-lib-go/v7/pkg/config"
 	vgrpc "github.com/opencord/voltha-lib-go/v7/pkg/grpc"
+	"github.com/opencord/voltha-protos/v5/go/openolt"
+	"github.com/stretchr/testify/assert"
 
 	"github.com/opencord/voltha-lib-go/v7/pkg/events"
 	fu "github.com/opencord/voltha-lib-go/v7/pkg/flows"
@@ -205,7 +207,18 @@
 }
 
 func TestOpenOLT_DeleteDevice(t *testing.T) {
+	oo1 := testOltObject(&fields{})
+	oo2 := testOltObject(&fields{})
+	oo2.deviceHandlers = make(map[string]*DeviceHandler)
+	oo2.deviceHandlers[mockDevice().Id] = newMockDeviceHandler()
+	oo3 := testOltObject(&fields{})
+	oo3.deviceHandlers = make(map[string]*DeviceHandler)
+	oo3.deviceHandlers[mockDevice().Id] = newMockDeviceHandler()
+	_, err := oo3.deviceHandlers[mockDevice().Id].Client.Reboot(context.Background(), &openolt.Empty{})
+	assert.Nil(t, err)
+
 	type args struct {
+		oo     *OpenOLT
 		device *voltha.Device
 	}
 	tests := []struct {
@@ -214,13 +227,14 @@
 		args    args
 		wantErr error
 	}{
-		{"delete_device-1", &fields{}, args{mockDevice()},
+		{"delete_device-1", &fields{}, args{oo1, mockDevice()},
 			olterrors.NewErrNotFound("device-handler", log.Fields{"device-id": "olt"}, nil)},
+		{"delete_device-2", &fields{}, args{oo2, mockDevice()}, nil},
+		{"delete_device-3", &fields{}, args{oo3, mockDevice()}, nil},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			oo := testOltObject(tt.fields)
-			if _, err := oo.DeleteDevice(context.Background(), tt.args.device); !reflect.DeepEqual(err, tt.wantErr) {
+			if _, err := tt.args.oo.DeleteDevice(context.Background(), tt.args.device); !reflect.DeepEqual(err, tt.wantErr) {
 				t.Errorf("Delete_device() error = %v, wantErr %v", err, tt.wantErr)
 			}
 		})