[VOL-4771] Error deleting device via voltctl when OLT is unreachable
Change-Id: I4cae0625a78a61b470db47b0339e078108d108c9
diff --git a/internal/pkg/core/device_handler.go b/internal/pkg/core/device_handler.go
index dadd316..0686fa3 100644
--- a/internal/pkg/core/device_handler.go
+++ b/internal/pkg/core/device_handler.go
@@ -213,8 +213,7 @@
dh.perPonOnuIndicationChannel = make(map[uint32]onuIndicationChannels)
dh.childAdapterClients = make(map[string]*vgrpc.Client)
dh.cfg = cfg
- kvStoreDevicePath := fmt.Sprintf(dh.cm.Backend.PathPrefix, "/%s/", dh.device.Id)
- dh.kvStore = SetKVClient(ctx, dh.openOLT.KVStoreType, dh.openOLT.KVStoreAddress, dh.device.Id, kvStoreDevicePath)
+ dh.kvStore = SetKVClient(ctx, dh.openOLT.KVStoreType, dh.openOLT.KVStoreAddress, dh.device.Id, dh.cm.Backend.PathPrefix)
if dh.kvStore == nil {
logger.Error(ctx, "Failed to setup KV store")
return nil
@@ -254,7 +253,6 @@
logger.Fatalw(ctx, "Failed to init KV client\n", log.Fields{"err": err})
return nil
}
-
kvbackend := &db.Backend{
Client: kvClient,
StoreType: backend,
@@ -2170,6 +2168,7 @@
func (dh *DeviceHandler) clearUNIData(ctx context.Context, onu *rsrcMgr.OnuGemInfo) error {
var uniID uint32
var err error
+ var errs []error
for _, port := range onu.UniPorts {
uniID = plt.UniIDFromPortNum(port)
logger.Debugw(ctx, "clearing-resource-data-for-uni-port", log.Fields{"port": port, "uni-id": uniID})
@@ -2179,6 +2178,7 @@
} else {
if err = dh.flowMgr[onu.IntfID].DeleteTechProfileInstances(ctx, onu.IntfID, onu.OnuID, uniID); err != nil {
logger.Debugw(ctx, "failed-to-remove-tech-profile-instance-for-onu", log.Fields{"onu-id": onu.OnuID})
+ errs = append(errs, err)
}
}
logger.Debugw(ctx, "deleted-tech-profile-instance-for-onu", log.Fields{"onu-id": onu.OnuID})
@@ -2186,22 +2186,30 @@
for _, tpID := range tpIDList {
if err = dh.resourceMgr[onu.IntfID].RemoveMeterInfoForOnu(ctx, "upstream", onu.OnuID, uniID, tpID); err != nil {
logger.Debugw(ctx, "failed-to-remove-meter-id-for-onu-upstream", log.Fields{"onu-id": onu.OnuID})
+ errs = append(errs, err)
}
logger.Debugw(ctx, "removed-meter-id-for-onu-upstream", log.Fields{"onu-id": onu.OnuID})
if err = dh.resourceMgr[onu.IntfID].RemoveMeterInfoForOnu(ctx, "downstream", onu.OnuID, uniID, tpID); err != nil {
logger.Debugw(ctx, "failed-to-remove-meter-id-for-onu-downstream", log.Fields{"onu-id": onu.OnuID})
+ errs = append(errs, err)
}
logger.Debugw(ctx, "removed-meter-id-for-onu-downstream", log.Fields{"onu-id": onu.OnuID})
}
dh.resourceMgr[onu.IntfID].FreePONResourcesForONU(ctx, onu.OnuID, uniID)
if err = dh.resourceMgr[onu.IntfID].RemoveTechProfileIDsForOnu(ctx, onu.OnuID, uniID); err != nil {
logger.Debugw(ctx, "failed-to-remove-tech-profile-id-for-onu", log.Fields{"onu-id": onu.OnuID})
+ errs = append(errs, err)
}
logger.Debugw(ctx, "removed-tech-profile-id-for-onu", log.Fields{"onu-id": onu.OnuID})
if err = dh.resourceMgr[onu.IntfID].DeletePacketInGemPortForOnu(ctx, onu.OnuID, port); err != nil {
logger.Debugw(ctx, "failed-to-remove-gemport-pkt-in", log.Fields{"intfid": onu.IntfID, "onuid": onu.OnuID, "uniId": uniID})
+ errs = append(errs, err)
}
}
+ if len(errs) > 0 {
+ return olterrors.NewErrAdapter(fmt.Errorf("one-or-more-error-during-clear-uni-data, errors:%v",
+ errs).Error(), log.Fields{"device-id": dh.device.Id}, nil)
+ }
return nil
}
@@ -2217,8 +2225,12 @@
dh.StopAllFlowRoutines(ctx)
- dh.cleanupDeviceResources(ctx)
- logger.Debugw(ctx, "removed-device-from-Resource-manager-KV-store", log.Fields{"device-id": dh.device.Id})
+ err := dh.cleanupDeviceResources(ctx)
+ if err != nil {
+ logger.Errorw(ctx, "could-not-remove-device-from-KV-store", log.Fields{"device-id": dh.device.Id, "err": err})
+ } else {
+ logger.Debugw(ctx, "successfully-removed-device-from-Resource-manager-KV-store", log.Fields{"device-id": dh.device.Id})
+ }
dh.lockDevice.RLock()
// Stop the Stats collector
@@ -2238,7 +2250,13 @@
//Reset the state
if dh.Client != nil {
if _, err := dh.Client.Reboot(ctx, new(oop.Empty)); err != nil {
- return olterrors.NewErrAdapter("olt-reboot-failed", log.Fields{"device-id": dh.device.Id}, err).Log()
+ go func() {
+ failureReason := fmt.Sprintf("Failed to reboot during device delete request with error: %s", err.Error())
+ if err = dh.eventMgr.oltRebootFailedEvent(ctx, dh.device.Id, failureReason, time.Now().Unix()); err != nil {
+ logger.Errorw(ctx, "on-olt-reboot-failed", log.Fields{"device-id": dh.device.Id, "err": err})
+ }
+ }()
+ logger.Errorw(ctx, "olt-reboot-failed", log.Fields{"device-id": dh.device.Id, "err": err})
}
}
// There is no need to update the core about operation status and connection status of the OLT.
@@ -2248,7 +2266,7 @@
// Stop the adapter grpc clients for that parent device
dh.deleteAdapterClients(ctx)
- return nil
+ return err
}
// StopAllFlowRoutines stops all flow routines
@@ -2269,44 +2287,43 @@
}
}
-func (dh *DeviceHandler) cleanupDeviceResources(ctx context.Context) {
-
+func (dh *DeviceHandler) cleanupDeviceResources(ctx context.Context) error {
+ var errs []error
if dh.resourceMgr != nil {
var ponPort uint32
for ponPort = 0; ponPort < dh.totalPonPorts; ponPort++ {
- var err error
onuGemData := dh.resourceMgr[ponPort].GetOnuGemInfoList(ctx)
for i, onu := range onuGemData {
logger.Debugw(ctx, "onu-data", log.Fields{"onu": onu})
- if err = dh.clearUNIData(ctx, &onuGemData[i]); err != nil {
- logger.Errorw(ctx, "failed-to-clear-data-for-onu", log.Fields{"onu-device": onu})
+ if err := dh.clearUNIData(ctx, &onuGemData[i]); err != nil {
+ errs = append(errs, err)
}
}
- _ = dh.resourceMgr[ponPort].DeleteAllFlowIDsForGemForIntf(ctx)
- _ = dh.resourceMgr[ponPort].DeleteAllOnuGemInfoForIntf(ctx)
- dh.resourceMgr[ponPort].DeleteMcastQueueForIntf(ctx)
+ if err := dh.resourceMgr[ponPort].DeleteAllFlowIDsForGemForIntf(ctx); err != nil {
+ errs = append(errs, err)
+ }
+ if err := dh.resourceMgr[ponPort].DeleteAllOnuGemInfoForIntf(ctx); err != nil {
+ errs = append(errs, err)
+ }
+ if err := dh.resourceMgr[ponPort].DeleteMcastQueueForIntf(ctx); err != nil {
+ errs = append(errs, err)
+ }
if err := dh.resourceMgr[ponPort].Delete(ctx, ponPort); err != nil {
- logger.Debug(ctx, err)
+ errs = append(errs, err)
}
}
- // Clean up NNI manager's data
- _ = dh.resourceMgr[dh.totalPonPorts].DeleteAllFlowIDsForGemForIntf(ctx)
+ }
+ // Clean up NNI manager's data
+ if err := dh.resourceMgr[dh.totalPonPorts].DeleteAllFlowIDsForGemForIntf(ctx); err != nil {
+ errs = append(errs, err)
}
dh.CloseKVClient(ctx)
// Take one final sweep at cleaning up KV store for the OLT device
// Clean everything at <base-path-prefix>/openolt/<device-id>
- kvClient, err := kvstore.NewEtcdClient(ctx, dh.openOLT.KVStoreAddress, rsrcMgr.KvstoreTimeout, log.FatalLevel)
- if err == nil {
- kvBackend := &db.Backend{
- Client: kvClient,
- StoreType: dh.openOLT.KVStoreType,
- Address: dh.openOLT.KVStoreAddress,
- Timeout: rsrcMgr.KvstoreTimeout,
- PathPrefix: fmt.Sprintf(rsrcMgr.BasePathKvStore, dh.cm.Backend.PathPrefix, dh.device.Id)}
- _ = kvBackend.DeleteWithPrefix(ctx, "")
- kvBackend.Client.Close(ctx)
+ if err := dh.kvStore.DeleteWithPrefix(ctx, ""); err != nil {
+ errs = append(errs, err)
}
/*Delete ONU map for the device*/
@@ -2320,6 +2337,11 @@
dh.discOnus.Delete(key)
return true
})
+ if len(errs) > 0 {
+ return olterrors.NewErrAdapter(fmt.Errorf("one-or-more-error-during-device-delete, errors:%v",
+ errs).Error(), log.Fields{"device-id": dh.device.Id}, nil)
+ }
+ return nil
}
// RebootDevice reboots the given device
@@ -2691,8 +2713,11 @@
dh.device = cloned // update local copy of the device
go dh.eventMgr.oltCommunicationEvent(ctx, cloned, raisedTs)
- dh.cleanupDeviceResources(ctx)
- logger.Debugw(ctx, "removed-device-from-Resource-manager-KV-store", log.Fields{"device-id": dh.device.Id})
+ if err := dh.cleanupDeviceResources(ctx); err != nil {
+ logger.Errorw(ctx, "failure-in-cleanup-device-resources", log.Fields{"device-id": dh.device.Id, "err": err})
+ } else {
+ logger.Debugw(ctx, "removed-device-from-Resource-manager-KV-store", log.Fields{"device-id": dh.device.Id})
+ }
dh.lockDevice.RLock()
// Stop the Stats collector
diff --git a/internal/pkg/core/openolt.go b/internal/pkg/core/openolt.go
index 914b27d..8a41a01 100644
--- a/internal/pkg/core/openolt.go
+++ b/internal/pkg/core/openolt.go
@@ -242,7 +242,7 @@
logger.Infow(ctx, "delete-device", log.Fields{"device-id": device.Id})
if handler := oo.getDeviceHandler(device.Id); handler != nil {
if err := handler.DeleteDevice(log.WithSpanFromContext(context.Background(), ctx), device); err != nil {
- logger.Errorw(ctx, "failed-to-handle-delete-device", log.Fields{"device-id": device.Id})
+ return nil, err
}
oo.deleteDeviceHandlerToMap(handler)
return &empty.Empty{}, nil
diff --git a/internal/pkg/core/openolt_eventmgr.go b/internal/pkg/core/openolt_eventmgr.go
index f39dcf7..6ee538b 100644
--- a/internal/pkg/core/openolt_eventmgr.go
+++ b/internal/pkg/core/openolt_eventmgr.go
@@ -41,6 +41,7 @@
onuLopcMissEvent = "ONU_LOPC_MISS"
onuLopcMicErrorEvent = "ONU_LOPC_MIC_ERROR"
oltLosEvent = "OLT_LOSS_OF_SIGNAL"
+ oltRebootFailedEvent = "OLT_REBOOT_FAILED"
oltCommFailure = "OLT_COMMUNICATION_FAILURE"
oltIndicationDown = "OLT_DOWN_INDICATION"
onuDyingGaspEvent = "ONU_DYING_GASP"
@@ -80,6 +81,8 @@
ContextOltAdminState = "admin-state"
// ContextOltConnectState is for the connect state of the Olt in the context of the event
ContextOltConnectState = "connect-state"
+ // ContextOltFailureReason is to report the reason of an operation failure in the Olt
+ ContextOltFailureReason = "failure-reason"
// ContextOltOperState is for the operational state of the Olt in the context of the event
ContextOltOperState = "oper-state"
// ContextOltVendor is for the Olt vendor in the context of the event
@@ -339,6 +342,20 @@
logger.Debugw(ctx, "olt-los-event-sent-to-kafka", log.Fields{"intf-id": ponIntdID})
return nil
}
+func (em *OpenOltEventMgr) oltRebootFailedEvent(ctx context.Context, deviceID string, reason string, raisedTs int64) error {
+ de := voltha.DeviceEvent{
+ Context: map[string]string{ContextOltFailureReason: "olt-reboot-failed"},
+ ResourceId: deviceID,
+ DeviceEventName: fmt.Sprintf("%s_%s", oltRebootFailedEvent, "RAISE_EVENT")}
+ if err := em.eventProxy.SendDeviceEvent(ctx, &de, voltha.EventCategory_COMMUNICATION, voltha.EventSubCategory_OLT,
+ raisedTs); err != nil {
+ return olterrors.NewErrCommunication("send-olt-reboot-failed-event", log.Fields{
+ "device-id": deviceID, "raised-ts": raisedTs}, err)
+ }
+ logger.Debugw(ctx, "olt-reboot-failed-event-sent-to-kafka", log.Fields{
+ "device-id": deviceID, "raised-ts": raisedTs})
+ return nil
+}
func (em *OpenOltEventMgr) populateContextWithSerialDeviceID(context map[string]string, intfID, onuID uint32) string {
var serialNumber = ""
diff --git a/internal/pkg/core/openolt_test.go b/internal/pkg/core/openolt_test.go
index 575ce64..776fef4 100644
--- a/internal/pkg/core/openolt_test.go
+++ b/internal/pkg/core/openolt_test.go
@@ -30,6 +30,8 @@
conf "github.com/opencord/voltha-lib-go/v7/pkg/config"
vgrpc "github.com/opencord/voltha-lib-go/v7/pkg/grpc"
+ "github.com/opencord/voltha-protos/v5/go/openolt"
+ "github.com/stretchr/testify/assert"
"github.com/opencord/voltha-lib-go/v7/pkg/events"
fu "github.com/opencord/voltha-lib-go/v7/pkg/flows"
@@ -205,7 +207,18 @@
}
func TestOpenOLT_DeleteDevice(t *testing.T) {
+ oo1 := testOltObject(&fields{})
+ oo2 := testOltObject(&fields{})
+ oo2.deviceHandlers = make(map[string]*DeviceHandler)
+ oo2.deviceHandlers[mockDevice().Id] = newMockDeviceHandler()
+ oo3 := testOltObject(&fields{})
+ oo3.deviceHandlers = make(map[string]*DeviceHandler)
+ oo3.deviceHandlers[mockDevice().Id] = newMockDeviceHandler()
+ _, err := oo3.deviceHandlers[mockDevice().Id].Client.Reboot(context.Background(), &openolt.Empty{})
+ assert.Nil(t, err)
+
type args struct {
+ oo *OpenOLT
device *voltha.Device
}
tests := []struct {
@@ -214,13 +227,14 @@
args args
wantErr error
}{
- {"delete_device-1", &fields{}, args{mockDevice()},
+ {"delete_device-1", &fields{}, args{oo1, mockDevice()},
olterrors.NewErrNotFound("device-handler", log.Fields{"device-id": "olt"}, nil)},
+ {"delete_device-2", &fields{}, args{oo2, mockDevice()}, nil},
+ {"delete_device-3", &fields{}, args{oo3, mockDevice()}, nil},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- oo := testOltObject(tt.fields)
- if _, err := oo.DeleteDevice(context.Background(), tt.args.device); !reflect.DeepEqual(err, tt.wantErr) {
+ if _, err := tt.args.oo.DeleteDevice(context.Background(), tt.args.device); !reflect.DeepEqual(err, tt.wantErr) {
t.Errorf("Delete_device() error = %v, wantErr %v", err, tt.wantErr)
}
})
diff --git a/internal/pkg/resourcemanager/resourcemanager.go b/internal/pkg/resourcemanager/resourcemanager.go
index 7e4044b..26a73ad 100755
--- a/internal/pkg/resourcemanager/resourcemanager.go
+++ b/internal/pkg/resourcemanager/resourcemanager.go
@@ -1404,14 +1404,15 @@
}
//DeleteMcastQueueForIntf deletes multicast queue info for the current pon interface from kvstore
-func (rsrcMgr *OpenOltResourceMgr) DeleteMcastQueueForIntf(ctx context.Context) {
+func (rsrcMgr *OpenOltResourceMgr) DeleteMcastQueueForIntf(ctx context.Context) error {
path := McastQueuesForIntf
if err := rsrcMgr.KVStore.Delete(ctx, path); err != nil {
logger.Errorw(ctx, "Failed to delete multicast queue info from kvstore", log.Fields{"err": err, "interfaceId": rsrcMgr.PonIntfID})
- return
+ return err
}
logger.Debugw(ctx, "deleted multicast queue info from KV store successfully", log.Fields{"interfaceId": rsrcMgr.PonIntfID})
+ return nil
}
//AddFlowGroupToKVStore adds flow group into KV store
diff --git a/internal/pkg/resourcemanager/resourcemanager_test.go b/internal/pkg/resourcemanager/resourcemanager_test.go
index 84b00e8..b1ba8ef 100644
--- a/internal/pkg/resourcemanager/resourcemanager_test.go
+++ b/internal/pkg/resourcemanager/resourcemanager_test.go
@@ -1016,7 +1016,7 @@
RsrcMgr := testResMgrObject(tt.fields)
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
- RsrcMgr.DeleteMcastQueueForIntf(ctx)
+ _ = RsrcMgr.DeleteMcastQueueForIntf(ctx)
})
}
}