[VOL-1645] Fix for device agents creation
Change-Id: I1a17bd689e458088b42963d395abf6b96180f5b8
diff --git a/rw_core/core/device_agent.go b/rw_core/core/device_agent.go
index e7089cd..4e31ff8 100755
--- a/rw_core/core/device_agent.go
+++ b/rw_core/core/device_agent.go
@@ -33,6 +33,7 @@
type DeviceAgent struct {
deviceId string
+ parentId string
deviceType string
isRootdevice bool
lastData *voltha.Device
@@ -65,6 +66,7 @@
}
agent.isRootdevice = device.Root
agent.deviceId = cloned.Id
+ agent.parentId = device.ParentId
agent.deviceType = cloned.Type
agent.lastData = cloned
agent.deviceMgr = deviceMgr
diff --git a/rw_core/core/device_manager.go b/rw_core/core/device_manager.go
index eff6fe8..a96166d 100755
--- a/rw_core/core/device_manager.go
+++ b/rw_core/core/device_manager.go
@@ -206,18 +206,37 @@
sendResponse(ctx, ch, res)
}
+// stopManagingDevice stops the management of the device as well as any of its reference device and logical device.
+// This function is called only in the Core that does not own this device. In the Core that owns this device then a
+// deletion deletion also includes removal of any reference of this device.
+func (dMgr *DeviceManager) stopManagingDevice(id string) {
+ log.Infow("stopManagingDevice", log.Fields{"deviceId": id})
+ if dMgr.IsDeviceInCache(id) { // Proceed only if an agent is present for this device
+ if root, _ := dMgr.IsRootDevice(id); root == true {
+ // stop managing the logical device
+ ldeviceId := dMgr.logicalDeviceMgr.stopManagingLogicalDeviceWithDeviceId(id)
+ if ldeviceId != "" { // Can happen if logical device agent was already stopped
+ dMgr.core.deviceOwnership.AbandonDevice(ldeviceId)
+ }
+ // stop managing the child devices
+ childDeviceIds := dMgr.getAllDeviceIdsWithDeviceParentId(id)
+ for _, cId := range childDeviceIds {
+ dMgr.stopManagingDevice(cId)
+ }
+ }
+ if agent := dMgr.getDeviceAgent(id); agent != nil {
+ agent.stop(nil)
+ dMgr.deleteDeviceAgentToMap(agent)
+ // Abandon the device ownership
+ dMgr.core.deviceOwnership.AbandonDevice(id)
+ }
+ }
+}
+
func (dMgr *DeviceManager) RunPostDeviceDelete(cDevice *voltha.Device) error {
log.Infow("RunPostDeviceDelete", log.Fields{"deviceId": cDevice.Id})
- if agent := dMgr.getDeviceAgent(cDevice.Id); agent != nil {
- agent.stop(nil)
- dMgr.deleteDeviceAgentToMap(agent)
- if err := dMgr.core.deviceOwnership.AbandonDevice(cDevice.Id); err != nil {
- log.Errorw("failed-abandoning-device-ownership", log.Fields{"deviceId": cDevice.Id, "error": err})
- return err
- }
- return nil
- }
- return status.Errorf(codes.NotFound, "%s", cDevice.Id)
+ dMgr.stopManagingDevice(cDevice.Id)
+ return nil
}
// GetDevice will returns a device, either from memory or from the dB, if present
@@ -351,6 +370,7 @@
for _, device := range devices.([]interface{}) {
// If device is not in memory then set it up
if !dMgr.IsDeviceInCache(device.(*voltha.Device).Id) {
+ log.Debugw("loading-device-from-Model", log.Fields{"id": device.(*voltha.Device).Id})
agent := newDeviceAgent(dMgr.adapterProxy, device.(*voltha.Device), dMgr, dMgr.clusterDataProxy, dMgr.defaultTimeout)
if err := agent.start(nil, true); err != nil {
log.Warnw("failure-starting-agent", log.Fields{"deviceId": device.(*voltha.Device).Id})
@@ -362,9 +382,20 @@
result.Items = append(result.Items, device.(*voltha.Device))
}
}
+ log.Debugw("ListDevices-end", log.Fields{"len": len(result.Items)})
return result, nil
}
+//getDeviceFromModelretrieves the device data from the model.
+func (dMgr *DeviceManager) getDeviceFromModel(deviceId string) (*voltha.Device, error) {
+ if device := dMgr.clusterDataProxy.Get("/devices/"+deviceId, 0, false, ""); device != nil {
+ if d, ok := device.(*voltha.Device); ok {
+ return d, nil
+ }
+ }
+ return nil, status.Error(codes.NotFound, deviceId)
+}
+
// loadDevice loads the deviceId in memory, if not present
func (dMgr *DeviceManager) loadDevice(deviceId string) (*DeviceAgent, error) {
log.Debugw("loading-device", log.Fields{"deviceId": deviceId})
@@ -373,12 +404,17 @@
return nil, status.Error(codes.InvalidArgument, "deviceId empty")
}
if !dMgr.IsDeviceInCache(deviceId) {
- agent := newDeviceAgent(dMgr.adapterProxy, &voltha.Device{Id: deviceId}, dMgr, dMgr.clusterDataProxy, dMgr.defaultTimeout)
- if err := agent.start(nil, true); err != nil {
- agent.stop(nil)
- return nil, err
+ // Proceed with the loading only if the device exist in the Model (could have been deleted)
+ if device, err := dMgr.getDeviceFromModel(deviceId); err == nil {
+ agent := newDeviceAgent(dMgr.adapterProxy, device, dMgr, dMgr.clusterDataProxy, dMgr.defaultTimeout)
+ if err := agent.start(nil, true); err != nil {
+ agent.stop(nil)
+ return nil, err
+ }
+ dMgr.addDeviceAgentToMap(agent)
+ } else {
+ return nil, status.Error(codes.NotFound, deviceId)
}
- dMgr.addDeviceAgentToMap(agent)
}
if agent := dMgr.getDeviceAgent(deviceId); agent != nil {
return agent, nil
@@ -425,7 +461,6 @@
var dAgent *DeviceAgent
var err error
if dAgent, err = dMgr.loadDevice(deviceId); err != nil {
- log.Warnw("failure-loading-device", log.Fields{"deviceId": deviceId})
return err
}
// Get the loaded device details
@@ -475,17 +510,19 @@
if !dMgr.IsDeviceInCache(id.Id) {
// Device Id not in memory
log.Debugw("reconciling-device", log.Fields{"id": id.Id})
- // Load device from dB
- agent := newDeviceAgent(dMgr.adapterProxy, &voltha.Device{Id: id.Id}, dMgr, dMgr.clusterDataProxy, dMgr.defaultTimeout)
- if err := agent.start(nil, true); err != nil {
- log.Warnw("failure-loading-device", log.Fields{"deviceId": id.Id})
- agent.stop(nil)
+ // Proceed with the loading only if the device exist in the Model (could have been deleted)
+ if device, err := dMgr.getDeviceFromModel(id.Id); err == nil {
+ agent := newDeviceAgent(dMgr.adapterProxy, device, dMgr, dMgr.clusterDataProxy, dMgr.defaultTimeout)
+ if err := agent.start(nil, true); err != nil {
+ log.Warnw("failure-loading-device", log.Fields{"deviceId": id.Id})
+ agent.stop(nil)
+ } else {
+ dMgr.addDeviceAgentToMap(agent)
+ reconciled += 1
+ }
} else {
- dMgr.addDeviceAgentToMap(agent)
reconciled += 1
}
- } else {
- reconciled += 1
}
}
if toReconcile != reconciled {
@@ -736,7 +773,7 @@
for _, handler := range handlers {
log.Debugw("running-handler", log.Fields{"handler": funcName(handler)})
if err := handler(current); err != nil {
- log.Warnw("handler-falied", log.Fields{"handler": funcName(handler), "error": err})
+ log.Warnw("handler-failed", log.Fields{"handler": funcName(handler), "error": err})
return err
}
}
@@ -939,6 +976,22 @@
return nil
}
+//getAllDeviceIdsWithDeviceParentId returns the list of device Ids which has id as parent Id. This function uses the
+// data from the agent instead of using the data from the parent device as that data would disappear from a parent
+// device during a delete device operation.
+func (dMgr *DeviceManager) getAllDeviceIdsWithDeviceParentId(id string) []string {
+ log.Debugw("getAllAgentsWithDeviceParentId", log.Fields{"parentDeviceId": id})
+ deviceIds := make([]string, 0)
+ dMgr.lockDeviceAgentsMap.RLock()
+ defer dMgr.lockDeviceAgentsMap.RUnlock()
+ for deviceId, agent := range dMgr.deviceAgents {
+ if agent.parentId == id {
+ deviceIds = append(deviceIds, deviceId)
+ }
+ }
+ return deviceIds
+}
+
//getAllChildDeviceIds is a helper method to get all the child device IDs from the device passed as parameter
func (dMgr *DeviceManager) getAllChildDeviceIds(parentDevice *voltha.Device) ([]string, error) {
log.Debugw("getAllChildDeviceIds", log.Fields{"parentDeviceId": parentDevice.Id})
diff --git a/rw_core/core/device_ownership.go b/rw_core/core/device_ownership.go
index 5deb1cb..ed0fd08 100644
--- a/rw_core/core/device_ownership.go
+++ b/rw_core/core/device_ownership.go
@@ -174,21 +174,33 @@
// Retrieve the ownership key based on the id
var ownershipKey string
var err error
- if ownershipKey, err = da.getOwnershipKey(id); err != nil {
+ var idStr string
+ var cache bool
+ if ownershipKey, idStr, cache, err = da.getOwnershipKey(id); err != nil {
log.Warnw("no-ownershipkey", log.Fields{"error": err})
return false
}
+ // Update the deviceToKey map, if not from cache
+ if !cache {
+ da.deviceToKeyMapLock.Lock()
+ da.deviceToKeyMap[idStr] = ownershipKey
+ da.deviceToKeyMapLock.Unlock()
+ }
+
deviceOwned, ownedByMe := da.getOwnership(ownershipKey)
if deviceOwned {
return ownedByMe
}
- // Not owned by me or maybe anybody else. Try to reserve it
+ // Not owned by me or maybe nobody else. Try to reserve it
reservedByMe := da.tryToReserveKey(ownershipKey)
myChnl := make(chan int)
da.deviceMapLock.Lock()
- da.deviceMap[ownershipKey] = &ownership{id: ownershipKey, owned: reservedByMe, chnl: myChnl}
+ da.deviceMap[ownershipKey] = &ownership{
+ id: ownershipKey,
+ owned: reservedByMe,
+ chnl: myChnl}
da.deviceMapLock.Unlock()
log.Debugw("set-new-ownership", log.Fields{"Id": ownershipKey, "owned": reservedByMe})
@@ -198,6 +210,9 @@
//AbandonDevice must be invoked whenever a device is deleted from the Core
func (da *DeviceOwnership) AbandonDevice(id string) error {
+ if id == "" {
+ return status.Error(codes.FailedPrecondition, "id-nil")
+ }
da.deviceMapLock.Lock()
defer da.deviceMapLock.Unlock()
if o, exist := da.deviceMap[id]; exist { // id is ownership key
@@ -209,7 +224,7 @@
delete(da.deviceToKeyMap, k)
}
}
- // Remove the device reference from the devicMap
+ // Remove the device reference from the deviceMap
delete(da.deviceMap, id)
// Stop the Go routine monitoring the device
@@ -218,9 +233,7 @@
log.Debugw("abandoning-device", log.Fields{"Id": id})
return nil
} else { // id is not ownership key
- if err := da.deleteDeviceKey(id); err != nil {
- log.Errorw("failed-deleting-key", log.Fields{"id": id})
- }
+ da.deleteDeviceKey(id)
}
return nil
}
@@ -258,50 +271,48 @@
return nil
}
-func (da *DeviceOwnership) deleteDeviceKey(id string) error {
+func (da *DeviceOwnership) deleteDeviceKey(id string) {
da.deviceToKeyMapLock.Lock()
defer da.deviceToKeyMapLock.Unlock()
if _, exist := da.deviceToKeyMap[id]; exist {
delete(da.deviceToKeyMap, id)
- return nil
}
- log.Warnw("device-not-owned", log.Fields{"deviceId": id})
- return nil
}
-func (da *DeviceOwnership) getOwnershipKey(id interface{}) (string, error) {
+// getOwnershipKey returns the ownership key that the id param uses. Ownership key is the parent
+// device Id of a child device or the rootdevice of a logical device. This function also returns the
+// id in string format of the id param via the ref output as well as if the data was retrieved from cache
+func (da *DeviceOwnership) getOwnershipKey(id interface{}) (ownershipKey string, ref string, cached bool, err error) {
if id == nil {
- return "", status.Error(codes.InvalidArgument, "nil-id")
+ return "", "", false, status.Error(codes.InvalidArgument, "nil-id")
}
- da.deviceToKeyMapLock.Lock()
- defer da.deviceToKeyMapLock.Unlock()
+ da.deviceToKeyMapLock.RLock()
+ defer da.deviceToKeyMapLock.RUnlock()
var device *voltha.Device
var lDevice *voltha.LogicalDevice
// The id can either be a device Id or a logical device id.
if dId, ok := id.(*utils.DeviceID); ok {
// Use cache if present
if val, exist := da.deviceToKeyMap[dId.Id]; exist {
- return val, nil
+ return val, dId.Id, true, nil
}
if device, _ = da.deviceMgr.GetDevice(dId.Id); device == nil {
- return "", status.Error(codes.NotFound, fmt.Sprintf("id-absent-%s", dId))
+ return "", dId.Id, false, status.Errorf(codes.NotFound, "id-absent-%s", dId)
}
if device.Root {
- da.deviceToKeyMap[dId.Id] = device.Id
+ return device.Id, dId.Id, false, nil
} else {
- da.deviceToKeyMap[dId.Id] = device.ParentId
+ return device.ParentId, dId.Id, false, nil
}
- return da.deviceToKeyMap[dId.Id], nil
} else if ldId, ok := id.(*utils.LogicalDeviceID); ok {
// Use cache if present
if val, exist := da.deviceToKeyMap[ldId.Id]; exist {
- return val, nil
+ return val, ldId.Id, true, nil
}
if lDevice, _ = da.logicalDeviceMgr.getLogicalDevice(ldId.Id); lDevice == nil {
- return "", status.Error(codes.NotFound, fmt.Sprintf("id-absent-%s", ldId))
+ return "", ldId.Id, false, status.Errorf(codes.NotFound, "id-absent-%s", dId)
}
- da.deviceToKeyMap[ldId.Id] = lDevice.RootDeviceId
- return lDevice.RootDeviceId, nil
+ return lDevice.RootDeviceId, ldId.Id, false, nil
}
- return "", status.Error(codes.NotFound, fmt.Sprintf("id-%s", id))
+ return "", "", false, status.Error(codes.NotFound, fmt.Sprintf("id-%v", id))
}
diff --git a/rw_core/core/grpc_nbi_api_handler.go b/rw_core/core/grpc_nbi_api_handler.go
index 0f97cd3..2c3b35e 100755
--- a/rw_core/core/grpc_nbi_api_handler.go
+++ b/rw_core/core/grpc_nbi_api_handler.go
@@ -150,13 +150,16 @@
}
}
-// This function handles the modification or deletion of existing devices
+// takeRequestOwnership creates a transaction in the dB for this request and handles the logic of transaction
+// acquisition. If the device is owned by this Core (in a core-pair) then acquire the transaction with a
+// timeout value (in the event of a timeout the other Core in the core-pair will proceed with the transaction). If the
+// device is not owned then this Core will just monitor the transaction for potential timeouts.
func (handler *APIHandler) takeRequestOwnership(ctx context.Context, id interface{}, maxTimeout ...int64) (*KVTransaction, error) {
+ t := time.Now()
timeout := handler.defaultRequestTimeout
if len(maxTimeout) > 0 {
timeout = maxTimeout[0]
}
- log.Debugw("transaction-timeout", log.Fields{"timeout": timeout})
txn, err := handler.createKvTransaction(ctx)
if txn == nil {
return nil, err
@@ -168,15 +171,18 @@
}
if owned {
if txn.Acquired(timeout) {
+ log.Debugw("acquired-transaction", log.Fields{"transaction-timeout": timeout})
return txn, nil
} else {
return nil, errors.New("failed-to-seize-request")
}
} else {
if txn.Monitor(timeout) {
+ log.Debugw("acquired-transaction-after-timeout", log.Fields{"timeout": timeout, "waited-time": time.Since(t)})
return txn, nil
} else {
- return nil, errors.New("device-not-owned")
+ log.Debugw("transaction-completed-by-other", log.Fields{"timeout": timeout, "waited-time": time.Since(t)})
+ return nil, errors.New(string(COMPLETED_BY_OTHER))
}
}
}
@@ -488,7 +494,11 @@
}
if handler.competeForTransaction() {
- if txn, err := handler.takeRequestOwnership(ctx, nil); err != nil {
+ if txn, err := handler.takeRequestOwnership(ctx, &utils.DeviceID{Id: id.Id}); err != nil {
+ // Remove the device in memory
+ if err.Error() == (errors.New(string(COMPLETED_BY_OTHER)).Error()) {
+ handler.deviceMgr.stopManagingDevice(id.Id)
+ }
return new(empty.Empty), err
} else {
defer txn.Close()
diff --git a/rw_core/core/logical_device_manager.go b/rw_core/core/logical_device_manager.go
index 7a625b4..b6493c5 100644
--- a/rw_core/core/logical_device_manager.go
+++ b/rw_core/core/logical_device_manager.go
@@ -90,6 +90,13 @@
}
}
+func (ldMgr *LogicalDeviceManager) isLogicalDeviceInCache(logicalDeviceId string) bool {
+ ldMgr.lockLogicalDeviceAgentsMap.RLock()
+ defer ldMgr.lockLogicalDeviceAgentsMap.RUnlock()
+ _, inCache := ldMgr.logicalDeviceAgents[logicalDeviceId]
+ return inCache
+}
+
// getLogicalDeviceAgent returns the logical device agent. If the device is not in memory then the device will
// be loaded from dB and a logical device agent created to managed it.
func (ldMgr *LogicalDeviceManager) getLogicalDeviceAgent(logicalDeviceId string) *LogicalDeviceAgent {
@@ -177,6 +184,36 @@
return &id, nil
}
+// stopManagingLogicalDeviceWithDeviceId stops the management of the logical device. This implies removal of any
+// reference of this logical device in cache. The device Id is passed as param because the logical device may already
+// have been removed from the model. This function returns the logical device Id if found
+func (ldMgr *LogicalDeviceManager) stopManagingLogicalDeviceWithDeviceId(id string) string {
+ log.Infow("stop-managing-logical-device", log.Fields{"deviceId": id})
+ // Go over the list of logical device agents to find the one which has rootDeviceId as id
+ ldMgr.lockLogicalDeviceAgentsMap.RLock()
+ defer ldMgr.lockLogicalDeviceAgentsMap.RUnlock()
+ for ldId, ldAgent := range ldMgr.logicalDeviceAgents {
+ if ldAgent.rootDeviceId == id {
+ log.Infow("stopping-logical-device-agent", log.Fields{"lDeviceId": ldId})
+ ldAgent.stop(nil)
+ delete(ldMgr.logicalDeviceAgents, ldId)
+ return ldId
+ }
+ }
+ return ""
+}
+
+//getLogicalDeviceFromModel retrieves the logical device data from the model.
+func (ldMgr *LogicalDeviceManager) getLogicalDeviceFromModel(lDeviceId string) (*voltha.LogicalDevice, error) {
+
+ if logicalDevice := ldMgr.clusterDataProxy.Get("/logical_devices/"+lDeviceId, 0, false, ""); logicalDevice != nil {
+ if lDevice, ok := logicalDevice.(*voltha.LogicalDevice); ok {
+ return lDevice, nil
+ }
+ }
+ return nil, status.Error(codes.NotFound, lDeviceId)
+}
+
// load loads a logical device manager in memory
func (ldMgr *LogicalDeviceManager) load(lDeviceId string) error {
log.Debugw("loading-logical-device", log.Fields{"lDeviceId": lDeviceId})
@@ -185,13 +222,16 @@
ldMgr.lockLogicalDeviceAgentsMap.Lock()
defer ldMgr.lockLogicalDeviceAgentsMap.Unlock()
if ldAgent, _ := ldMgr.logicalDeviceAgents[lDeviceId]; ldAgent == nil {
- // Logical device not in memory - create a temp logical device Agent and let it load from memory
- agent := newLogicalDeviceAgent(lDeviceId, "", ldMgr, ldMgr.deviceMgr, ldMgr.clusterDataProxy, ldMgr.defaultTimeout)
- if err := agent.start(nil, true); err != nil {
- //agent.stop(nil)
- return err
+ // Proceed with the loading only if the logical device exist in the Model (could have been deleted)
+ if _, err := ldMgr.getLogicalDeviceFromModel(lDeviceId); err == nil {
+ // Create a temp logical device Agent and let it load from memory
+ agent := newLogicalDeviceAgent(lDeviceId, "", ldMgr, ldMgr.deviceMgr, ldMgr.clusterDataProxy, ldMgr.defaultTimeout)
+ if err := agent.start(nil, true); err != nil {
+ agent.stop(nil)
+ return err
+ }
+ ldMgr.logicalDeviceAgents[agent.logicalDeviceId] = agent
}
- ldMgr.logicalDeviceAgents[agent.logicalDeviceId] = agent
}
// TODO: load the child device
return nil
@@ -348,7 +388,6 @@
log.Debugw("deleting-logical-ports", log.Fields{"deviceId": deviceId})
// Get logical port
if ldId, err := ldMgr.getLogicalDeviceIdFromDeviceId(deviceId); err != nil {
- log.Warnw("logical-device-not-found", log.Fields{"deviceId": deviceId})
return err
} else {
if agent := ldMgr.getLogicalDeviceAgent(*ldId); agent != nil {