[VOL-1645] Fix for device agents creation

Change-Id: I1a17bd689e458088b42963d395abf6b96180f5b8
diff --git a/rw_core/core/device_agent.go b/rw_core/core/device_agent.go
index e7089cd..4e31ff8 100755
--- a/rw_core/core/device_agent.go
+++ b/rw_core/core/device_agent.go
@@ -33,6 +33,7 @@
 
 type DeviceAgent struct {
 	deviceId         string
+	parentId         string
 	deviceType       string
 	isRootdevice     bool
 	lastData         *voltha.Device
@@ -65,6 +66,7 @@
 	}
 	agent.isRootdevice = device.Root
 	agent.deviceId = cloned.Id
+	agent.parentId = device.ParentId
 	agent.deviceType = cloned.Type
 	agent.lastData = cloned
 	agent.deviceMgr = deviceMgr
diff --git a/rw_core/core/device_manager.go b/rw_core/core/device_manager.go
index eff6fe8..a96166d 100755
--- a/rw_core/core/device_manager.go
+++ b/rw_core/core/device_manager.go
@@ -206,18 +206,37 @@
 	sendResponse(ctx, ch, res)
 }
 
+// stopManagingDevice stops the management of the device as well as any of its reference device and logical device.
+// This function is called only in the Core that does not own this device.  In the Core that owns this device then a
+// deletion deletion also includes removal of any reference of this device.
+func (dMgr *DeviceManager) stopManagingDevice(id string) {
+	log.Infow("stopManagingDevice", log.Fields{"deviceId": id})
+	if dMgr.IsDeviceInCache(id) { // Proceed only if an agent is present for this device
+		if root, _ := dMgr.IsRootDevice(id); root == true {
+			// stop managing the logical device
+			ldeviceId := dMgr.logicalDeviceMgr.stopManagingLogicalDeviceWithDeviceId(id)
+			if ldeviceId != "" { // Can happen if logical device agent was already stopped
+				dMgr.core.deviceOwnership.AbandonDevice(ldeviceId)
+			}
+			// stop managing the child devices
+			childDeviceIds := dMgr.getAllDeviceIdsWithDeviceParentId(id)
+			for _, cId := range childDeviceIds {
+				dMgr.stopManagingDevice(cId)
+			}
+		}
+		if agent := dMgr.getDeviceAgent(id); agent != nil {
+			agent.stop(nil)
+			dMgr.deleteDeviceAgentToMap(agent)
+			// Abandon the device ownership
+			dMgr.core.deviceOwnership.AbandonDevice(id)
+		}
+	}
+}
+
 func (dMgr *DeviceManager) RunPostDeviceDelete(cDevice *voltha.Device) error {
 	log.Infow("RunPostDeviceDelete", log.Fields{"deviceId": cDevice.Id})
-	if agent := dMgr.getDeviceAgent(cDevice.Id); agent != nil {
-		agent.stop(nil)
-		dMgr.deleteDeviceAgentToMap(agent)
-		if err := dMgr.core.deviceOwnership.AbandonDevice(cDevice.Id); err != nil {
-			log.Errorw("failed-abandoning-device-ownership", log.Fields{"deviceId": cDevice.Id, "error": err})
-			return err
-		}
-		return nil
-	}
-	return status.Errorf(codes.NotFound, "%s", cDevice.Id)
+	dMgr.stopManagingDevice(cDevice.Id)
+	return nil
 }
 
 // GetDevice will returns a device, either from memory or from the dB, if present
@@ -351,6 +370,7 @@
 		for _, device := range devices.([]interface{}) {
 			// If device is not in memory then set it up
 			if !dMgr.IsDeviceInCache(device.(*voltha.Device).Id) {
+				log.Debugw("loading-device-from-Model", log.Fields{"id": device.(*voltha.Device).Id})
 				agent := newDeviceAgent(dMgr.adapterProxy, device.(*voltha.Device), dMgr, dMgr.clusterDataProxy, dMgr.defaultTimeout)
 				if err := agent.start(nil, true); err != nil {
 					log.Warnw("failure-starting-agent", log.Fields{"deviceId": device.(*voltha.Device).Id})
@@ -362,9 +382,20 @@
 			result.Items = append(result.Items, device.(*voltha.Device))
 		}
 	}
+	log.Debugw("ListDevices-end", log.Fields{"len": len(result.Items)})
 	return result, nil
 }
 
+//getDeviceFromModelretrieves the device data from the model.
+func (dMgr *DeviceManager) getDeviceFromModel(deviceId string) (*voltha.Device, error) {
+	if device := dMgr.clusterDataProxy.Get("/devices/"+deviceId, 0, false, ""); device != nil {
+		if d, ok := device.(*voltha.Device); ok {
+			return d, nil
+		}
+	}
+	return nil, status.Error(codes.NotFound, deviceId)
+}
+
 // loadDevice loads the deviceId in memory, if not present
 func (dMgr *DeviceManager) loadDevice(deviceId string) (*DeviceAgent, error) {
 	log.Debugw("loading-device", log.Fields{"deviceId": deviceId})
@@ -373,12 +404,17 @@
 		return nil, status.Error(codes.InvalidArgument, "deviceId empty")
 	}
 	if !dMgr.IsDeviceInCache(deviceId) {
-		agent := newDeviceAgent(dMgr.adapterProxy, &voltha.Device{Id: deviceId}, dMgr, dMgr.clusterDataProxy, dMgr.defaultTimeout)
-		if err := agent.start(nil, true); err != nil {
-			agent.stop(nil)
-			return nil, err
+		// Proceed with the loading only if the device exist in the Model (could have been deleted)
+		if device, err := dMgr.getDeviceFromModel(deviceId); err == nil {
+			agent := newDeviceAgent(dMgr.adapterProxy, device, dMgr, dMgr.clusterDataProxy, dMgr.defaultTimeout)
+			if err := agent.start(nil, true); err != nil {
+				agent.stop(nil)
+				return nil, err
+			}
+			dMgr.addDeviceAgentToMap(agent)
+		} else {
+			return nil, status.Error(codes.NotFound, deviceId)
 		}
-		dMgr.addDeviceAgentToMap(agent)
 	}
 	if agent := dMgr.getDeviceAgent(deviceId); agent != nil {
 		return agent, nil
@@ -425,7 +461,6 @@
 	var dAgent *DeviceAgent
 	var err error
 	if dAgent, err = dMgr.loadDevice(deviceId); err != nil {
-		log.Warnw("failure-loading-device", log.Fields{"deviceId": deviceId})
 		return err
 	}
 	// Get the loaded device details
@@ -475,17 +510,19 @@
 			if !dMgr.IsDeviceInCache(id.Id) {
 				//	Device Id not in memory
 				log.Debugw("reconciling-device", log.Fields{"id": id.Id})
-				// Load device from dB
-				agent := newDeviceAgent(dMgr.adapterProxy, &voltha.Device{Id: id.Id}, dMgr, dMgr.clusterDataProxy, dMgr.defaultTimeout)
-				if err := agent.start(nil, true); err != nil {
-					log.Warnw("failure-loading-device", log.Fields{"deviceId": id.Id})
-					agent.stop(nil)
+				// Proceed with the loading only if the device exist in the Model (could have been deleted)
+				if device, err := dMgr.getDeviceFromModel(id.Id); err == nil {
+					agent := newDeviceAgent(dMgr.adapterProxy, device, dMgr, dMgr.clusterDataProxy, dMgr.defaultTimeout)
+					if err := agent.start(nil, true); err != nil {
+						log.Warnw("failure-loading-device", log.Fields{"deviceId": id.Id})
+						agent.stop(nil)
+					} else {
+						dMgr.addDeviceAgentToMap(agent)
+						reconciled += 1
+					}
 				} else {
-					dMgr.addDeviceAgentToMap(agent)
 					reconciled += 1
 				}
-			} else {
-				reconciled += 1
 			}
 		}
 		if toReconcile != reconciled {
@@ -736,7 +773,7 @@
 	for _, handler := range handlers {
 		log.Debugw("running-handler", log.Fields{"handler": funcName(handler)})
 		if err := handler(current); err != nil {
-			log.Warnw("handler-falied", log.Fields{"handler": funcName(handler), "error": err})
+			log.Warnw("handler-failed", log.Fields{"handler": funcName(handler), "error": err})
 			return err
 		}
 	}
@@ -939,6 +976,22 @@
 	return nil
 }
 
+//getAllDeviceIdsWithDeviceParentId returns the list of device Ids which has id as parent Id.  This function uses the
+// data from the agent instead of using the data from the parent device as that data would disappear from a parent
+// device during a delete device operation.
+func (dMgr *DeviceManager) getAllDeviceIdsWithDeviceParentId(id string) []string {
+	log.Debugw("getAllAgentsWithDeviceParentId", log.Fields{"parentDeviceId": id})
+	deviceIds := make([]string, 0)
+	dMgr.lockDeviceAgentsMap.RLock()
+	defer dMgr.lockDeviceAgentsMap.RUnlock()
+	for deviceId, agent := range dMgr.deviceAgents {
+		if agent.parentId == id {
+			deviceIds = append(deviceIds, deviceId)
+		}
+	}
+	return deviceIds
+}
+
 //getAllChildDeviceIds is a helper method to get all the child device IDs from the device passed as parameter
 func (dMgr *DeviceManager) getAllChildDeviceIds(parentDevice *voltha.Device) ([]string, error) {
 	log.Debugw("getAllChildDeviceIds", log.Fields{"parentDeviceId": parentDevice.Id})
diff --git a/rw_core/core/device_ownership.go b/rw_core/core/device_ownership.go
index 5deb1cb..ed0fd08 100644
--- a/rw_core/core/device_ownership.go
+++ b/rw_core/core/device_ownership.go
@@ -174,21 +174,33 @@
 	// Retrieve the ownership key based on the id
 	var ownershipKey string
 	var err error
-	if ownershipKey, err = da.getOwnershipKey(id); err != nil {
+	var idStr string
+	var cache bool
+	if ownershipKey, idStr, cache, err = da.getOwnershipKey(id); err != nil {
 		log.Warnw("no-ownershipkey", log.Fields{"error": err})
 		return false
 	}
 
+	// Update the deviceToKey map, if not from cache
+	if !cache {
+		da.deviceToKeyMapLock.Lock()
+		da.deviceToKeyMap[idStr] = ownershipKey
+		da.deviceToKeyMapLock.Unlock()
+	}
+
 	deviceOwned, ownedByMe := da.getOwnership(ownershipKey)
 	if deviceOwned {
 		return ownedByMe
 	}
-	// Not owned by me or maybe anybody else.  Try to reserve it
+	// Not owned by me or maybe nobody else.  Try to reserve it
 	reservedByMe := da.tryToReserveKey(ownershipKey)
 	myChnl := make(chan int)
 
 	da.deviceMapLock.Lock()
-	da.deviceMap[ownershipKey] = &ownership{id: ownershipKey, owned: reservedByMe, chnl: myChnl}
+	da.deviceMap[ownershipKey] = &ownership{
+		id:    ownershipKey,
+		owned: reservedByMe,
+		chnl:  myChnl}
 	da.deviceMapLock.Unlock()
 
 	log.Debugw("set-new-ownership", log.Fields{"Id": ownershipKey, "owned": reservedByMe})
@@ -198,6 +210,9 @@
 
 //AbandonDevice must be invoked whenever a device is deleted from the Core
 func (da *DeviceOwnership) AbandonDevice(id string) error {
+	if id == "" {
+		return status.Error(codes.FailedPrecondition, "id-nil")
+	}
 	da.deviceMapLock.Lock()
 	defer da.deviceMapLock.Unlock()
 	if o, exist := da.deviceMap[id]; exist { // id is ownership key
@@ -209,7 +224,7 @@
 				delete(da.deviceToKeyMap, k)
 			}
 		}
-		// Remove the device reference from the devicMap
+		// Remove the device reference from the deviceMap
 		delete(da.deviceMap, id)
 
 		// Stop the Go routine monitoring the device
@@ -218,9 +233,7 @@
 		log.Debugw("abandoning-device", log.Fields{"Id": id})
 		return nil
 	} else { // id is not ownership key
-		if err := da.deleteDeviceKey(id); err != nil {
-			log.Errorw("failed-deleting-key", log.Fields{"id": id})
-		}
+		da.deleteDeviceKey(id)
 	}
 	return nil
 }
@@ -258,50 +271,48 @@
 	return nil
 }
 
-func (da *DeviceOwnership) deleteDeviceKey(id string) error {
+func (da *DeviceOwnership) deleteDeviceKey(id string) {
 	da.deviceToKeyMapLock.Lock()
 	defer da.deviceToKeyMapLock.Unlock()
 	if _, exist := da.deviceToKeyMap[id]; exist {
 		delete(da.deviceToKeyMap, id)
-		return nil
 	}
-	log.Warnw("device-not-owned", log.Fields{"deviceId": id})
-	return nil
 }
 
-func (da *DeviceOwnership) getOwnershipKey(id interface{}) (string, error) {
+// getOwnershipKey returns the ownership key that the id param uses.   Ownership key is the parent
+// device Id of a child device or the rootdevice of a logical device.   This function also returns the
+// id in string format of the id param via the ref output as well as if the data was retrieved from cache
+func (da *DeviceOwnership) getOwnershipKey(id interface{}) (ownershipKey string, ref string, cached bool, err error) {
 	if id == nil {
-		return "", status.Error(codes.InvalidArgument, "nil-id")
+		return "", "", false, status.Error(codes.InvalidArgument, "nil-id")
 	}
-	da.deviceToKeyMapLock.Lock()
-	defer da.deviceToKeyMapLock.Unlock()
+	da.deviceToKeyMapLock.RLock()
+	defer da.deviceToKeyMapLock.RUnlock()
 	var device *voltha.Device
 	var lDevice *voltha.LogicalDevice
 	// The id can either be a device Id or a logical device id.
 	if dId, ok := id.(*utils.DeviceID); ok {
 		// Use cache if present
 		if val, exist := da.deviceToKeyMap[dId.Id]; exist {
-			return val, nil
+			return val, dId.Id, true, nil
 		}
 		if device, _ = da.deviceMgr.GetDevice(dId.Id); device == nil {
-			return "", status.Error(codes.NotFound, fmt.Sprintf("id-absent-%s", dId))
+			return "", dId.Id, false, status.Errorf(codes.NotFound, "id-absent-%s", dId)
 		}
 		if device.Root {
-			da.deviceToKeyMap[dId.Id] = device.Id
+			return device.Id, dId.Id, false, nil
 		} else {
-			da.deviceToKeyMap[dId.Id] = device.ParentId
+			return device.ParentId, dId.Id, false, nil
 		}
-		return da.deviceToKeyMap[dId.Id], nil
 	} else if ldId, ok := id.(*utils.LogicalDeviceID); ok {
 		// Use cache if present
 		if val, exist := da.deviceToKeyMap[ldId.Id]; exist {
-			return val, nil
+			return val, ldId.Id, true, nil
 		}
 		if lDevice, _ = da.logicalDeviceMgr.getLogicalDevice(ldId.Id); lDevice == nil {
-			return "", status.Error(codes.NotFound, fmt.Sprintf("id-absent-%s", ldId))
+			return "", ldId.Id, false, status.Errorf(codes.NotFound, "id-absent-%s", dId)
 		}
-		da.deviceToKeyMap[ldId.Id] = lDevice.RootDeviceId
-		return lDevice.RootDeviceId, nil
+		return lDevice.RootDeviceId, ldId.Id, false, nil
 	}
-	return "", status.Error(codes.NotFound, fmt.Sprintf("id-%s", id))
+	return "", "", false, status.Error(codes.NotFound, fmt.Sprintf("id-%v", id))
 }
diff --git a/rw_core/core/grpc_nbi_api_handler.go b/rw_core/core/grpc_nbi_api_handler.go
index 0f97cd3..2c3b35e 100755
--- a/rw_core/core/grpc_nbi_api_handler.go
+++ b/rw_core/core/grpc_nbi_api_handler.go
@@ -150,13 +150,16 @@
 	}
 }
 
-// This function handles the modification or deletion of existing devices
+// takeRequestOwnership creates a transaction in the dB for this request and handles the logic of transaction
+// acquisition.  If the device is owned by this Core (in a core-pair) then acquire the transaction with a
+// timeout value (in the event of a timeout the other Core in the core-pair will proceed with the transaction).  If the
+// device is not owned then this Core will just monitor the transaction for potential timeouts.
 func (handler *APIHandler) takeRequestOwnership(ctx context.Context, id interface{}, maxTimeout ...int64) (*KVTransaction, error) {
+	t := time.Now()
 	timeout := handler.defaultRequestTimeout
 	if len(maxTimeout) > 0 {
 		timeout = maxTimeout[0]
 	}
-	log.Debugw("transaction-timeout", log.Fields{"timeout": timeout})
 	txn, err := handler.createKvTransaction(ctx)
 	if txn == nil {
 		return nil, err
@@ -168,15 +171,18 @@
 	}
 	if owned {
 		if txn.Acquired(timeout) {
+			log.Debugw("acquired-transaction", log.Fields{"transaction-timeout": timeout})
 			return txn, nil
 		} else {
 			return nil, errors.New("failed-to-seize-request")
 		}
 	} else {
 		if txn.Monitor(timeout) {
+			log.Debugw("acquired-transaction-after-timeout", log.Fields{"timeout": timeout, "waited-time": time.Since(t)})
 			return txn, nil
 		} else {
-			return nil, errors.New("device-not-owned")
+			log.Debugw("transaction-completed-by-other", log.Fields{"timeout": timeout, "waited-time": time.Since(t)})
+			return nil, errors.New(string(COMPLETED_BY_OTHER))
 		}
 	}
 }
@@ -488,7 +494,11 @@
 	}
 
 	if handler.competeForTransaction() {
-		if txn, err := handler.takeRequestOwnership(ctx, nil); err != nil {
+		if txn, err := handler.takeRequestOwnership(ctx, &utils.DeviceID{Id: id.Id}); err != nil {
+			// Remove the device in memory
+			if err.Error() == (errors.New(string(COMPLETED_BY_OTHER)).Error()) {
+				handler.deviceMgr.stopManagingDevice(id.Id)
+			}
 			return new(empty.Empty), err
 		} else {
 			defer txn.Close()
diff --git a/rw_core/core/logical_device_manager.go b/rw_core/core/logical_device_manager.go
index 7a625b4..b6493c5 100644
--- a/rw_core/core/logical_device_manager.go
+++ b/rw_core/core/logical_device_manager.go
@@ -90,6 +90,13 @@
 	}
 }
 
+func (ldMgr *LogicalDeviceManager) isLogicalDeviceInCache(logicalDeviceId string) bool {
+	ldMgr.lockLogicalDeviceAgentsMap.RLock()
+	defer ldMgr.lockLogicalDeviceAgentsMap.RUnlock()
+	_, inCache := ldMgr.logicalDeviceAgents[logicalDeviceId]
+	return inCache
+}
+
 // getLogicalDeviceAgent returns the logical device agent.  If the device is not in memory then the device will
 // be loaded from dB and a logical device agent created to managed it.
 func (ldMgr *LogicalDeviceManager) getLogicalDeviceAgent(logicalDeviceId string) *LogicalDeviceAgent {
@@ -177,6 +184,36 @@
 	return &id, nil
 }
 
+// stopManagingLogicalDeviceWithDeviceId stops the management of the logical device.  This implies removal of any
+// reference of this logical device in cache.  The device Id is passed as param because the logical device may already
+// have been removed from the model.  This function returns the logical device Id if found
+func (ldMgr *LogicalDeviceManager) stopManagingLogicalDeviceWithDeviceId(id string) string {
+	log.Infow("stop-managing-logical-device", log.Fields{"deviceId": id})
+	// Go over the list of logical device agents to find the one which has rootDeviceId as id
+	ldMgr.lockLogicalDeviceAgentsMap.RLock()
+	defer ldMgr.lockLogicalDeviceAgentsMap.RUnlock()
+	for ldId, ldAgent := range ldMgr.logicalDeviceAgents {
+		if ldAgent.rootDeviceId == id {
+			log.Infow("stopping-logical-device-agent", log.Fields{"lDeviceId": ldId})
+			ldAgent.stop(nil)
+			delete(ldMgr.logicalDeviceAgents, ldId)
+			return ldId
+		}
+	}
+	return ""
+}
+
+//getLogicalDeviceFromModel retrieves the logical device data from the model.
+func (ldMgr *LogicalDeviceManager) getLogicalDeviceFromModel(lDeviceId string) (*voltha.LogicalDevice, error) {
+
+	if logicalDevice := ldMgr.clusterDataProxy.Get("/logical_devices/"+lDeviceId, 0, false, ""); logicalDevice != nil {
+		if lDevice, ok := logicalDevice.(*voltha.LogicalDevice); ok {
+			return lDevice, nil
+		}
+	}
+	return nil, status.Error(codes.NotFound, lDeviceId)
+}
+
 // load loads a logical device manager in memory
 func (ldMgr *LogicalDeviceManager) load(lDeviceId string) error {
 	log.Debugw("loading-logical-device", log.Fields{"lDeviceId": lDeviceId})
@@ -185,13 +222,16 @@
 	ldMgr.lockLogicalDeviceAgentsMap.Lock()
 	defer ldMgr.lockLogicalDeviceAgentsMap.Unlock()
 	if ldAgent, _ := ldMgr.logicalDeviceAgents[lDeviceId]; ldAgent == nil {
-		// Logical device not in memory - create a temp logical device Agent and let it load from memory
-		agent := newLogicalDeviceAgent(lDeviceId, "", ldMgr, ldMgr.deviceMgr, ldMgr.clusterDataProxy, ldMgr.defaultTimeout)
-		if err := agent.start(nil, true); err != nil {
-			//agent.stop(nil)
-			return err
+		// Proceed with the loading only if the logical device exist in the Model (could have been deleted)
+		if _, err := ldMgr.getLogicalDeviceFromModel(lDeviceId); err == nil {
+			// Create a temp logical device Agent and let it load from memory
+			agent := newLogicalDeviceAgent(lDeviceId, "", ldMgr, ldMgr.deviceMgr, ldMgr.clusterDataProxy, ldMgr.defaultTimeout)
+			if err := agent.start(nil, true); err != nil {
+				agent.stop(nil)
+				return err
+			}
+			ldMgr.logicalDeviceAgents[agent.logicalDeviceId] = agent
 		}
-		ldMgr.logicalDeviceAgents[agent.logicalDeviceId] = agent
 	}
 	// TODO: load the child device
 	return nil
@@ -348,7 +388,6 @@
 	log.Debugw("deleting-logical-ports", log.Fields{"deviceId": deviceId})
 	// Get logical port
 	if ldId, err := ldMgr.getLogicalDeviceIdFromDeviceId(deviceId); err != nil {
-		log.Warnw("logical-device-not-found", log.Fields{"deviceId": deviceId})
 		return err
 	} else {
 		if agent := ldMgr.getLogicalDeviceAgent(*ldId); agent != nil {