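[VOL-5381] - handle logical agent creation failure by starting the agent asynchronously and retrying get-switch-capability until it succeeds or the agent is stopped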
Change-Id: I9dd685117d13456bbcd6db8fd7723fa66967b949
Signed-off-by: Sridhar Ravindra <sridhar.ravindra@radisys.com>
diff --git a/VERSION b/VERSION
index b727628..9608f8e 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-3.6.2
+3.6.3-dev1
diff --git a/rw_core/core/device/logical_agent.go b/rw_core/core/device/logical_agent.go
index b5c5cc0..253a523 100644
--- a/rw_core/core/device/logical_agent.go
+++ b/rw_core/core/device/logical_agent.go
@@ -57,6 +57,7 @@
orderedEvents orderedEvents
startOnce sync.Once
stopOnce sync.Once
+ exitChannel chan int
flowCache *flow.Cache
meterLoader *meter.Loader
@@ -77,6 +78,7 @@
flowDecomposer: fd.NewFlowDecomposer(deviceMgr.getDeviceReadOnly),
internalTimeout: internalTimeout,
requestQueue: coreutils.NewRequestQueue(),
+ exitChannel: make(chan int, 1),
flowCache: flow.NewCache(),
groupCache: group.NewCache(),
@@ -86,10 +88,11 @@
}
// start creates the logical device and add it to the data model
-func (agent *LogicalAgent) start(ctx context.Context, logicalDeviceExist bool, logicalDevice *voltha.LogicalDevice) error {
+func (agent *LogicalAgent) start(ctx context.Context, logicalDeviceExist bool, logicalDevice *voltha.LogicalDevice) {
needToStart := false
if agent.startOnce.Do(func() { needToStart = true }); !needToStart {
- return nil
+ logger.Debug(ctx, "starting-logical-device-agent already running")
+ return
}
logger.Infow(ctx, "starting-logical-device-agent", log.Fields{"logical-device-id": agent.logicalDeviceID, "load-from-db": logicalDeviceExist})
@@ -97,8 +100,8 @@
var startSucceeded bool
defer func() {
if !startSucceeded {
- if err := agent.stop(ctx); err != nil {
- logger.Errorw(ctx, "failed-to-cleanup-after-unsuccessful-start", log.Fields{"logical-device-id": agent.logicalDeviceID, "error": err})
+ if stopErr := agent.stop(ctx); stopErr != nil {
+ logger.Errorw(ctx, "failed-to-cleanup-after-unsuccessful-start", log.Fields{"logical-device-id": agent.logicalDeviceID, "error": stopErr})
}
}
}()
@@ -108,15 +111,40 @@
//Build the logical device based on information retrieved from the device adapter
var switchCap *ca.SwitchCapability
var err error
+
if switchCap, err = agent.deviceMgr.getSwitchCapability(ctx, agent.rootDeviceID); err != nil {
- return err
+ logger.Warnw(ctx, "failed-to-get-switch-capability", log.Fields{"root-device-id": agent.rootDeviceID, "error": err})
+ switchCapTicker := time.NewTicker(time.Second * 2)
+ defer switchCapTicker.Stop()
+
+ // Start a retry loop to get switch capability of the OLT device from adapter
+ for {
+ select {
+ case <-switchCapTicker.C:
+ if switchCap, err = agent.deviceMgr.getSwitchCapability(ctx, agent.rootDeviceID); err == nil {
+ logger.Infow(ctx, "received switch capability, proceeding to start logical device agent", log.Fields{"root-device-id": agent.rootDeviceID})
+ }
+ // Before retrying, check if the agent has stopped
+ case _, ok := (<-agent.exitChannel):
+ if !ok {
+ logger.Warnw(ctx, "agent stopped, exit retrying get-switch-capability", log.Fields{"root-device-id": agent.rootDeviceID})
+ return
+ }
+ }
+ // Break the for loop as we have received the switch capability from adapter
+ if err == nil {
+ break
+ }
+ logger.Warnw(ctx, "retrying get-switch-capability", log.Fields{"root-device-id": agent.rootDeviceID, "error": err})
+ }
}
ld = &voltha.LogicalDevice{Id: agent.logicalDeviceID, RootDeviceId: agent.rootDeviceID}
// Create the datapath ID (uint64) using the logical device ID (based on the MAC Address)
var datapathID uint64
if datapathID, err = coreutils.CreateDataPathID(agent.serialNumber); err != nil {
- return err
+ logger.Errorw(ctx, "failed-to-create-datapath-id", log.Fields{"serial-number": agent.serialNumber, "error": err})
+ return
}
ld.DatapathId = datapathID
ld.Desc = (proto.Clone(switchCap.Desc)).(*ofp.OfpDesc)
@@ -126,7 +154,7 @@
// Save the logical device
if err := agent.ldProxy.Set(ctx, ld.Id, ld); err != nil {
logger.Errorw(ctx, "failed-to-add-logical-device", log.Fields{"logical-device-id": agent.logicalDeviceID})
- return err
+ return
}
logger.Debugw(ctx, "logical-device-created", log.Fields{"logical-device-id": agent.logicalDeviceID, "root-id": ld.RootDeviceId})
@@ -147,9 +175,12 @@
ld = &voltha.LogicalDevice{}
have, err := agent.ldProxy.Get(ctx, agent.logicalDeviceID, ld)
if err != nil {
- return err
+ logger.Errorw(ctx, "failed-to-load-logical-device-from-db", log.Fields{"logical-device-id": agent.logicalDeviceID, "error": err})
+ return
} else if !have {
- return status.Errorf(codes.NotFound, "logical_device-%s", agent.logicalDeviceID)
+ err := status.Errorf(codes.NotFound, "logical_device-%s", agent.logicalDeviceID)
+ logger.Errorw(ctx, "logical-device-not-found-in-db", log.Fields{"logical-device-id": agent.logicalDeviceID, "error": err})
+ return
}
}
@@ -178,8 +209,7 @@
}
startSucceeded = true
-
- return nil
+ agent.ldeviceMgr.addLogicalDeviceAgentToMap(agent)
}
// stop stops the logical device agent. This removes the logical device from the data model.
@@ -217,6 +247,7 @@
// TODO: remove all entries from all loaders
// TODO: don't allow any more modifications to flows/groups/meters/ports or to any logical device field
+ close(agent.exitChannel)
agent.stopped = true
logger.Info(ctx, "logical-device-agent-stopped")
diff --git a/rw_core/core/device/logical_manager.go b/rw_core/core/device/logical_manager.go
index ed882a8..3812e2f 100644
--- a/rw_core/core/device/logical_manager.go
+++ b/rw_core/core/device/logical_manager.go
@@ -61,11 +61,7 @@
for _, lDevice := range logicalDevices {
// Create an agent for each device
agent := newLogicalAgent(ctx, lDevice.Id, "", "", ldMgr, ldMgr.deviceMgr, ldMgr.dbPath, ldMgr.ldProxy, ldMgr.internalTimeout)
- if err := agent.start(ctx, true, lDevice); err != nil {
- logger.Warnw(ctx, "failure-starting-logical-agent", log.Fields{"logical-device-id": lDevice.Id})
- } else {
- ldMgr.logicalDeviceAgents.Store(agent.logicalDeviceID, agent)
- }
+ go agent.start(ctx, true, lDevice)
}
probe.UpdateStatusFromContext(ctx, serviceName, probe.ServiceStatusRunning)
@@ -161,7 +157,6 @@
logger.Debugw(ctx, "logical-device-id", log.Fields{"logical-device-id": id})
agent := newLogicalAgent(ctx, id, sn, device.Id, ldMgr, ldMgr.deviceMgr, ldMgr.dbPath, ldMgr.ldProxy, ldMgr.internalTimeout)
- ldMgr.addLogicalDeviceAgentToMap(agent)
// Update the root device with the logical device Id reference
if err := ldMgr.deviceMgr.setParentID(ctx, device, id); err != nil {
@@ -169,11 +164,7 @@
return nil, err
}
- err := agent.start(ctx, false, nil)
- if err != nil {
- logger.Errorw(ctx, "unable-to-create-the-logical-device", log.Fields{"error": err})
- ldMgr.deleteLogicalDeviceAgent(id)
- }
+ go agent.start(ctx, false, nil)
logger.Debug(ctx, "creating-logical-device-ends")
return &id, nil
@@ -229,10 +220,7 @@
if _, err := ldMgr.getLogicalDeviceFromModel(ctx, lDeviceID); err == nil {
logger.Debugw(ctx, "loading-logical-device", log.Fields{"lDeviceId": lDeviceID})
agent := newLogicalAgent(ctx, lDeviceID, "", "", ldMgr, ldMgr.deviceMgr, ldMgr.dbPath, ldMgr.ldProxy, ldMgr.internalTimeout)
- if err := agent.start(ctx, true, nil); err != nil {
- return err
- }
- ldMgr.logicalDeviceAgents.Store(agent.logicalDeviceID, agent)
+ go agent.start(ctx, true, nil)
} else {
logger.Debugw(ctx, "logical-device-not-in-model", log.Fields{"logical-device-id": lDeviceID})
}