[VOL-2442] Fix for Core panic
The logical device agent was receiving logical port creation
requests while the logical device was not ready - it was still
waiting for switch capability data from the OLT device. This was
causing a panic. The fix prevents logical port creation from
being triggered when the logical device is not ready. Once the
logical device is ready, it will go over the port data from the
OLT device and automatically create the logical ports.
Change-Id: Iad62302eda80fa158e59852810ad272a8aeedb7b
diff --git a/rw_core/core/common_test.go b/rw_core/core/common_test.go
index 7f6412d..9098f0f 100644
--- a/rw_core/core/common_test.go
+++ b/rw_core/core/common_test.go
@@ -55,6 +55,7 @@
type isLogicalDeviceConditionSatisfied func(ld *voltha.LogicalDevice) bool
type isDeviceConditionSatisfied func(ld *voltha.Device) bool
type isDevicesConditionSatisfied func(ds *voltha.Devices) bool
+type isLogicalDevicesConditionSatisfied func(lds *voltha.LogicalDevices) bool
func init() {
_, err := log.AddPackage(log.JSON, logLevel, log.Fields{"instanceId": "coreTests"})
@@ -244,3 +245,31 @@
return fmt.Errorf("timeout-waiting-devices")
}
}
+
+// waitUntilConditionForLogicalDevices polls ListLogicalDevices until verificationFunction accepts the result (nil) or timeout elapses (error); closing done on return stops the poller without a racy shared flag.
+func waitUntilConditionForLogicalDevices(timeout time.Duration, nbi *APIHandler, verificationFunction isLogicalDevicesConditionSatisfied) error {
+	ch := make(chan int, 1)
+	done := make(chan struct{})
+	defer close(done)
+	go func() {
+		for {
+			lDevices, _ := nbi.ListLogicalDevices(getContext(), &empty.Empty{})
+			if verificationFunction(lDevices) {
+				ch <- 1
+				return
+			}
+			select {
+			case <-done:
+				return
+			case <-time.After(retryInterval):
+			}
+		}
+	}()
+	timer := time.NewTimer(timeout)
+	defer timer.Stop()
+	select {
+	case <-ch:
+		return nil
+	case <-timer.C:
+		return fmt.Errorf("timeout-waiting-logical-devices")
+	}
+}
diff --git a/rw_core/core/grpc_nbi_api_handler.go b/rw_core/core/grpc_nbi_api_handler.go
index 52f8c3b..2da4d9e 100755
--- a/rw_core/core/grpc_nbi_api_handler.go
+++ b/rw_core/core/grpc_nbi_api_handler.go
@@ -953,8 +953,11 @@
// request. For performance reason we can let both Cores in a Core-Pair forward the Packet to the adapters and
// let once of the shim layer (kafka proxy or adapter request handler filters out the duplicate packet)
if ownedByMe, err := handler.core.deviceOwnership.OwnedByMe(&utils.LogicalDeviceID{ID: packet.Id}); ownedByMe && err == nil {
- agent := handler.logicalDeviceMgr.getLogicalDeviceAgent(packet.Id)
- agent.packetOut(packet.PacketOut)
+ if agent := handler.logicalDeviceMgr.getLogicalDeviceAgent(packet.Id); agent != nil {
+ agent.packetOut(packet.PacketOut)
+ } else {
+ log.Errorf("No logical device agent present", log.Fields{"logicaldeviceID": packet.Id})
+ }
}
}
diff --git a/rw_core/core/grpc_nbi_api_handler_test.go b/rw_core/core/grpc_nbi_api_handler_test.go
index 4acb8aa..e0b6f64 100755
--- a/rw_core/core/grpc_nbi_api_handler_test.go
+++ b/rw_core/core/grpc_nbi_api_handler_test.go
@@ -63,7 +63,7 @@
test.oltAdapterName = "olt_adapter_mock"
test.onuAdapterName = "onu_adapter_mock"
test.coreInstanceID = "rw-nbi-test"
- test.defaultTimeout = 5 * time.Second
+ test.defaultTimeout = 10 * time.Second
test.maxTimeout = 20 * time.Second
return test
}
@@ -74,6 +74,7 @@
cfg := config.NewRWCoreFlags()
cfg.CorePairTopic = "rw_core"
cfg.DefaultRequestTimeout = nb.defaultTimeout.Nanoseconds() / 1000000 //TODO: change when Core changes to Duration
+ cfg.DefaultCoreTimeout = nb.defaultTimeout.Nanoseconds() / 1000000
cfg.KVStorePort = nb.kvClientPort
cfg.InCompetingMode = inCompeteMode
grpcPort, err := freeport.GetFreePort()
@@ -472,6 +473,50 @@
assert.Nil(t, err)
}
+// testDisableAndDeleteAllDevice disables an OLT, checks that its ONUs follow, deletes the OLT and waits until no device or logical device is listed.
+func (nb *NBTest) testDisableAndDeleteAllDevice(t *testing.T, nbi *APIHandler) {
+	//Get an OLT device; assert does not abort the test, so bail out rather than dereference a nil device below
+	oltDevice, err := nb.getADevice(true, nbi)
+	if !assert.Nil(t, err) || !assert.NotNil(t, oltDevice) {
+		return
+	}
+	// Disable the oltDevice
+	_, err = nbi.DisableDevice(getContext(), &voltha.ID{Id: oltDevice.Id})
+	assert.Nil(t, err)
+
+	// Wait for the olt device to be disabled
+	var vdFunction isDeviceConditionSatisfied = func(device *voltha.Device) bool {
+		return device.AdminState == voltha.AdminState_DISABLED && device.OperStatus == voltha.OperStatus_UNKNOWN
+	}
+	err = waitUntilDeviceReadiness(oltDevice.Id, nb.maxTimeout, vdFunction, nbi)
+	assert.Nil(t, err)
+
+	// Verify that all onu devices are disabled as well (skipped when the child list could not be fetched)
+	onuDevices, err := nb.core.deviceMgr.getAllChildDevices(oltDevice.Id)
+	if assert.Nil(t, err) && assert.NotNil(t, onuDevices) {
+		for _, onu := range onuDevices.Items {
+			err = waitUntilDeviceReadiness(onu.Id, nb.maxTimeout, vdFunction, nbi)
+			assert.Nil(t, err)
+		}
+	}
+	// Delete the oltDevice
+	_, err = nbi.DeleteDevice(getContext(), &voltha.ID{Id: oltDevice.Id})
+	assert.Nil(t, err)
+
+	var vFunction isDevicesConditionSatisfied = func(devices *voltha.Devices) bool {
+		return devices != nil && len(devices.Items) == 0
+	}
+	err = waitUntilConditionForDevices(nb.maxTimeout, nbi, vFunction)
+	assert.Nil(t, err)
+
+	// Wait for absence of logical device
+	var vlFunction isLogicalDevicesConditionSatisfied = func(lds *voltha.LogicalDevices) bool {
+		return lds != nil && len(lds.Items) == 0
+	}
+	err = waitUntilConditionForLogicalDevices(nb.maxTimeout, nbi, vlFunction)
+	assert.Nil(t, err)
+}
+
func TestSuite1(t *testing.T) {
nb := newNBTest()
assert.NotNil(t, nb)
@@ -493,14 +538,20 @@
// 2. Test adapter registration
nb.testAdapterRegistration(t, nbi)
- // 3. Test create device
- nb.testCreateDevice(t, nbi)
+ numberOfDeviceTestRuns := 2
+ for i := 1; i <= numberOfDeviceTestRuns; i++ {
+ // 3. Test create device
+ nb.testCreateDevice(t, nbi)
- // 4. Test Enable a device
- nb.testEnableDevice(t, nbi)
+ // 4. Test Enable a device
+ nb.testEnableDevice(t, nbi)
- // 5. Test disable and ReEnable a root device
- nb.testDisableAndReEnableRootDevice(t, nbi)
+ // 5. Test disable and ReEnable a root device
+ nb.testDisableAndReEnableRootDevice(t, nbi)
+
+ // 6. Test disable and delete all devices
+ nb.testDisableAndDeleteAllDevice(t, nbi)
+ }
//x. TODO - More tests to come
}
diff --git a/rw_core/core/logical_device_manager.go b/rw_core/core/logical_device_manager.go
index 6f8b2fa..6783e29 100644
--- a/rw_core/core/logical_device_manager.go
+++ b/rw_core/core/logical_device_manager.go
@@ -99,7 +99,15 @@
func (ldMgr *LogicalDeviceManager) getLogicalDeviceAgent(logicalDeviceID string) *LogicalDeviceAgent {
agent, ok := ldMgr.logicalDeviceAgents.Load(logicalDeviceID)
if ok {
- return agent.(*LogicalDeviceAgent)
+ lda := agent.(*LogicalDeviceAgent)
+ if lda.logicalDevice == nil {
+ // This can happen when an agent for the logical device has been created but the logical device
+ // itself is not ready for action as it is waiting for switch and port capabilities from the
+ // relevant adapter. In such a case prevent any request aimed at that logical device.
+ log.Debugf("Logical device %s is not ready to serve requests", logicalDeviceID)
+ return nil
+ }
+ return lda
}
// Try to load into memory - loading will also create the logical device agent
if err := ldMgr.load(logicalDeviceID); err == nil {
diff --git a/rw_core/graph/device_graph.go b/rw_core/graph/device_graph.go
index 4c89134..02ad35d 100644
--- a/rw_core/graph/device_graph.go
+++ b/rw_core/graph/device_graph.go
@@ -121,7 +121,7 @@
//ComputeRoutes creates a device graph from the logical ports and then calculates all the routes
//between the logical ports. This will clear up the graph and routes if there were any.
func (dg *DeviceGraph) ComputeRoutes(lps []*voltha.LogicalPort) {
- if dg == nil {
+ if dg == nil || len(lps) == 0 {
return
}
dg.graphBuildLock.Lock()