VOL-2185:Handle ProbeDeviceCapabilities_ failure
Change-Id: I61c00b57c113a05e6b996b4e932b1da05cf972da
diff --git a/agent/src/core.cc b/agent/src/core.cc
index 8de776f..a3653f3 100644
--- a/agent/src/core.cc
+++ b/agent/src/core.cc
@@ -103,6 +103,10 @@
const std::string downstream = "downstream";
bcmolt_oltid dev_id = 0;
+/* Constants used for retrying some BAL APIs */
+const uint32_t BAL_API_RETRY_TIME_IN_USECS = 1000000;
+const uint32_t MAX_BAL_API_RETRY_COUNT = 5;
+
/* Current session */
static bcmcli_session *current_session;
static bcmcli_entry *api_parent_dir;
@@ -1299,6 +1303,36 @@
return Status::OK;
}
+/* Same as bcmolt_cfg_get but with added logic of retrying the API
+ in case of some specific failures like timeout or object not yet ready
+*/
+bcmos_errno bcmolt_cfg_get_mult_retry(bcmolt_oltid olt, bcmolt_cfg *cfg) {
+ bcmos_errno err;
+ uint32_t current_try = 0;
+
+ while (current_try < MAX_BAL_API_RETRY_COUNT) {
+ err = bcmolt_cfg_get(olt, cfg);
+ current_try++;
+
+ if (err == BCM_ERR_STATE || err == BCM_ERR_TIMEOUT) {
+ OPENOLT_LOG(WARNING, openolt_log_id, "bcmolt_cfg_get: err = %s(%d)\n",bcmos_strerror(err), err);
+ bcmos_usleep(BAL_API_RETRY_TIME_IN_USECS);
+ continue;
+ }
+ else {
+ break;
+ }
+ }
+
+ if (err != BCM_ERR_OK) {
+ OPENOLT_LOG(ERROR, openolt_log_id, "bcmolt_cfg_get tried (%d) times with retry time(%d usecs) err=%d\n",
+ current_try,
+ BAL_API_RETRY_TIME_IN_USECS,
+ err);
+ }
+ return err;
+}
+
Status ProbeDeviceCapabilities_() {
bcmos_errno err;
bcmolt_device_cfg dev_cfg = { };
@@ -1323,11 +1357,11 @@
// code in production code.
err = bcmolt_cfg_get__olt_topology_stub(dev_id, &olt_cfg);
#else
- err = bcmolt_cfg_get(dev_id, &olt_cfg.hdr);
+ err = bcmolt_cfg_get_mult_retry(dev_id, &olt_cfg.hdr);
#endif
if (err) {
- OPENOLT_LOG(ERROR, openolt_log_id, "cfg: Failed to query OLT\n");
- return bcm_to_grpc_err(err, "cfg: Failed to query OLT");
+ OPENOLT_LOG(ERROR, openolt_log_id, "cfg: Failed to query OLT topology\n");
+ return bcm_to_grpc_err(err, "cfg: Failed to query OLT topology");
}
num_of_nni_ports = olt_cfg.data.topology.num_switch_ports;
@@ -1342,16 +1376,18 @@
num_of_pon_ports,
BCM_MAX_DEVS_PER_LINE_CARD);
+ uint32_t num_failed_cfg_gets = 0;
for (int devid = 0; devid < BCM_MAX_DEVS_PER_LINE_CARD; devid++) {
dev_key.device_id = devid;
BCMOLT_CFG_INIT(&dev_cfg, device, dev_key);
BCMOLT_MSG_FIELD_GET(&dev_cfg, firmware_sw_version);
BCMOLT_MSG_FIELD_GET(&dev_cfg, chip_family);
BCMOLT_MSG_FIELD_GET(&dev_cfg, system_mode);
- err = bcmolt_cfg_get(dev_id, &dev_cfg.hdr);
+ err = bcmolt_cfg_get_mult_retry(dev_id, &dev_cfg.hdr);
if (err) {
- OPENOLT_LOG(ERROR, openolt_log_id, "device: Failed to query OLT\n");
- return bcm_to_grpc_err(err, "device: Failed to query OLT");
+ OPENOLT_LOG(WARNING, openolt_log_id,"Failed to query PON MAC Device %d (errno = %d). Skipping the device.\n", devid, err);
+ num_failed_cfg_gets++;
+ continue;
}
std::string bal_version;
@@ -1384,6 +1420,13 @@
bcmos_usleep(500000);
}
+ /* If all the devices returned errors then we tell the caller that this is an error else we work with
+ only the devices that retured success*/
+ if (num_failed_cfg_gets == BCM_MAX_DEVS_PER_LINE_CARD) {
+ OPENOLT_LOG(ERROR, openolt_log_id, "device: Query of all the devices failed\n");
+ return bcm_to_grpc_err(err, "device: All devices failed query");
+ }
+
return Status::OK;
}
#if 0