VOL-1148: ADTRAN OLT: Allow initial activation even if OLT is not reachable
Change-Id: I961635192123ac6ad8f04b895cceedf487defc86
diff --git a/voltha/adapters/adtran_olt/adtran_device_handler.py b/voltha/adapters/adtran_olt/adtran_device_handler.py
index 9cc0cdf..3dbbb98 100644
--- a/voltha/adapters/adtran_olt/adtran_device_handler.py
+++ b/voltha/adapters/adtran_olt/adtran_device_handler.py
@@ -33,7 +33,7 @@
from voltha.protos.openflow_13_pb2 import ofp_desc, ofp_switch_features, OFPC_PORT_STATS, \
OFPC_GROUP_STATS, OFPC_TABLE_STATS, OFPC_FLOW_STATS
from voltha.extensions.alarms.adapter_alarms import AdapterAlarms
-from voltha.extensions.pki.olt.olt_pm_metrics import OltPmMetrics
+from voltha.extensions.kpi.olt.olt_pm_metrics import OltPmMetrics
from common.utils.asleep import asleep
_ = third_party
@@ -51,6 +51,8 @@
_DEFAULT_NETCONF_PASSWORD = ""
_DEFAULT_NETCONF_PORT = 830
+_STARTUP_RETRY_TIMEOUT = 5 # 5 seconds delay after activate failed before we
+
class AdtranDeviceHandler(object):
"""
@@ -107,6 +109,7 @@
self.utility_vlan = DEFAULT_UTILITY_VLAN
self.mac_address = '00:13:95:00:00:00'
self._rest_support = None
+ self._initial_enable_complete = False
# Northbound and Southbound ports
self.northbound_ports = {} # port number -> Port
@@ -310,12 +313,10 @@
self.log.exception('option_parsing_error: {}'.format(e.message))
@inlineCallbacks
- def activate(self, device, done_deferred=None, reconciling=False):
+ def activate(self, done_deferred, reconciling):
"""
Activate the OLT device
- :param device: A voltha.Device object, with possible device-type
- specific extensions.
:param done_deferred: (Deferred) Deferred to fire when done
:param reconciling: If True, this adapter is taking over for a previous adapter
for an existing OLT
@@ -323,6 +324,8 @@
self.log.info('AdtranDeviceHandler.activating', reconciling=reconciling)
if self.logical_device_id is None:
+ device = self.adapter_agent.get_device(self.device_id)
+
try:
# Parse our command line options for this device
self.parse_provisioning_options(device)
@@ -334,32 +337,39 @@
############################################################################
# Start initial discovery of NETCONF support (if any)
try:
+ device.reason = 'establishing NETCONF connection'
+ self.adapter_agent.update_device(device)
+
self.startup = self.make_netconf_connection()
yield self.startup
except Exception as e:
self.log.exception('netconf-connection', e=e)
- self.activate_failed(device, e.message, reachable=False)
+ returnValue(self.restart_activate(done_deferred, reconciling))
############################################################################
# Update access information on network device for full protocol support
try:
+ device.reason = 'device networking validation'
+ self.adapter_agent.update_device(device)
self.startup = self.ready_network_access()
yield self.startup
except Exception as e:
self.log.exception('network-setup', e=e)
- self.activate_failed(device, e.message, reachable=False)
+ returnValue(self.restart_activate(done_deferred, reconciling))
############################################################################
# Restconf setup
try:
+ device.reason = 'establishing RESTConf connections'
+ self.adapter_agent.update_device(device)
self.startup = self.make_restconf_connection()
yield self.startup
except Exception as e:
self.log.exception('restconf-setup', e=e)
- self.activate_failed(device, e.message, reachable=False)
+ returnValue(self.restart_activate(done_deferred, reconciling))
############################################################################
# Get the device Information
@@ -369,6 +379,8 @@
self.adapter_agent.update_device(device)
else:
try:
+ device.reason = 'retrieving device information'
+ self.adapter_agent.update_device(device)
self.startup = self.get_device_info(device)
results = yield self.startup
@@ -385,12 +397,12 @@
except Exception as e:
self.log.exception('device-info', e=e)
- self.activate_failed(device, e.message, reachable=False)
+ returnValue(self.restart_activate(done_deferred, reconciling))
try:
# Enumerate and create Northbound NNI interfaces
- device.reason = 'Enumerating NNI Interfaces'
+ device.reason = 'enumerating northbound interfaces'
self.adapter_agent.update_device(device)
self.startup = self.enumerate_northbound_ports(device)
results = yield self.startup
@@ -398,7 +410,7 @@
self.startup = self.process_northbound_ports(device, results)
yield self.startup
- device.reason = 'Adding NNI Interfaces to Adapter'
+ device.reason = 'adding northbound interfaces to adapter'
self.adapter_agent.update_device(device)
if not reconciling:
@@ -407,12 +419,12 @@
except Exception as e:
self.log.exception('NNI-enumeration', e=e)
- self.activate_failed(device, e.message)
+ returnValue(self.restart_activate(done_deferred, reconciling))
try:
# Enumerate and create southbound interfaces
- device.reason = 'Enumerating PON Interfaces'
+ device.reason = 'enumerating southbound interfaces'
self.adapter_agent.update_device(device)
self.startup = self.enumerate_southbound_ports(device)
results = yield self.startup
@@ -420,7 +432,7 @@
self.startup = self.process_southbound_ports(device, results)
yield self.startup
- device.reason = 'Adding PON Interfaces to Adapter'
+ device.reason = 'adding southbound interfaces to adapter'
self.adapter_agent.update_device(device)
if not reconciling:
@@ -429,7 +441,7 @@
except Exception as e:
self.log.exception('PON_enumeration', e=e)
- self.activate_failed(device, e.message)
+ returnValue(self.restart_activate(done_deferred, reconciling))
if reconciling:
if device.admin_state == AdminState.ENABLED:
@@ -453,29 +465,32 @@
############################################################################
# Setup PM configuration for this device
- try:
- device.reason = 'Setting up PM configuration'
- self.adapter_agent.update_device(device)
+ if self.pm_metrics is None:
+ try:
+ device.reason = 'setting up Performance Monitoring configuration'
+ self.adapter_agent.update_device(device)
- kwargs = {
- 'nni-ports': self.northbound_ports.values(),
- 'pon-ports': self.southbound_ports.values()
- }
- self.pm_metrics = OltPmMetrics(self.adapter_agent, self.device_id,
- grouped=True, freq_override=False,
- **kwargs)
+ kwargs = {
+ 'nni-ports': self.northbound_ports.values(),
+ 'pon-ports': self.southbound_ports.values()
+ }
+ self.pm_metrics = OltPmMetrics(self.adapter_agent, self.device_id,
+ grouped=True, freq_override=False,
+ **kwargs)
- pm_config = self.pm_metrics.make_proto()
- self.log.debug("initial-pm-config", pm_config=pm_config)
- self.adapter_agent.update_device_pm_config(pm_config, init=True)
+ pm_config = self.pm_metrics.make_proto()
+ self.log.debug("initial-pm-config", pm_config=pm_config)
+ self.adapter_agent.update_device_pm_config(pm_config, init=True)
- except Exception as e:
- self.log.exception('pm-setup', e=e)
- self.activate_failed(device, e.message)
+ except Exception as e:
+ self.log.exception('pm-setup', e=e)
+ self.activate_failed(device, e.message, reachable=False)
############################################################################
# Set the ports in a known good initial state
if not reconciling:
+ device.reason = 'setting device to a known initial state'
+ self.adapter_agent.update_device(device)
try:
for port in self.northbound_ports.itervalues():
self.startup = yield port.reset()
@@ -485,24 +500,24 @@
except Exception as e:
self.log.exception('port-reset', e=e)
- self.activate_failed(device, e.message)
+ returnValue(self.restart_activate(done_deferred, reconciling))
############################################################################
# Create logical ports for all southbound and northbound interfaces
try:
- device.reason = 'Creating logical ports'
+ device.reason = 'creating logical ports'
self.adapter_agent.update_device(device)
self.startup = self.create_logical_ports(device, ld_initialized, reconciling)
yield self.startup
except Exception as e:
self.log.exception('logical-port', e=e)
- self.activate_failed(device, e.message)
+ returnValue(self.restart_activate(done_deferred, reconciling))
############################################################################
# Setup Alarm handler
- device.reason = 'Setting up Adapter Alarms'
+ device.reason = 'setting up adapter alarms'
self.adapter_agent.update_device(device)
self.alarms = AdapterAlarms(self.adapter_agent, device.id, ld_initialized.id)
@@ -514,15 +529,16 @@
# Complete device specific steps
try:
self.log.debug('device-activation-procedures')
+ device.reason = 'performing model specific activation procedures'
+ self.adapter_agent.update_device(device)
self.startup = self.complete_device_specific_activation(device, reconciling)
yield self.startup
except Exception as e:
self.log.exception('device-activation-procedures', e=e)
- self.activate_failed(device, e.message)
+ returnValue(self.restart_activate(done_deferred, reconciling))
# Schedule the heartbeat for the device
-
self.log.debug('starting-heartbeat')
self.start_heartbeat(delay=10)
@@ -537,18 +553,38 @@
reactor.callLater(10, self.pm_metrics.start_collector)
# Signal completion
+ self._initial_enable_complete = True
self.log.info('activated')
except Exception as e:
self.log.exception('activate', e=e)
if done_deferred is not None:
done_deferred.errback(e)
- raise
if done_deferred is not None:
done_deferred.callback('activated')
- returnValue(done_deferred)
+ returnValue('activated')
+
+ def restart_activate(self, done_deferred, reconciling):
+ """
+ Startup activation failed, pause a short period of time and retry
+
+ :param done_deferred: (deferred) Deferred to fire upon completion of activation
+ :param reconciling: (bool) If true, we are reconciling after moving to a new vCore
+ """
+ d, self.startup = self.startup, None
+ try:
+ if d is not None and not d.called:
+ d.cancel()
+ except:
+ pass
+ device = self.adapter_agent.get_device(self.device_id)
+ device.reason = 'Failed during {}, retrying'.format(device.reason)
+ self.adapter_agent.update_device(device)
+ self.startup = reactor.callLater(_STARTUP_RETRY_TIMEOUT, self.activate,
+ done_deferred, reconciling)
+ return 'retrying'
@inlineCallbacks
def ready_network_access(self):
@@ -847,8 +883,11 @@
d.cancel()
except:
pass
+
# Get the latest device reference
device = self.adapter_agent.get_device(self.device_id)
+ device.reason = 'Disabling'
+ self.adapter_agent.update_device(device)
# Drop registration for ONU detection
# self.adapter_agent.unregister_for_onu_detect_state(self.device.id)
@@ -901,10 +940,11 @@
self._rest_client = None
# Shutdown communications with OLT
-
self.startup.addCallbacks(_drop_netconf, _null_clients)
self.startup.addCallbacks(_null_clients, _null_clients)
+ device.reason = ''
+ self.adapter_agent.update_device(device)
self.log.info('disabled', device_id=device.id)
return self.startup
@@ -923,6 +963,12 @@
d.cancel()
except:
pass
+
+ if not self._initial_enable_complete:
+ # Never contacted the device on the initial startup, do 'activate' steps instead
+ self.startup = reactor.callLater(0, self.activate, done_deferred, False)
+ returnValue('activating')
+
# Get the latest device reference
device = self.adapter_agent.get_device(self.device_id)
@@ -1008,6 +1054,10 @@
"""
self.log.debug('reboot')
+ if not self._initial_enable_complete:
+ # Never contacted the device on the initial startup, do 'activate' steps instead
+ returnValue('failed')
+
# Cancel any running enable/disable/... in progress
d, self.startup = self.startup, None
try:
@@ -1179,6 +1229,12 @@
h.cancel()
except:
pass
+
+ # Get the latest device reference
+ device = self.adapter_agent.get_device(self.device_id)
+ device.reason = 'Deleting'
+ self.adapter_agent.update_device(device)
+
# self.adapter_agent.unregister_for_onu_detect_state(self.device.id)
# Remove all flows from the device
diff --git a/voltha/adapters/adtran_olt/adtran_olt.py b/voltha/adapters/adtran_olt/adtran_olt.py
index 3eef205..6b867bf 100644
--- a/voltha/adapters/adtran_olt/adtran_olt.py
+++ b/voltha/adapters/adtran_olt/adtran_olt.py
@@ -52,7 +52,7 @@
self.descriptor = Adapter(
id=self.name,
vendor='Adtran Inc.',
- version='0.18',
+ version='0.19',
config=AdapterConfig(log_level=LogLevel.INFO)
)
log.debug('adtran_olt.__init__', adapter_agent=adapter_agent)
@@ -139,8 +139,7 @@
}
self.devices_handlers[device.id] = AdtranOltHandler(**kwargs)
d = defer.Deferred()
- reactor.callLater(0, self.devices_handlers[device.id].activate,
- device, done_deferred=d)
+ reactor.callLater(0, self.devices_handlers[device.id].activate, d, False)
return d
def reconcile_device(self, device):
@@ -161,8 +160,7 @@
}
self.devices_handlers[device.id] = AdtranOltHandler(**kwargs)
d = defer.Deferred()
- reactor.callLater(0, self.devices_handlers[device.id].activate, device,
- done_deferred=d, reconciling=True)
+ reactor.callLater(0, self.devices_handlers[device.id].activate, d, True)
return d
def abandon_device(self, device):
diff --git a/voltha/adapters/adtran_olt/adtran_olt_handler.py b/voltha/adapters/adtran_olt/adtran_olt_handler.py
index 3864c9b..1403e1a 100644
--- a/voltha/adapters/adtran_olt/adtran_olt_handler.py
+++ b/voltha/adapters/adtran_olt/adtran_olt_handler.py
@@ -202,7 +202,8 @@
is_active=(rev_type == 'running-revision'),
is_committed=True,
is_valid=True,
- install_datetime='Not Available')
+ install_datetime='Not Available',
+ hash='Not Available')
device['software-images'].append(image)
except Exception as e:
@@ -267,12 +268,12 @@
for port in results.itervalues():
port_no = port.get('port_no')
assert port_no, 'Port number not found'
- assert port_no not in self.northbound_ports, \
- 'Port number {} already in northbound ports'.format(port_no)
- self.log.info('processing-nni', port_no=port_no, name=port['port_no'])
- self.northbound_ports[port_no] = NniPort(self, **port) if not self.is_virtual_olt \
- else MockNniPort(self, **port)
+ # May already exist if device was not fully reachable when first enabled
+ if port_no not in self.northbound_ports:
+ self.log.info('processing-nni', port_no=port_no, name=port['port_no'])
+ self.northbound_ports[port_no] = NniPort(self, **port) if not self.is_virtual_olt \
+ else MockNniPort(self, **port)
if len(self.northbound_ports) >= self.max_nni_ports: # TODO: For now, limit number of NNI ports to make debugging easier
break
@@ -362,14 +363,14 @@
for pon in results.itervalues():
pon_id = pon.get('pon-id')
assert pon_id is not None, 'PON ID not found'
- assert pon_id not in self.southbound_ports, \
- 'PON ID {} already in southbound ports'.format(pon_id)
if pon['ifIndex'] is None:
pon['port_no'] = self._pon_id_to_port_number(pon_id)
else:
pass # Need to adjust ONU numbering !!!!
- self.southbound_ports[pon_id] = PonPort(self, **pon)
+ # May already exist if device was not fully reachable when first enabled
+ if pon_id not in self.southbound_ports:
+ self.southbound_ports[pon_id] = PonPort(self, **pon)
self.num_southbound_ports = len(self.southbound_ports)
@@ -541,15 +542,19 @@
def reenable(self, done_deferred=None):
super(AdtranOltHandler, self).reenable(done_deferred=done_deferred)
- self.ready_network_access()
- self._zmq_startup()
+ # Only do the re-enable if we fully came up on the very first enable attempt.
+ # If we had not, the base class will have initiated the 'activate' for us
- # Register for adapter messages
- self.adapter_agent.register_for_inter_adapter_messages()
-
- self.status_poll = reactor.callLater(1, self.poll_for_status)
+ if self._initial_enable_complete:
+ self._zmq_startup()
+ self.adapter_agent.register_for_inter_adapter_messages()
+ self.status_poll = reactor.callLater(1, self.poll_for_status)
def reboot(self):
+ if not self._initial_enable_complete:
+ # Never contacted the device on the initial startup, do 'activate' steps instead
+ return
+
self._cancel_deferred()
# Drop registration for adapter messages
@@ -1081,6 +1086,10 @@
log.info('image_download', request=request)
try:
+ if not self._initial_enable_complete:
+ # Never contacted the device on the initial startup, do 'activate' steps instead
+ raise Exception('Device has not finished initial activation')
+
if request.name in self._downloads:
raise Exception("Download request with name '{}' already exists".
format(request.name))
@@ -1106,7 +1115,8 @@
except Exception as e:
self.log.exception('create', e=e)
- request.reason = ImageDownload.UNKNOWN_ERROR
+ request.reason = ImageDownload.UNKNOWN_ERROR if self._initial_enable_complete\
+ else ImageDownload.DEVICE_BUSY
request.state = ImageDownload.DOWNLOAD_FAILED
if not request.additional_info:
request.additional_info = e.message
diff --git a/voltha/adapters/adtran_olt/net/adtran_netconf.py b/voltha/adapters/adtran_olt/net/adtran_netconf.py
index 7c4741a..2671b19 100644
--- a/voltha/adapters/adtran_olt/net/adtran_netconf.py
+++ b/voltha/adapters/adtran_olt/net/adtran_netconf.py
@@ -99,7 +99,7 @@
except SSHError as e:
# Log and rethrow exception so any errBack is called
- log.exception('SSHError-during-connect', e=e)
+ log.warn('SSHError-during-connect', e=e)
raise e
except Exception as e: