Chip Boling | 32aab30 | 2019-01-23 10:50:18 -0600 | [diff] [blame] | 1 | # |
| 2 | # Copyright 2017 the original author or authors. |
| 3 | # |
| 4 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | # you may not use this file except in compliance with the License. |
| 6 | # You may obtain a copy of the License at |
| 7 | # |
| 8 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | # |
| 10 | # Unless required by applicable law or agreed to in writing, software |
| 11 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | # See the License for the specific language governing permissions and |
| 14 | # limitations under the License. |
| 15 | # |
| 16 | from task import Task |
| 17 | from twisted.internet.defer import inlineCallbacks, TimeoutError, failure, returnValue |
| 18 | from twisted.internet import reactor |
| 19 | from common.utils.asleep import asleep |
| 20 | from voltha.extensions.omci.database.mib_db_dict import * |
| 21 | from voltha.extensions.omci.omci_defs import AttributeAccess |
| 22 | from voltha.extensions.omci.database.alarm_db_ext import AlarmDbExternal |
| 23 | |
# Short alias for the AttributeAccess definitions imported above.
AA = AttributeAccess
| 25 | |
| 26 | |
class AlarmCopyException(Exception):
    """Raised when the local (OLT-side) ALARM database snapshot cannot be obtained."""
| 29 | |
| 30 | |
class AlarmDownloadException(Exception):
    """Raised when fewer alarm entries were retrieved from the ONU than it announced."""
| 33 | |
| 34 | |
class AlarmResyncException(Exception):
    """General ALARM resynchronization error (not raised by the code visible in this module)."""
| 37 | |
| 38 | |
class AlarmResyncTask(Task):
    """
    OpenOMCI ALARM resynchronization Task

    This task should get a copy of the ALARM table and compare it to a
    copy of the database.  When the ALARM Upload command is sent to the ONU,
    it should make a copy and source the data requested from this database.
    The ONU can still source AVC's and the OLT can still send config
    commands to the actual.

    The outcome is delivered through ``self.deferred``:

      - callback(None) when both alarm tables match,
      - callback(dict) with the keys 'onu-only', 'olt-only', 'attr-diffs',
        'onu-db' and 'olt-db' when they differ,
      - errback(Failure) when the snapshot or the upload fails.
    """
    task_priority = Task.DEFAULT_PRIORITY
    name = "ALARM Resynchronization Task"

    # Number of attempts made to send the initial ALARM upload request and the
    # delay handed to asleep() between those attempts (presumably seconds)
    max_retries = 3
    retry_delay = 7

    # Same, but for each individual 'get all alarms next' request
    max_alarm_upload_next_retries = 3
    alarm_upload_next_delay = 10          # Max * delay < 60 seconds

    def __init__(self, omci_agent, device_id):
        """
        Class initialization

        :param omci_agent: (OmciAdapterAgent) OMCI Adapter agent
        :param device_id: (str) ONU Device ID
        """
        super(AlarmResyncTask, self).__init__(AlarmResyncTask.name,
                                              omci_agent,
                                              device_id,
                                              priority=AlarmResyncTask.task_priority,
                                              exclusive=False)
        self._local_deferred = None
        self._device = omci_agent.get_device(device_id)

        # Scratch database that receives the alarm entries uploaded from the ONU
        # NOTE(review): start() below calls self._db_active.start() a second
        # time -- confirm that starting the database twice is harmless.
        self._db_active = MibDbVolatileDict(omci_agent)
        self._db_active.start()

    def cancel_deferred(self):
        """
        Cancel the base-class deferred and any pending call owned by this task
        """
        super(AlarmResyncTask, self).cancel_deferred()

        d, self._local_deferred = self._local_deferred, None
        try:
            if d is not None and not d.called:
                d.cancel()
        except Exception:
            # Best effort only: the pending call may already have run or
            # been cancelled, which is not an error during shutdown.
            pass

    def start(self):
        """
        Start ALARM Re-Synchronization task
        """
        super(AlarmResyncTask, self).start()

        # The resync itself runs from the reactor, after this method returns
        self._local_deferred = reactor.callLater(0, self.perform_alarm_resync)
        self._db_active.start()
        self._db_active.add(self.device_id)

    def stop(self):
        """
        Shutdown ALARM Re-Synchronization task

        Safe to call more than once; the scratch database is only stopped
        the first time.
        """
        self.log.debug('stopping')

        self.cancel_deferred()
        self._device = None

        db, self._db_active = self._db_active, None
        if db is not None:
            db.stop()

        super(AlarmResyncTask, self).stop()

    @inlineCallbacks
    def perform_alarm_resync(self):
        """
        Perform the ALARM Resynchronization sequence

        The sequence to be performed is:
            - get a copy of the current ALARM database

            - perform ALARM upload commands to get ONU's database and save this
              to a local DB.

            - compare both copies and report the differences (or None when
              there are none) through self.deferred

        During the alarm upload process, the maximum time between alarm upload
        next requests is 1 minute.

        Any exception raised along the way is logged and forwarded to
        self.deferred.errback().
        """
        self.log.debug('perform-alarm-resync')

        try:
            self.strobe_watchdog()
            results = yield self.snapshot_alarm()
            olt_db_copy = results[0]
            number_of_commands = results[1]

            if olt_db_copy is None:
                e = AlarmCopyException('Failed to get local database copy')
                self.deferred.errback(failure.Failure(e))
            else:
                # Start the ALARM upload sequence, save alarms to the table
                self.strobe_watchdog()

                if number_of_commands > 0:
                    commands_retrieved = yield self.upload_alarm(number_of_commands)
                else:
                    commands_retrieved = 0

                if commands_retrieved != number_of_commands:
                    e = AlarmDownloadException('Only retrieved {} of {} instances'.
                                               format(commands_retrieved, number_of_commands))
                    self.deferred.errback(failure.Failure(e))
                else:
                    # Compare the databases
                    onu_db_copy = self._db_active.query(self.device_id)
                    self.deferred.callback(self._build_resync_results(olt_db_copy,
                                                                      onu_db_copy))

        except Exception as e:
            self.log.exception('resync', e=e)
            self.deferred.errback(failure.Failure(e))

    def _build_resync_results(self, olt_db_copy, onu_db_copy):
        """
        Compare both alarm tables and build the value handed to the task's
        deferred callback.

        :param olt_db_copy: (dict) OpenOMCI's copy of the alarm database
        :param onu_db_copy: (dict) Alarm table uploaded from the ONU
        :return: (dict) Differences plus both database copies, or None when
                        no difference was found
        """
        on_olt_only, on_onu_only, attr_diffs = \
            self.compare_mibs(olt_db_copy, onu_db_copy)

        # An empty difference list is reported as None
        on_olt_only = on_olt_only or None
        on_onu_only = on_onu_only or None
        attr_diffs = attr_diffs or None

        if on_olt_only is None and on_onu_only is None and attr_diffs is None:
            return None

        return {
            'onu-only': on_onu_only,
            'olt-only': on_olt_only,
            'attr-diffs': attr_diffs,
            'onu-db': onu_db_copy,
            'olt-db': olt_db_copy
        }

    @inlineCallbacks
    def snapshot_alarm(self):
        """
        Snapshot the ALARM on the ONU and create a copy of our local ALARM database

        The ALARM upload request is attempted up to ``max_retries`` times.  A
        timeout or a missing (None) command count triggers another attempt
        after ``retry_delay``.

        :return: (pair) (olt_db_copy, command_sequence_number)
        :raises TimeoutError: if the final attempt timed out
        :raises AlarmCopyException: if no command count or no local database
                                    copy could be obtained
        """
        olt_db_copy = None
        command_sequence_number = None

        try:
            max_tries = AlarmResyncTask.max_retries - 1

            for retries in xrange(0, max_tries + 1):
                # Send ALARM Upload so ONU snapshots its ALARM
                try:
                    command_sequence_number = yield self.send_alarm_upload()
                    self.strobe_watchdog()

                except TimeoutError as e:
                    self.log.warn('timeout', e=e)
                    if retries >= max_tries:
                        raise
                    command_sequence_number = None

                if command_sequence_number is not None:
                    # Get a snapshot of the local MIB database
                    olt_db_copy = self._device.query_alarm_table()
                    # if we made it this far, no need to keep trying
                    break

                # No usable answer from the ONU.  Wait before the next attempt,
                # but do not waste time sleeping after the final one.
                if retries < max_tries:
                    self.strobe_watchdog()
                    yield asleep(AlarmResyncTask.retry_delay)

        except Exception as e:
            self.log.exception('alarm-resync', e=e)
            raise

        # Handle initial failures

        if olt_db_copy is None or command_sequence_number is None:
            raise AlarmCopyException('Failed to snapshot ALARM copy after {} retries'.
                                     format(AlarmResyncTask.max_retries))

        returnValue((olt_db_copy, command_sequence_number))

    @inlineCallbacks
    def send_alarm_upload(self):
        """
        Perform ALARM upload command and get the number of entries to retrieve

        :return: (int) Number of commands to execute, as reported in the
                       'number_of_commands' field of the ONU's response
        :raises TimeoutError: if the request timed out (logged, then re-raised)
        :raises ValueError: if the reported number of commands is negative
        """
        ########################################
        # Begin ALARM Upload
        try:
            results = yield self._device.omci_cc.send_get_all_alarm()
            self.strobe_watchdog()
            command_sequence_number = results.fields['omci_message'].fields['number_of_commands']

            if command_sequence_number < 0:
                raise ValueError('Number of commands was {}'.format(command_sequence_number))

            returnValue(command_sequence_number)

        except TimeoutError as e:
            self.log.warn('alarm-resync-get-timeout', e=e)
            raise

    @inlineCallbacks
    def upload_alarm(self, command_sequence_number):
        """
        Retrieve each alarm entry from the ONU and save it to the scratch database

        Every sequence number is requested up to
        ``max_alarm_upload_next_retries`` times.  A timeout on the final
        attempt is re-raised.  Any other error is logged and the request is
        tried again; once the attempts are used up the entry is skipped and
        is not counted.

        :param command_sequence_number: (int) Number of entries announced by the ONU
        :return: (int) Number of entries actually retrieved and stored
        :raises TimeoutError: if the last attempt for an entry timed out
        """
        ########################################
        # Begin ALARM Upload
        max_tries = AlarmResyncTask.max_alarm_upload_next_retries
        retrieved = 0

        for seq_no in xrange(command_sequence_number):
            for retries in xrange(0, max_tries):
                try:
                    response = yield self._device.omci_cc.send_get_all_alarm_next(seq_no)
                    self.strobe_watchdog()

                    omci_msg = response.fields['omci_message'].fields
                    alarm_class_id = omci_msg['alarmed_entity_class']
                    alarm_entity_id = omci_msg['alarmed_entity_id']

                    alarm_bit_map = omci_msg['alarm_bit_map']
                    attributes = {AlarmDbExternal.ALARM_BITMAP_KEY: alarm_bit_map}

                    # Save to the database
                    self._db_active.set(self.device_id, alarm_class_id,
                                        alarm_entity_id, attributes)

                    # Only entries that were really stored are counted so the
                    # caller can detect an incomplete upload
                    retrieved += 1
                    break

                except TimeoutError:
                    self.log.warn('alarm-resync-timeout', seq_no=seq_no,
                                  command_sequence_number=command_sequence_number)

                    if retries < max_tries - 1:
                        yield asleep(AlarmResyncTask.alarm_upload_next_delay)
                        self.strobe_watchdog()
                    else:
                        raise

                except Exception as e:
                    self.log.exception('resync', e=e, seq_no=seq_no,
                                       command_sequence_number=command_sequence_number)

        returnValue(retrieved)

    def compare_mibs(self, db_copy, db_active):
        """
        Compare our db_copy with the ONU's active copy

        :param db_copy: (dict) OpenOMCI's copy of the database
        :param db_active: (dict) ONU's database snapshot
        :return: (list), (list), (list) Entries only in db_copy, entries only
                 in db_active and attribute differences of common entries
        """
        self.strobe_watchdog()

        # Class & Entities only in local copy (OpenOMCI)
        on_olt_only = self.get_lsh_only_dict(db_copy, db_active)

        # Class & Entities only on remote (ONU)
        on_onu_only = self.get_lsh_only_dict(db_active, db_copy)

        # Class & Entities on both local & remote, but one or more attributes
        # are different on the ONU.  This is the value that the local (OpenOMCI)
        # thinks should be on the remote (ONU)

        me_map = self.omci_agent.get_device(self.device_id).me_map
        attr_diffs = self.get_attribute_diffs(db_copy, db_active, me_map)

        return on_olt_only, on_onu_only, attr_diffs

    def get_lsh_only_dict(self, lhs, rhs):
        """
        Compare two MIB database dictionaries and return the ME Class ID and
        instances that are unique to the lhs dictionary. Both parameters
        should be in the common MIB Database output dictionary format that
        is returned by the mib 'query' command.

        Keys that are not integers (at class or at instance level) are
        skipped.

        :param lhs: (dict) Left-hand-side argument.
        :param rhs: (dict) Right-hand-side argument

        return: (list(int,int)) List of tuples where (class_id, inst_id)
        """
        results = list()

        for cls_id, cls_data in lhs.items():
            # Skip keys that are not class IDs
            if not isinstance(cls_id, int):
                continue

            if cls_id not in rhs:
                # Whole class is unique to lhs: report every instance of it
                results.extend((cls_id, inst_id) for inst_id in cls_data.keys()
                               if isinstance(inst_id, int))
            else:
                # Class exists on both sides: report the instances unique to
                # lhs, skipping keys that are not instance IDs
                rhs_cls = rhs[cls_id]
                results.extend((cls_id, inst_id) for inst_id in cls_data.keys()
                               if isinstance(inst_id, int) and inst_id not in rhs_cls)

        return results

    def get_attribute_diffs(self, omci_copy, onu_copy, me_map):
        """
        Compare two OMCI MIBs and return the ME class and instance IDs that exists
        on both the local copy and the remote ONU that have different attribute
        values. Both parameters should be in the common MIB Database output
        dictionary format that is returned by the mib 'query' command.

        :param omci_copy: (dict) OpenOMCI copy (OLT-side) of the MIB Database
        :param onu_copy: (dict) active ONU latest copy its database
        :param me_map: (dict) ME Class ID MAP for this ONU (currently not
                              used by this method)

        return: (list(int,int,str)) List of tuples where (class_id, inst_id, attribute)
                                    points to the specific ME instance where attributes
                                    are different
        """
        results = list()

        # Get class ID's that are in both
        class_ids = {cls_id for cls_id in omci_copy
                     if isinstance(cls_id, int) and cls_id in onu_copy}

        for cls_id in class_ids:
            olt_cls = omci_copy[cls_id]
            onu_cls = onu_copy[cls_id]

            # Get set of common instance IDs
            inst_ids = {inst_id for inst_id in olt_cls
                        if isinstance(inst_id, int) and inst_id in onu_cls}

            for inst_id in inst_ids:
                olt_attrs = olt_cls[inst_id][ATTRIBUTES_KEY]
                onu_attrs = onu_cls[inst_id][ATTRIBUTES_KEY]

                omci_attributes = set(olt_attrs)
                onu_attributes = set(onu_attrs)

                # Get attributes that exist in one database, but not the other
                sym_diffs = (omci_attributes ^ onu_attributes)
                results.extend([(cls_id, inst_id, attr) for attr in sym_diffs])

                # Get common attributes with different values
                common_attributes = (omci_attributes & onu_attributes)
                results.extend([(cls_id, inst_id, attr) for attr in common_attributes
                                if olt_attrs[attr] != onu_attrs[attr]])
        return results