# SPDX-FileCopyrightText: 2020 The Magma Authors.
# SPDX-FileCopyrightText: 2022 Open Networking Foundation <support@opennetworking.org>
#
# SPDX-License-Identifier: BSD-3-Clause
Wei-Yu Chen | 49950b9 | 2021-11-08 19:19:18 +0800 | [diff] [blame] | 5 | |
import traceback
from abc import abstractmethod
from typing import Any, Dict, Optional

from common.service import MagmaService
import metrics
from data_models.data_model_parameters import ParameterName
from device_config.enodeb_configuration import EnodebConfiguration
from exceptions import ConfigurationError
from logger import EnodebdLogger as logger
from state_machines.enb_acs import EnodebAcsStateMachine
from state_machines.enb_acs_states import EnodebAcsState
from state_machines.timer import StateMachineTimer
from tr069 import models
from tr069.models import Tr069ComplexModel
| 21 | |
| 22 | |
class BasicEnodebAcsStateMachine(EnodebAcsStateMachine):
    """
    Shared implementation for the per-device EnodebAcsStateMachine classes.

    Most of the EnodebAcsStateMachine classes for each device work about the
    same way. Differences lie mainly in the data model, desired configuration,
    and the state transition map. Subclasses supply those through the abstract
    members declared at the end of this class.
    """

    # eNodeB connection timeout is used to determine whether or not eNodeB is
    # connected to enodebd based on time of last Inform message. By default,
    # periodic inform interval is 30secs, so timeout should be larger than
    # this.
    # Also set timer longer than reboot time, so that an eNodeB reboot does not
    # trigger a connection-timeout alarm.
    ENB_CONNECTION_TIMEOUT = 600  # In seconds

    # If eNodeB is disconnected from MME for an unknown reason for this time,
    # then reboot it. Set to a long time to ensure this doesn't interfere with
    # other enodebd configuration processes - it is just a measure of last
    # resort for an unlikely error case
    MME_DISCONNECT_ENODEB_REBOOT_TIMER = 15 * 60

    # Check the MME connection status every 15 seconds
    MME_CHECK_TIMER = 15

    def __init__(
        self,
        service: MagmaService,
        use_param_key: bool,
    ) -> None:
        """
        Initialise the bookkeeping attributes and start the state machine.

        Args:
            service: service object stored on the state machine
            use_param_key: forwarded unchanged to the base class constructor
        """
        super().__init__(use_param_key=use_param_key)
        # Current state object; None until _start_state_machine() has run.
        self.state: Optional[EnodebAcsState] = None
        # Handle returned by event_loop.call_later() for the connection timeout
        self.timeout_handler = None
        # Handle returned by event_loop.call_later() for the periodic MME check
        self.mme_timeout_handler = None
        # Countdown towards an MME-disconnect reboot; None when not armed.
        self.mme_timer: Optional[StateMachineTimer] = None
        self._start_state_machine(service)

    def get_state(self) -> str:
        """
        Return the description of the current state.

        Returns:
            The current state's description, or 'N/A' when no state is set.
        """
        if self.state is None:
            logger.warning('ACS State machine is not in any state.')
            return 'N/A'
        return self.state.state_description()

    def handle_tr069_message(
        self,
        message: Tr069ComplexModel,
    ) -> Tr069ComplexModel:
        """
        Accept the tr069 message from the eNB and produce a reply.

        States may transition after reading a message but BEFORE producing
        a reply. Most steps in the provisioning process are represented as
        beginning with enodebd sending a request to the eNB, and waiting for
        the reply from the eNB.

        Any exception raised while reading the message or building the reply
        is logged together with the device/desired configuration, after which
        the machine moves to the unexpected-fault state and the reply is
        produced from there.
        """
        # TransferComplete messages come at random times, and we ignore them
        if isinstance(message, models.TransferComplete):
            return models.TransferCompleteResponse()
        try:
            self._read_tr069_msg(message)
            return self._get_tr069_msg(message)
        except Exception:  # pylint: disable=broad-except
            logger.error('Failed to handle tr069 message')
            logger.error(traceback.format_exc())
            self._dump_debug_info()
            self.transition(self.unexpected_fault_state_name)
            return self._get_tr069_msg(message)

    def transition(self, next_state: str) -> Any:
        """
        Leave the current state and enter the state named ``next_state``.

        Args:
            next_state: key into ``state_map`` of the state to enter

        Raises:
            KeyError: if ``next_state`` is not in ``state_map``. The current
                state is left untouched in that case.
        """
        logger.debug('State transition to <%s>', next_state)
        # Resolve the target before exiting, so an unknown state name cannot
        # leave the machine pointing at a state that has already been exited.
        target_state = self.state_map[next_state]
        if self.state is not None:
            self.state.exit()
        self.state = target_state
        self.state.enter()

    def stop_state_machine(self) -> None:
        """ Clean up anything the state machine is tracking or doing """
        # The state may still be unset if start-up never completed.
        if self.state is not None:
            self.state.exit()
        if self.timeout_handler is not None:
            self.timeout_handler.cancel()
            self.timeout_handler = None
        if self.mme_timeout_handler is not None:
            self.mme_timeout_handler.cancel()
            self.mme_timeout_handler = None
        # NOTE(review): presumably the backing fields of the service /
        # desired_cfg / device_cfg / data_model attributes provided by the
        # base class - confirm against EnodebAcsStateMachine.
        self._service = None
        self._desired_cfg = None
        self._device_cfg = None
        self._data_model = None

        # Keep the gauge in step with the timer: everywhere else the reboot
        # timer is cleared, the "timer active" gauge is reset as well.
        if self.mme_timer is not None:
            metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(0)
        self.mme_timer = None

    def _start_state_machine(
        self,
        service: MagmaService,
    ):
        """
        Build fresh data model / device config, enter the disconnected state
        and arm both the connection timeout and the periodic MME check.
        """
        self.service = service
        self.data_model = self.data_model_class()
        # The current known device config has few known parameters
        # The desired configuration depends on what the current configuration
        # is. This we don't know fully, yet.
        self.device_cfg = EnodebConfiguration(self.data_model)

        self._init_state_map()
        self.state = self.state_map[self.disconnected_state_name]
        self.state.enter()
        self._reset_timeout()
        self._periodic_check_mme_connection()

    def _reset_state_machine(
        self,
        service: MagmaService,
    ):
        """ Stop the state machine and start it again from scratch """
        self.stop_state_machine()
        self._start_state_machine(service)

    def _read_tr069_msg(self, message: Any) -> None:
        """ Process incoming message and maybe transition state """
        self._reset_timeout()
        msg_handled, next_state = self.state.read_msg(message)
        logger.info("Received incoming message, transfer to new state: %s", next_state)
        if not msg_handled:
            # The current state rejected the message: move to a state that can
            # cope with it (or raise), then offer the message to that state.
            self._transition_for_unexpected_msg(message)
            _msg_handled, next_state = self.state.read_msg(message)
        if next_state is not None:
            self.transition(next_state)

    def _get_tr069_msg(self, message: Any) -> Any:
        """ Get a new message to send, and maybe transition state """
        msg_and_transition = self.state.get_msg(message)
        logger.debug("Sending a new message to eNodeB")
        if msg_and_transition.next_state:
            logger.info("Transfer to new state: %s", msg_and_transition.next_state)
            self.transition(msg_and_transition.next_state)
        msg = msg_and_transition.msg
        return msg

    def _transition_for_unexpected_msg(self, message: Any) -> None:
        """
        eNB devices may send an Inform message in the middle of a provisioning
        session. To deal with this, transition to a state that expects an
        Inform message, but also track the status of the eNB as not having
        been disconnected.

        A Fault message moves the machine to the unexpected-fault state.

        Raises:
            ConfigurationError: for any other message type
        """
        if isinstance(message, models.Inform):
            logger.debug(
                'ACS in (%s) state. Received an Inform message',
                self.state.state_description(),
            )
            self._reset_state_machine(self.service)
        elif isinstance(message, models.Fault):
            logger.debug(
                'ACS in (%s) state. Received a Fault <%s>',
                self.state.state_description(), message.FaultString,
            )
            self.transition(self.unexpected_fault_state_name)
        else:
            raise ConfigurationError('Cannot handle unexpected TR069 msg')

    def _reset_timeout(self) -> None:
        """
        (Re)arm the connection timeout.

        Any pending timeout is cancelled and a new one is scheduled
        ENB_CONNECTION_TIMEOUT seconds from now; when it fires the machine
        transitions to the disconnected state.
        """
        if self.timeout_handler is not None:
            self.timeout_handler.cancel()

        def timed_out():
            self.transition(self.disconnected_state_name)

        self.timeout_handler = self.event_loop.call_later(
            self.ENB_CONNECTION_TIMEOUT,
            timed_out,
        )

    def _periodic_check_mme_connection(self) -> None:
        """
        Run the MME connection check now and schedule the next run in
        MME_CHECK_TIMER seconds.
        """
        # NOTE(review): if _check_mme_connection() raises, the next check is
        # never scheduled and the periodic check stops until the state machine
        # is restarted.
        self._check_mme_connection()
        self.mme_timeout_handler = self.event_loop.call_later(
            self.MME_CHECK_TIMER,
            self._periodic_check_mme_connection,
        )

    def _check_mme_connection(self) -> None:
        """
        Check if eNodeB should be connected to MME but isn't, and maybe reboot.

        If the eNB doesn't report connection to MME within a timeout period,
        get it to reboot in the hope that it will fix things.

        Usually, enodebd polls the eNodeB for whether it is connected to MME.
        This method checks the last polled MME connection status, and if
        eNodeB should be connected to MME but it isn't.
        """
        # A missing MME_STATUS parameter counts as "not connected".
        is_mme_connected = bool(
            self.device_cfg.has_parameter(ParameterName.MME_STATUS)
            and self.device_cfg.get_parameter(ParameterName.MME_STATUS),
        )

        # True if we would expect MME to be connected, but it isn't
        is_mme_unexpectedly_dc = \
            self.is_enodeb_connected() \
            and self.is_enodeb_configured() \
            and self.mconfig.allow_enodeb_transmit \
            and not is_mme_connected

        if is_mme_unexpectedly_dc:
            logger.warning(
                'eNodeB is connected to AGw, is configured, '
                'and has AdminState enabled for transmit. '
                'MME connection to eNB is missing.',
            )
            if self.mme_timer is None:
                # First detection: arm the reboot countdown.
                logger.warning(
                    'eNodeB will be rebooted if MME connection '
                    'is not established in: %s seconds.',
                    self.MME_DISCONNECT_ENODEB_REBOOT_TIMER,
                )
                metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(1)
                self.mme_timer = \
                    StateMachineTimer(self.MME_DISCONNECT_ENODEB_REBOOT_TIMER)
            elif self.mme_timer.is_done():
                # Countdown expired without an MME connection: reboot.
                logger.warning(
                    'eNodeB has not established MME connection '
                    'within %s seconds - rebooting!',
                    self.MME_DISCONNECT_ENODEB_REBOOT_TIMER,
                )
                metrics.STAT_ENODEB_REBOOTS.labels(cause='MME disconnect').inc()
                metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(0)
                self.mme_timer = None
                self.reboot_asap()
            else:
                # eNB is not connected to MME, but we're still waiting to see
                # if it will connect within the timeout period.
                # Take no action for now.
                pass
        else:
            if self.mme_timer is not None:
                logger.info('eNodeB has established MME connection.')
                self.mme_timer = None
            metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(0)

    def _dump_debug_info(self) -> None:
        """ Log the device and desired configurations at error level """
        if self.device_cfg is not None:
            logger.error(
                'Device configuration: %s',
                self.device_cfg.get_debug_info(),
            )
        else:
            logger.error('Device configuration: None')
        if self.desired_cfg is not None:
            logger.error(
                'Desired configuration: %s',
                self.desired_cfg.get_debug_info(),
            )
        else:
            logger.error('Desired configuration: None')

    @abstractmethod
    def _init_state_map(self) -> None:
        """ Build the mapping that ``state_map`` exposes """
        pass

    @property
    @abstractmethod
    def state_map(self) -> Dict[str, EnodebAcsState]:
        """ Mapping from state name to state object """
        pass

    @property
    @abstractmethod
    def disconnected_state_name(self) -> str:
        """ Name of the initial state, also entered on connection timeout """
        pass

    @property
    @abstractmethod
    def unexpected_fault_state_name(self) -> str:
        """ State to handle unexpected Fault messages """
        pass