"""
Copyright 2020 The Magma Authors.

This source code is licensed under the BSD-style license found in the
LICENSE file in the root directory of this source tree.

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
| 13 | |
| 14 | import traceback |
| 15 | from abc import abstractmethod |
| 16 | from typing import Any, Dict |
| 17 | |
| 18 | from common.service import MagmaService |
| 19 | import metrics |
| 20 | from data_models.data_model_parameters import ParameterName |
| 21 | from device_config.enodeb_configuration import EnodebConfiguration |
| 22 | from exceptions import ConfigurationError |
| 23 | from logger import EnodebdLogger as logger |
| 24 | from state_machines.enb_acs import EnodebAcsStateMachine |
| 25 | from state_machines.enb_acs_states import EnodebAcsState |
| 26 | from state_machines.timer import StateMachineTimer |
| 27 | from tr069 import models |
| 28 | from tr069.models import Tr069ComplexModel |
| 29 | |
| 30 | |
class BasicEnodebAcsStateMachine(EnodebAcsStateMachine):
    """
    Most of the EnodebAcsStateMachine classes for each device work about the
    same way. Differences lie mainly in the data model, desired configuration,
    and the state transition map.

    This class specifies the shared implementation between them.
    """

    # eNodeB connection timeout is used to determine whether or not eNodeB is
    # connected to enodebd based on time of last Inform message. By default,
    # periodic inform interval is 30secs, so timeout should be larger than
    # this.
    # Also set timer longer than reboot time, so that an eNodeB reboot does not
    # trigger a connection-timeout alarm.
    ENB_CONNECTION_TIMEOUT = 600  # In seconds

    # If eNodeB is disconnected from MME for an unknown reason for this time,
    # then reboot it. Set to a long time to ensure this doesn't interfere with
    # other enodebd configuration processes - it is just a measure of last
    # resort for an unlikely error case
    MME_DISCONNECT_ENODEB_REBOOT_TIMER = 15 * 60  # In seconds

    # Check the MME connection status every 15 seconds
    MME_CHECK_TIMER = 15

    def __init__(
        self,
        service: MagmaService,
        use_param_key: bool,
    ) -> None:
        """
        Initialise the bookkeeping attributes and start the state machine.

        Args:
            service: service object handed to _start_state_machine
            use_param_key: forwarded unchanged to the parent constructor
        """
        super().__init__(use_param_key=use_param_key)
        # Current state object; None until _start_state_machine() has run
        self.state = None
        # Handle returned by event_loop.call_later() for the eNB
        # connection timeout
        self.timeout_handler = None
        # Handle returned by event_loop.call_later() for the periodic
        # MME connection check
        self.mme_timeout_handler = None
        # StateMachineTimer counting down to an MME-disconnect reboot,
        # or None when no countdown is running
        self.mme_timer = None
        self._start_state_machine(service)

    def get_state(self) -> str:
        """ Return the current state's description, or 'N/A' if none """
        if self.state is None:
            logger.warning('ACS State machine is not in any state.')
            return 'N/A'
        return self.state.state_description()

    def handle_tr069_message(
        self,
        message: Tr069ComplexModel,
    ) -> Tr069ComplexModel:
        """
        Accept the tr069 message from the eNB and produce a reply.

        States may transition after reading a message but BEFORE producing
        a reply. Most steps in the provisioning process are represented as
        beginning with enodebd sending a request to the eNB, and waiting for
        the reply from the eNB.

        Any exception raised while reading the message or building the
        reply is logged, the machine is moved to the unexpected-fault
        state, and the reply is then requested from that state.
        """
        # TransferComplete messages come at random times, and we ignore them
        if isinstance(message, models.TransferComplete):
            return models.TransferCompleteResponse()
        try:
            self._read_tr069_msg(message)
            return self._get_tr069_msg(message)
        except Exception:  # pylint: disable=broad-except
            logger.error('Failed to handle tr069 message')
            logger.error(traceback.format_exc())
            self._dump_debug_info()
            self.transition(self.unexpected_fault_state_name)
            return self._get_tr069_msg(message)

    def transition(self, next_state: str) -> Any:
        """
        Leave the current state and enter the state named next_state.

        Args:
            next_state: key into state_map

        Raises:
            KeyError: if next_state is not a key of state_map; in that
                case the current state is left untouched.
        """
        logger.debug('State transition to <%s>', next_state)
        # Resolve the target before exiting the current state, so that an
        # unknown name does not leave the machine in an already-exited state
        # (which the fault handler would then exit a second time).
        new_state = self.state_map[next_state]
        if self.state is not None:
            self.state.exit()
        self.state = new_state
        self.state.enter()

    def stop_state_machine(self) -> None:
        """ Clean up anything the state machine is tracking or doing """
        if self.state is not None:
            self.state.exit()
        if self.timeout_handler is not None:
            self.timeout_handler.cancel()
            self.timeout_handler = None
        if self.mme_timeout_handler is not None:
            self.mme_timeout_handler.cancel()
            self.mme_timeout_handler = None
        # NOTE(review): presumably the backing fields of the parent-class
        # service/desired_cfg/device_cfg/data_model properties - confirm.
        self._service = None
        self._desired_cfg = None
        self._device_cfg = None
        self._data_model = None

        # A pending reboot countdown is being discarded, so the gauge that
        # reports it as active has to be cleared too; _check_mme_connection
        # only clears it while a timer object still exists.
        if self.mme_timer is not None:
            metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(0)
        self.mme_timer = None

    def _start_state_machine(
        self,
        service: MagmaService,
    ) -> None:
        """
        Build fresh data model/config objects, enter the disconnected
        state and arm the connection timeout and the periodic MME check.
        """
        self.service = service
        self.data_model = self.data_model_class()
        # The current known device config has few known parameters
        # The desired configuration depends on what the current configuration
        # is. This we don't know fully, yet.
        self.device_cfg = EnodebConfiguration(self.data_model)

        self._init_state_map()
        self.state = self.state_map[self.disconnected_state_name]
        self.state.enter()
        self._reset_timeout()
        self._periodic_check_mme_connection()

    def _reset_state_machine(
        self,
        service: MagmaService,
    ) -> None:
        """ Stop the state machine and start it again from scratch """
        self.stop_state_machine()
        self._start_state_machine(service)

    def _read_tr069_msg(self, message: Any) -> None:
        """ Process incoming message and maybe transition state """
        self._reset_timeout()
        msg_handled, next_state = self.state.read_msg(message)
        if not msg_handled:
            # The current state could not take the message: move to a state
            # that can (or raise), then let that state read it.
            self._transition_for_unexpected_msg(message)
            _msg_handled, next_state = self.state.read_msg(message)
        if next_state is not None:
            # Only report a transfer when one actually happens.
            logger.info(
                "Received incoming message, transfer to new state: %s",
                next_state,
            )
            self.transition(next_state)

    def _get_tr069_msg(self, message: Any) -> Any:
        """ Get a new message to send, and maybe transition state """
        msg_and_transition = self.state.get_msg(message)
        logger.debug("Sending a new message to eNodeB")
        if msg_and_transition.next_state:
            logger.info(
                "Transfer to new state: %s",
                msg_and_transition.next_state,
            )
            self.transition(msg_and_transition.next_state)
        return msg_and_transition.msg

    def _transition_for_unexpected_msg(self, message: Any) -> None:
        """
        eNB devices may send an Inform message in the middle of a provisioning
        session. To deal with this, transition to a state that expects an
        Inform message, but also track the status of the eNB as not having
        been disconnected.

        An Inform restarts the whole state machine, a Fault moves it to the
        unexpected-fault state.

        Raises:
            ConfigurationError: for any other message type.
        """
        if isinstance(message, models.Inform):
            logger.debug(
                'ACS in (%s) state. Received an Inform message',
                self.state.state_description(),
            )
            self._reset_state_machine(self.service)
        elif isinstance(message, models.Fault):
            logger.debug(
                'ACS in (%s) state. Received a Fault <%s>',
                self.state.state_description(), message.FaultString,
            )
            self.transition(self.unexpected_fault_state_name)
        else:
            raise ConfigurationError('Cannot handle unexpected TR069 msg')

    def _reset_timeout(self) -> None:
        """
        (Re)arm the eNB connection timeout.

        Cancels the pending timeout callback, if any, and schedules a new
        one ENB_CONNECTION_TIMEOUT seconds from now that moves the machine
        to the disconnected state.
        """
        if self.timeout_handler is not None:
            self.timeout_handler.cancel()

        def timed_out():
            self.transition(self.disconnected_state_name)

        self.timeout_handler = self.event_loop.call_later(
            self.ENB_CONNECTION_TIMEOUT,
            timed_out,
        )

    def _periodic_check_mme_connection(self) -> None:
        """
        Run the MME connection check now and reschedule this method to run
        again in MME_CHECK_TIMER seconds.
        """
        self._check_mme_connection()
        self.mme_timeout_handler = self.event_loop.call_later(
            self.MME_CHECK_TIMER,
            self._periodic_check_mme_connection,
        )

    def _check_mme_connection(self) -> None:
        """
        Check if eNodeB should be connected to MME but isn't, and maybe reboot.

        If the eNB doesn't report connection to MME within a timeout period,
        get it to reboot in the hope that it will fix things.

        Usually, enodebd polls the eNodeB for whether it is connected to MME.
        This method checks the last polled MME connection status, and if
        eNodeB should be connected to MME but it isn't.
        """
        # A missing MME_STATUS parameter counts as "not connected"
        is_mme_connected = bool(
            self.device_cfg.has_parameter(ParameterName.MME_STATUS)
            and self.device_cfg.get_parameter(ParameterName.MME_STATUS)
        )

        # True if we would expect MME to be connected, but it isn't
        is_mme_unexpectedly_dc = (
            self.is_enodeb_connected()
            and self.is_enodeb_configured()
            and self.mconfig.allow_enodeb_transmit
            and not is_mme_connected
        )

        if is_mme_unexpectedly_dc:
            logger.warning(
                'eNodeB is connected to AGw, is configured, '
                'and has AdminState enabled for transmit. '
                'MME connection to eNB is missing.',
            )
            if self.mme_timer is None:
                # First time the problem is seen: start the reboot countdown
                logger.warning(
                    'eNodeB will be rebooted if MME connection '
                    'is not established in: %s seconds.',
                    self.MME_DISCONNECT_ENODEB_REBOOT_TIMER,
                )
                metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(1)
                self.mme_timer = StateMachineTimer(
                    self.MME_DISCONNECT_ENODEB_REBOOT_TIMER,
                )
            elif self.mme_timer.is_done():
                logger.warning(
                    'eNodeB has not established MME connection '
                    'within %s seconds - rebooting!',
                    self.MME_DISCONNECT_ENODEB_REBOOT_TIMER,
                )
                metrics.STAT_ENODEB_REBOOTS.labels(
                    cause='MME disconnect',
                ).inc()
                metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(0)
                self.mme_timer = None
                self.reboot_asap()
            else:
                # eNB is not connected to MME, but we're still waiting to see
                # if it will connect within the timeout period.
                # Take no action for now.
                pass
        else:
            if self.mme_timer is not None:
                logger.info('eNodeB has established MME connection.')
                self.mme_timer = None
            metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(0)

    def _dump_debug_info(self) -> None:
        """ Log the device and desired configurations at error level """
        if self.device_cfg is not None:
            logger.error(
                'Device configuration: %s',
                self.device_cfg.get_debug_info(),
            )
        else:
            logger.error('Device configuration: None')
        if self.desired_cfg is not None:
            logger.error(
                'Desired configuration: %s',
                self.desired_cfg.get_debug_info(),
            )
        else:
            logger.error('Desired configuration: None')

    @abstractmethod
    def _init_state_map(self) -> None:
        """ Build the mapping that state_map exposes """
        pass

    @property
    @abstractmethod
    def state_map(self) -> Dict[str, EnodebAcsState]:
        """ Mapping from state name to state object """
        pass

    @property
    @abstractmethod
    def disconnected_state_name(self) -> str:
        """ Name of the state entered at start-up and on timeout """
        pass

    @property
    @abstractmethod
    def unexpected_fault_state_name(self) -> str:
        """ State to handle unexpected Fault messages """
        pass