"""
Copyright 2020 The Magma Authors.

This source code is licensed under the BSD-style license found in the
LICENSE file in the root directory of this source tree.

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
| 13 | |
| 14 | import traceback |
| 15 | from abc import abstractmethod |
| 16 | from typing import Any, Dict |
| 17 | |
| 18 | from common.service import MagmaService |
| 19 | import metrics |
| 20 | from data_models.data_model_parameters import ParameterName |
| 21 | from device_config.enodeb_configuration import EnodebConfiguration |
| 22 | from exceptions import ConfigurationError |
| 23 | from logger import EnodebdLogger as logger |
| 24 | from state_machines.enb_acs import EnodebAcsStateMachine |
| 25 | from state_machines.enb_acs_states import EnodebAcsState |
| 26 | from state_machines.timer import StateMachineTimer |
| 27 | from tr069 import models |
| 28 | from tr069.models import Tr069ComplexModel |
| 29 | |
| 30 | |
class BasicEnodebAcsStateMachine(EnodebAcsStateMachine):
    """
    Most of the EnodebAcsStateMachine classes for each device work about the
    same way. Differences lie mainly in the data model, desired configuration,
    and the state transition map.

    This class specifies the shared implementation between them: message
    dispatch to the current state, state transitions, the eNodeB connection
    timeout, and the periodic MME-connection watchdog.

    Subclasses must provide `_init_state_map`, `state_map`,
    `disconnected_state_name` and `unexpected_fault_state_name`.
    """

    # eNodeB connection timeout is used to determine whether or not eNodeB is
    # connected to enodebd based on time of last Inform message. By default,
    # periodic inform interval is 30secs, so timeout should be larger than
    # this.
    # Also set timer longer than reboot time, so that an eNodeB reboot does not
    # trigger a connection-timeout alarm.
    ENB_CONNECTION_TIMEOUT = 600  # In seconds

    # If eNodeB is disconnected from MME for an unknown reason for this time,
    # then reboot it. Set to a long time to ensure this doesn't interfere with
    # other enodebd configuration processes - it is just a measure of last
    # resort for an unlikely error case
    MME_DISCONNECT_ENODEB_REBOOT_TIMER = 15 * 60  # In seconds

    # Check the MME connection status every 15 seconds
    MME_CHECK_TIMER = 15

    def __init__(
        self,
        service: MagmaService,
        use_param_key: bool,
    ) -> None:
        """
        Initialise the bookkeeping attributes and start the state machine.

        Args:
            service: service object handed to `_start_state_machine`.
            use_param_key: forwarded unchanged to the base class constructor.
        """
        super().__init__(use_param_key=use_param_key)
        # Current EnodebAcsState; None until _start_state_machine() runs.
        self.state = None
        # Handle of the pending connection-timeout callback, if any.
        self.timeout_handler = None
        # Handle of the pending periodic MME check callback, if any.
        self.mme_timeout_handler = None
        # StateMachineTimer counting down to an MME-disconnect reboot, if any.
        self.mme_timer = None
        self._start_state_machine(service)

    def get_state(self) -> str:
        """
        Return the description of the current state.

        Returns:
            `state_description()` of the current state, or 'N/A' (after
            logging a warning) when no state is set.
        """
        if self.state is None:
            logger.warning('ACS State machine is not in any state.')
            return 'N/A'
        return self.state.state_description()

    def handle_tr069_message(
        self,
        message: Tr069ComplexModel,
    ) -> Tr069ComplexModel:
        """
        Accept the tr069 message from the eNB and produce a reply.

        States may transition after reading a message but BEFORE producing
        a reply. Most steps in the provisioning process are represented as
        beginning with enodebd sending a request to the eNB, and waiting for
        the reply from the eNB.

        Any exception raised while reading the message or building the reply
        is logged together with the device/desired configuration, the machine
        moves to the unexpected-fault state, and the reply is produced by
        that state instead.

        Args:
            message: the message received from the eNB.

        Returns:
            The message to send back to the eNB.
        """
        # TransferComplete messages come at random times, and we ignore them
        if isinstance(message, models.TransferComplete):
            return models.TransferCompleteResponse()
        try:
            self._read_tr069_msg(message)
            return self._get_tr069_msg(message)
        except Exception:  # pylint: disable=broad-except
            logger.error('Failed to handle tr069 message')
            logger.error(traceback.format_exc())
            self._dump_debug_info()
            self.transition(self.unexpected_fault_state_name)
            return self._get_tr069_msg(message)

    def transition(self, next_state: str) -> Any:
        """
        Leave the current state and enter the state named `next_state`.

        Args:
            next_state: key into `state_map` of the state to enter.

        Raises:
            KeyError: if `next_state` is not in `state_map`. In that case the
                current state is left untouched (it is NOT exited).
        """
        logger.debug('State transition to <%s>', next_state)
        # Resolve the target before exiting the current state, so that an
        # unknown state name cannot leave the machine sitting in a state
        # that has already been exited.
        target = self.state_map[next_state]
        self.state.exit()
        self.state = target
        self.state.enter()

    def stop_state_machine(self) -> None:
        """ Clean up anything the state machine is tracking or doing """
        # The state may legitimately be None (see get_state), e.g. if the
        # machine is stopped before it was fully started.
        if self.state is not None:
            self.state.exit()
        if self.timeout_handler is not None:
            self.timeout_handler.cancel()
            self.timeout_handler = None
        if self.mme_timeout_handler is not None:
            self.mme_timeout_handler.cancel()
            self.mme_timeout_handler = None
        self._service = None
        self._desired_cfg = None
        self._device_cfg = None
        self._data_model = None

        if self.mme_timer is not None:
            # The reboot countdown is being discarded, so the gauge must be
            # cleared too; _check_mme_connection only clears it while
            # mme_timer is still set, and would otherwise leave it at 1.
            metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(0)
            self.mme_timer = None

    def _start_state_machine(
        self,
        service: MagmaService,
    ):
        """
        Build fresh data model / device config, enter the disconnected state
        and arm both the connection timeout and the periodic MME check.

        Args:
            service: service object stored on the state machine.
        """
        self.service = service
        self.data_model = self.data_model_class()
        # The current known device config has few known parameters
        # The desired configuration depends on what the current configuration
        # is. This we don't know fully, yet.
        self.device_cfg = EnodebConfiguration(self.data_model)

        self._init_state_map()
        self.state = self.state_map[self.disconnected_state_name]
        self.state.enter()
        self._reset_timeout()
        self._periodic_check_mme_connection()

    def _reset_state_machine(
        self,
        service: MagmaService,
    ):
        """
        Stop the state machine and start it again from scratch.

        Args:
            service: service object passed on to `_start_state_machine`.
        """
        self.stop_state_machine()
        self._start_state_machine(service)

    def _read_tr069_msg(self, message: Any) -> None:
        """
        Process incoming message and maybe transition state.

        The connection timeout is re-armed first. The current state is then
        asked to read the message; if it reports the message as unhandled,
        `_transition_for_unexpected_msg` picks a recovery path and the
        (possibly new) current state reads the message again.
        """
        self._reset_timeout()
        msg_handled, next_state = self.state.read_msg(message)
        if not msg_handled:
            self._transition_for_unexpected_msg(message)
            # Only the requested transition of the second read is used;
            # its "handled" flag is intentionally not checked.
            _msg_handled, next_state = self.state.read_msg(message)
        if next_state is not None:
            self.transition(next_state)

    def _get_tr069_msg(self, message: Any) -> Any:
        """
        Get a new message to send, and maybe transition state.

        Returns:
            The `msg` produced by the current state's `get_msg`. If that
            result names a `next_state`, the transition happens before
            returning.
        """
        msg_and_transition = self.state.get_msg(message)
        if msg_and_transition.next_state:
            self.transition(msg_and_transition.next_state)
        return msg_and_transition.msg

    def _transition_for_unexpected_msg(self, message: Any) -> None:
        """
        Recover from a message the current state did not handle.

        eNB devices may send an Inform message in the middle of a
        provisioning session. To deal with this, the whole state machine is
        reset so that it is back in its initial state. An unexpected Fault
        moves the machine to the unexpected-fault state.

        Raises:
            ConfigurationError: if the message is neither an Inform nor a
                Fault.
        """
        if isinstance(message, models.Inform):
            logger.debug(
                'ACS in (%s) state. Received an Inform message',
                self.state.state_description(),
            )
            self._reset_state_machine(self.service)
        elif isinstance(message, models.Fault):
            logger.debug(
                'ACS in (%s) state. Received a Fault <%s>',
                self.state.state_description(), message.FaultString,
            )
            self.transition(self.unexpected_fault_state_name)
        else:
            raise ConfigurationError('Cannot handle unexpected TR069 msg')

    def _reset_timeout(self) -> None:
        """
        (Re)arm the eNodeB connection timeout.

        Any pending timeout callback is cancelled and a new one is scheduled
        ENB_CONNECTION_TIMEOUT seconds from now; when it fires, the machine
        transitions to the disconnected state.
        """
        if self.timeout_handler is not None:
            self.timeout_handler.cancel()

        def timed_out():
            self.transition(self.disconnected_state_name)

        self.timeout_handler = self.event_loop.call_later(
            self.ENB_CONNECTION_TIMEOUT,
            timed_out,
        )

    def _periodic_check_mme_connection(self) -> None:
        """
        Run the MME connection check now and schedule the next run
        MME_CHECK_TIMER seconds from now.
        """
        self._check_mme_connection()
        self.mme_timeout_handler = self.event_loop.call_later(
            self.MME_CHECK_TIMER,
            self._periodic_check_mme_connection,
        )

    def _check_mme_connection(self) -> None:
        """
        Check if eNodeB should be connected to MME but isn't, and maybe reboot.

        If the eNB doesn't report connection to MME within a timeout period,
        get it to reboot in the hope that it will fix things.

        Usually, enodebd polls the eNodeB for whether it is connected to MME.
        This method checks the last polled MME connection status, and if
        eNodeB should be connected to MME but it isn't.
        """
        # A missing MME_STATUS parameter counts as "not connected".
        is_mme_connected = bool(
            self.device_cfg.has_parameter(ParameterName.MME_STATUS)
            and self.device_cfg.get_parameter(ParameterName.MME_STATUS),
        )

        # True if we would expect MME to be connected, but it isn't
        is_mme_unexpectedly_dc = (
            self.is_enodeb_connected()
            and self.is_enodeb_configured()
            and self.mconfig.allow_enodeb_transmit
            and not is_mme_connected
        )

        if is_mme_unexpectedly_dc:
            logger.warning(
                'eNodeB is connected to AGw, is configured, '
                'and has AdminState enabled for transmit. '
                'MME connection to eNB is missing.',
            )
            if self.mme_timer is None:
                # First time the problem is seen: start the reboot countdown.
                logger.warning(
                    'eNodeB will be rebooted if MME connection '
                    'is not established in: %s seconds.',
                    self.MME_DISCONNECT_ENODEB_REBOOT_TIMER,
                )
                metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(1)
                self.mme_timer = StateMachineTimer(
                    self.MME_DISCONNECT_ENODEB_REBOOT_TIMER,
                )
            elif self.mme_timer.is_done():
                logger.warning(
                    'eNodeB has not established MME connection '
                    'within %s seconds - rebooting!',
                    self.MME_DISCONNECT_ENODEB_REBOOT_TIMER,
                )
                metrics.STAT_ENODEB_REBOOTS.labels(
                    cause='MME disconnect',
                ).inc()
                metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(0)
                self.mme_timer = None
                self.reboot_asap()
            # Otherwise the eNB is not connected to MME, but we're still
            # waiting to see if it will connect within the timeout period.
            # Take no action for now.
        else:
            if self.mme_timer is not None:
                logger.info('eNodeB has established MME connection.')
                self.mme_timer = None
            metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(0)

    def _dump_debug_info(self) -> None:
        """
        Log the device and desired configurations at error level, or 'None'
        for whichever of them is not set.
        """
        if self.device_cfg is not None:
            logger.error(
                'Device configuration: %s',
                self.device_cfg.get_debug_info(),
            )
        else:
            logger.error('Device configuration: None')
        if self.desired_cfg is not None:
            logger.error(
                'Desired configuration: %s',
                self.desired_cfg.get_debug_info(),
            )
        else:
            logger.error('Desired configuration: None')

    @abstractmethod
    def _init_state_map(self) -> None:
        """ Build the state map; called each time the machine is started """
        pass

    @property
    @abstractmethod
    def state_map(self) -> Dict[str, EnodebAcsState]:
        """ Mapping from state name to state object """
        pass

    @property
    @abstractmethod
    def disconnected_state_name(self) -> str:
        """ Name of the initial state, also entered on connection timeout """
        pass

    @property
    @abstractmethod
    def unexpected_fault_state_name(self) -> str:
        """ State to handle unexpected Fault messages """
        pass