Wei-Yu Chen | ad55cb8 | 2022-02-15 20:07:01 +0800 | [diff] [blame] | 1 | # SPDX-FileCopyrightText: 2020 The Magma Authors. |
| 2 | # SPDX-FileCopyrightText: 2022 Open Networking Foundation <support@opennetworking.org> |
| 3 | # |
| 4 | # SPDX-License-Identifier: BSD-3-Clause |
Wei-Yu Chen | 49950b9 | 2021-11-08 19:19:18 +0800 | [diff] [blame] | 5 | |
Wei-Yu Chen | 49950b9 | 2021-11-08 19:19:18 +0800 | [diff] [blame] | 6 | # pylint: disable=W0223 |
| 7 | |
| 8 | import asyncio |
| 9 | import logging |
| 10 | import os |
| 11 | import time |
| 12 | from typing import List, Optional, Set, cast |
| 13 | |
| 14 | import systemd.daemon |
| 15 | from common.job import Job |
| 16 | |
| 17 | |
class SDWatchdogTask(Job):
    """Job subclass used to mark tasks that SDWatchdog should monitor."""
| 20 | |
| 21 | |
class SDWatchdog(object):
    """
    This is a task that utilizes systemd watchdog functionality.

    Every ``period`` seconds each registered SDWatchdogTask is asked, via
    ``not_completed()``, whether it has failed to complete its loop. The
    "WATCHDOG=1" keep-alive is sent to systemd only when no task reports
    being stuck.

    The SDWatchdog() task is started automatically in common/service.run(),
    where it will look at every task in the loop to see if it is a subclass
    of SDWatchdogTask.

    To enable systemd watchdog, add "WatchdogSec=60" in the [Service] section
    of the systemd service file.
    """

    def __init__(
        self,
        tasks: Optional[List[SDWatchdogTask]],
        update_status: bool = False,  # update systemd status field
        period: float = 30,
    ) -> None:
        """
        Collect the tasks to monitor and store the check settings.

        Args:
            tasks: candidate tasks; entries that are not SDWatchdogTask
                instances are skipped with a warning. None or an empty list
                results in no monitored tasks.
            update_status: when True, run() also writes the outcome of each
                check to the systemd "STATUS=" field.
            period: number of seconds run() sleeps between two checks.
        """
        self.tasks = cast(Set[SDWatchdogTask], set())
        self.update_status = update_status
        self.period = period

        for candidate in tasks or ():
            if isinstance(candidate, SDWatchdogTask):
                self.tasks.add(candidate)
            else:
                logging.warning(
                    "'%s' is not a 'SDWatchdogTask', skipping",
                    repr(candidate),
                )

    @staticmethod
    def has_notify() -> bool:
        """Return True when the NOTIFY_SOCKET environment variable is set."""
        return os.getenv("NOTIFY_SOCKET") is not None

    async def run(self) -> None:
        """
        Check the tasks every self.period seconds and notify systemd.

        Returns immediately (after logging a warning) when NOTIFY_SOCKET is
        not set. Otherwise loops forever: if no task reports not_completed()
        for the current time, "WATCHDOG=1" and "READY=1" are sent to systemd.
        The systemd "STATUS=" field is only written when self.update_status
        is True, both for stuck tasks and for a successful check.
        """
        if not self.has_notify():
            logging.warning("Missing 'NOTIFY_SOCKET' for SDWatchdog, skipping")
            return
        logging.info("Starting SDWatchdog...")
        while True:
            current_time = time.time()
            # Formatted once per cycle; shared by every message of this check.
            timestamp = time.asctime(time.gmtime(current_time))
            any_stuck = False
            for task in self.tasks:
                if not task.not_completed(current_time):
                    continue
                any_stuck = True
                errmsg = "SDWatchdog service '%s' has not completed %s" % (
                    repr(task), timestamp,
                )
                if self.update_status:
                    systemd.daemon.notify("STATUS=%s\n" % errmsg)
                logging.info(errmsg)

            if not any_stuck:
                # Honour update_status here as well, so the status field is
                # left untouched when the caller did not ask for updates.
                if self.update_status:
                    systemd.daemon.notify(
                        'STATUS=SDWatchdog success %s\n' % timestamp,
                    )
                systemd.daemon.notify("WATCHDOG=1")
                systemd.daemon.notify("READY=1")  # only active if Type=notify

            await asyncio.sleep(self.period)