Wei-Yu Chen | 49950b9 | 2021-11-08 19:19:18 +0800 | [diff] [blame] | 1 | """ |
| 2 | Copyright 2020 The Magma Authors. |
| 3 | |
| 4 | This source code is licensed under the BSD-style license found in the |
| 5 | LICENSE file in the root directory of this source tree. |
| 6 | |
| 7 | Unless required by applicable law or agreed to in writing, software |
| 8 | distributed under the License is distributed on an "AS IS" BASIS, |
| 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 10 | See the License for the specific language governing permissions and |
| 11 | limitations under the License. |
| 12 | """ |
| 13 | # pylint: disable=W0223 |
| 14 | |
| 15 | import asyncio |
| 16 | import logging |
| 17 | import os |
| 18 | import time |
| 19 | from typing import List, Optional, Set, cast |
| 20 | |
| 21 | import systemd.daemon |
| 22 | from common.job import Job |
| 23 | |
| 24 | |
class SDWatchdogTask(Job):
    """
    Marker subclass of Job for tasks that SDWatchdog should monitor.

    It adds nothing to Job; SDWatchdog only accepts tasks that are
    instances of this class and skips everything else.
    """
| 27 | |
| 28 | |
class SDWatchdog(object):
    """
    This is a task that utilizes systemd watchdog functionality.

    SDWatchdog() task is started automatically in common/service.run(),
    where it will look at every task in the loop to see if it is a subclass
    of SDWatchdogTask.

    To enable systemd watchdog, add "WatchdogSec=60" in the [Service] section
    of the systemd service file.
    """

    def __init__(
        self,
        tasks: Optional[List[SDWatchdogTask]],
        update_status: bool = False,  # update systemd status field
        period: float = 30,
    ) -> None:
        """
        Collect the tasks that the watchdog will monitor.

        Args:
            tasks: candidate tasks. Entries that are not SDWatchdogTask
                instances are skipped with a warning; None or an empty
                list leaves the watchdog with nothing to monitor.
            update_status: when True, a task that has not completed is
                also reported through the systemd STATUS field.
            period: number of seconds run() sleeps between two checks.
        """
        self.tasks = cast(Set[SDWatchdogTask], set())
        self.update_status = update_status
        self.period = period

        for task in tasks or ():
            # isinstance (rather than issubclass(type(...))) also honours
            # objects that report their class through __class__.
            if isinstance(task, SDWatchdogTask):
                self.tasks.add(task)
            else:
                logging.warning(
                    "'%s' is not a 'SDWatchdogTask', skipping", repr(task),
                )

    @staticmethod
    def has_notify() -> bool:
        """Return True when the NOTIFY_SOCKET environment variable is set."""
        return os.getenv("NOTIFY_SOCKET") is not None

    async def run(self) -> None:
        """
        Check the tasks every self.period seconds and notify systemd.

        Returns immediately (after logging a warning) when NOTIFY_SOCKET is
        not set. Otherwise loops forever: each task is asked, through
        not_completed(current_time), whether it is stuck. Only when no task
        is stuck is sd notify WATCHDOG=1 sent.

        NOTE(review): the timeout that decides "not completed" lives in the
        task's not_completed() implementation, which is not visible here.
        """
        if not self.has_notify():
            logging.warning("Missing 'NOTIFY_SOCKET' for SDWatchdog, skipping")
            return
        logging.info("Starting SDWatchdog...")
        while True:
            current_time = time.time()
            # Same UTC timestamp is used for every message of this round.
            timestamp = time.asctime(time.gmtime(current_time))
            any_stuck = False
            for task in self.tasks:
                if not task.not_completed(current_time):
                    continue
                errmsg = "SDWatchdog service '%s' has not completed %s" % (
                    repr(task), timestamp,
                )
                if self.update_status:
                    systemd.daemon.notify("STATUS=%s\n" % errmsg)
                logging.info(errmsg)
                any_stuck = True

            # Skipping WATCHDOG=1 while any task is stuck lets the systemd
            # watchdog timer (WatchdogSec) expire.
            if not any_stuck:
                systemd.daemon.notify(
                    'STATUS=SDWatchdog success %s\n' % timestamp,
                )
                systemd.daemon.notify("WATCHDOG=1")
                systemd.daemon.notify("READY=1")  # only active if Type=notify

            await asyncio.sleep(self.period)