blob: eab19de6f4e697c071b3c5d9ed958982df170a67 [file] [log] [blame]
# SPDX-FileCopyrightText: 2020 The Magma Authors.
# SPDX-FileCopyrightText: 2022 Open Networking Foundation <support@opennetworking.org>
#
# SPDX-License-Identifier: BSD-3-Clause

# pylint: disable=W0223
7
8import asyncio
9import logging
10import os
11import time
12from typing import List, Optional, Set, cast
13
14import systemd.daemon
15from common.job import Job
16
17
class SDWatchdogTask(Job):
    """
    Marker subclass of Job for tasks that SDWatchdog should monitor.

    Adds no behavior of its own; SDWatchdog only accepts tasks that are
    instances of this class.
    """
20
21
class SDWatchdog(object):
    """
    This is a task that utilizes systemd watchdog functionality.

    SDWatchdog() task is started automatically in common/service.run(),
    where it will look at every task in the loop to see if it is a subclass
    of SDWatchdogTask

    To enable systemd watchdog, add "WatchdogSec=60" in the [Service] section
    of the systemd service file.
    """

    def __init__(
        self,
        tasks: Optional[List[SDWatchdogTask]],
        update_status: bool = False,  # update systemd status field
        period: float = 30,
    ) -> None:
        """
        Collect the tasks that run() will monitor.

        Args:
            tasks: candidate tasks. Entries that are not SDWatchdogTask
                instances are skipped with a warning; None or an empty
                list leaves the watchdog with nothing to monitor.
            update_status: when True, a stuck task is also reported through
                the systemd STATUS field. The success STATUS message is
                sent regardless of this flag.
            period: number of seconds run() sleeps between checks.
        """
        self.tasks = cast(Set[SDWatchdogTask], set())
        self.update_status = update_status
        self.period = period

        # `tasks or ()` covers both None and an empty list.
        for t in tasks or ():
            if isinstance(t, SDWatchdogTask):
                self.tasks.add(t)
            else:
                logging.warning(
                    "'%s' is not a 'SDWatchdogTask', skipping", repr(t),
                )

    @staticmethod
    def has_notify() -> bool:
        """Return True when the NOTIFY_SOCKET environment variable is set."""
        return os.getenv("NOTIFY_SOCKET") is not None

    async def run(self) -> None:
        """
        Check every task each self.period seconds and notify systemd.

        On each pass every monitored task is asked, via
        task.not_completed(current_time), whether it is stuck. If no task
        is stuck, a success STATUS, WATCHDOG=1 and READY=1 are sent to
        systemd. If any task is stuck, none of those are sent for that
        pass; the stuck task is logged and, when update_status is set,
        reported through the STATUS field.

        Returns immediately (after a warning) when NOTIFY_SOCKET is not
        set; otherwise loops forever.
        """
        if not self.has_notify():
            logging.warning("Missing 'NOTIFY_SOCKET' for SDWatchdog, skipping")
            return
        logging.info("Starting SDWatchdog...")
        while True:
            current_time = time.time()
            # Same timestamp is used in every message of this pass, so
            # format it once instead of once per stuck task.
            timestamp = time.asctime(time.gmtime(current_time))
            any_stuck = False
            for task in self.tasks:
                if task.not_completed(current_time):
                    errmsg = "SDWatchdog service '%s' has not completed %s" % (
                        repr(task), timestamp,
                    )
                    if self.update_status:
                        systemd.daemon.notify("STATUS=%s\n" % errmsg)
                    logging.info(errmsg)
                    any_stuck = True

            if not any_stuck:
                systemd.daemon.notify(
                    'STATUS=SDWatchdog success %s\n' % timestamp,
                )
                systemd.daemon.notify("WATCHDOG=1")
                systemd.daemon.notify("READY=1")  # only active if Type=notify

            await asyncio.sleep(self.period)
94 await asyncio.sleep(self.period)