blob: 2eb4e52a032dc3cd4bda32f378dece7687577619 [file] [log] [blame]
"""
Copyright 2020 The Magma Authors.

This source code is licensed under the BSD-style license found in the
LICENSE file in the root directory of this source tree.

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
13# pylint: disable=W0223
14
15import asyncio
16import logging
17import os
18import time
19from typing import List, Optional, Set, cast
20
21import systemd.daemon
22from common.job import Job
23
24
class SDWatchdogTask(Job):
    """
    Marker type for jobs that should be supervised by SDWatchdog.

    Adds nothing on top of Job; SDWatchdog only accepts tasks that are
    instances of this class and calls their not_completed() method, which
    is presumably provided by Job -- confirm against common.job.
    """
27
28
class SDWatchdog(object):
    """
    This is a task that utilizes systemd watchdog functionality.

    SDWatchdog() task is started automatically in common/service.run(),
    where it will look at every task in the loop to see if it is a subclass
    of SDWatchdogTask.

    To enable systemd watchdog, add "WatchdogSec=60" in the [Service] section
    of the systemd service file.
    """

    def __init__(
        self,
        tasks: Optional[List[SDWatchdogTask]],
        update_status: bool = False,  # update systemd status field
        period: float = 30,
    ) -> None:
        """
        Collect the tasks to supervise and store the polling settings.

        Args:
            tasks: candidate tasks; may be None or empty. Entries that are
                not SDWatchdogTask instances are skipped with a warning.
            update_status: when True, run() also writes a human-readable
                "STATUS=..." line to systemd on every check (both for a
                stuck task and for a successful pass).
            period: number of seconds run() sleeps between checks.
        """
        # cast() only informs the type checker; this is a plain empty set.
        self.tasks = cast(Set[SDWatchdogTask], set())
        self.update_status = update_status
        self.period = period

        for t in tasks or ():
            if isinstance(t, SDWatchdogTask):
                self.tasks.add(t)
            else:
                logging.warning(
                    "'%s' is not a 'SDWatchdogTask', skipping", repr(t),
                )

    @staticmethod
    def has_notify() -> bool:
        """Return True if the NOTIFY_SOCKET environment variable is set."""
        return os.getenv("NOTIFY_SOCKET") is not None

    async def run(self) -> None:
        """
        Check every supervised task each self.period seconds and, if none
        of them reports not_completed(), sd notify WATCHDOG=1.

        Returns immediately (after logging a warning) when NOTIFY_SOCKET is
        not set; otherwise loops forever.
        """
        if not self.has_notify():
            logging.warning("Missing 'NOTIFY_SOCKET' for SDWatchdog, skipping")
            return
        logging.info("Starting SDWatchdog...")
        while True:
            current_time = time.time()
            any_stuck = False
            for task in self.tasks:
                if task.not_completed(current_time):
                    errmsg = "SDWatchdog service '%s' has not completed %s" % (
                        repr(task), time.asctime(time.gmtime(current_time)),
                    )
                    if self.update_status:
                        systemd.daemon.notify("STATUS=%s\n" % errmsg)
                    logging.info(errmsg)
                    # Keep scanning so every stuck task gets reported.
                    any_stuck = True

            if not any_stuck:
                # Honour update_status here as well, so the systemd status
                # field is only touched when the caller asked for it.
                if self.update_status:
                    systemd.daemon.notify(
                        'STATUS=SDWatchdog success %s\n' %
                        time.asctime(time.gmtime(current_time)),
                    )
                # The watchdog is only fed when no task is stuck; skipping
                # this lets systemd's WatchdogSec timer expire.
                systemd.daemon.notify("WATCHDOG=1")
                systemd.daemon.notify("READY=1")  # only active if Type=notify

            await asyncio.sleep(self.period)