# blob: eab19de6f4e697c071b3c5d9ed958982df170a67 [file] [log] [blame]
# SPDX-FileCopyrightText: 2020 The Magma Authors.
# SPDX-FileCopyrightText: 2022 Open Networking Foundation <support@opennetworking.org>
#
# SPDX-License-Identifier: BSD-3-Clause
# pylint: disable=W0223
import asyncio
import logging
import os
import time
from typing import List, Optional, Set, cast
import systemd.daemon
from common.job import Job
class SDWatchdogTask(Job):
    """
    Marker type for jobs that SDWatchdog is allowed to supervise.

    Adds nothing to Job; SDWatchdog only keeps tasks that are instances of
    this class and skips everything else with a warning.
    """
class SDWatchdog(object):
    """
    This is a task that utilizes systemd watchdog functionality.

    SDWatchdog() task is started automatically in common/service.run(),
    where it will look at every task in the loop to see if it is a subclass
    of SDWatchdogTask.

    To enable systemd watchdog, add "WatchdogSec=60" in the [Service] section
    of the systemd service file.
    """

    def __init__(
        self,
        tasks: Optional[List[SDWatchdogTask]],
        update_status: bool = False,  # update systemd status field
        period: float = 30,
    ) -> None:
        """
        Collect the tasks that the watchdog will supervise.

        Args:
            tasks: candidate tasks. None or an empty list means that nothing
                is supervised. Entries that are not SDWatchdogTask instances
                are reported with a warning and skipped; duplicates collapse
                because the tasks are kept in a set.
            update_status: when True, stuck tasks are also reported through
                the systemd STATUS field.
            period: number of seconds to sleep between two checks.
        """
        self.tasks = cast(Set[SDWatchdogTask], set())
        self.update_status = update_status
        self.period = period

        for task in tasks or ():
            if isinstance(task, SDWatchdogTask):
                self.tasks.add(task)
            else:
                logging.warning(
                    "'%s' is not a 'SDWatchdogTask', skipping", repr(task),
                )

    @staticmethod
    def has_notify() -> bool:
        """Return True when the NOTIFY_SOCKET environment variable is set."""
        return os.getenv("NOTIFY_SOCKET") is not None

    async def run(self) -> None:
        """
        Check the supervised tasks every self.period seconds.

        Returns right away (after logging a warning) when NOTIFY_SOCKET is
        not set; otherwise loops forever. In every round each task is asked
        not_completed(current_time). Only when no task answers True does the
        coroutine send WATCHDOG=1 (plus a success STATUS and READY=1) to
        systemd. A stuck task is logged and, if update_status is set,
        reported through the STATUS field, and the WATCHDOG=1 ping is
        withheld for that round.
        """
        if not self.has_notify():
            logging.warning("Missing 'NOTIFY_SOCKET' for SDWatchdog, skipping")
            return
        logging.info("Starting SDWatchdog...")

        while True:
            current_time = time.time()
            # One UTC timestamp per round, shared by every message below.
            timestamp = time.asctime(time.gmtime(current_time))

            any_stuck = False
            for task in self.tasks:
                if not task.not_completed(current_time):
                    continue
                any_stuck = True
                errmsg = "SDWatchdog service '%s' has not completed %s" % (
                    repr(task), timestamp,
                )
                if self.update_status:
                    systemd.daemon.notify("STATUS=%s\n" % errmsg)
                logging.info(errmsg)

            if not any_stuck:
                # NOTE(review): the success status is sent even when
                # update_status is False, unlike the stuck-task status above.
                # Kept as is; confirm whether this is intentional.
                systemd.daemon.notify(
                    'STATUS=SDWatchdog success %s\n' % timestamp,
                )
                systemd.daemon.notify("WATCHDOG=1")
                systemd.daemon.notify("READY=1")  # only active if Type=notify

            await asyncio.sleep(self.period)