blob: 2eb4e52a032dc3cd4bda32f378dece7687577619 [file] [log] [blame]
"""
Copyright 2020 The Magma Authors.
This source code is licensed under the BSD-style license found in the
LICENSE file in the root directory of this source tree.
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# pylint: disable=W0223
import asyncio
import logging
import os
import time
from typing import List, Optional, Set, cast
import systemd.daemon
from common.job import Job
class SDWatchdogTask(Job):
    """
    Marker base class for jobs that should be monitored by SDWatchdog.
    SDWatchdog only tracks tasks that are instances of this class; the class
    itself adds nothing on top of Job.
    """
class SDWatchdog(object):
    """
    Task that drives the systemd watchdog on behalf of a set of jobs.
    According to the original notes, an SDWatchdog is started automatically
    by common/service.run(), which passes in the tasks of the loop; only
    those that are SDWatchdogTask instances are monitored.
    To enable the systemd watchdog, add "WatchdogSec=60" in the [Service]
    section of the systemd service file.
    """

    def __init__(
        self,
        tasks: Optional[List[SDWatchdogTask]],
        update_status: bool = False,  # update systemd status field
        period: float = 30,
    ) -> None:
        """
        Collect the tasks to monitor and store the polling configuration.

        Args:
            tasks: candidate tasks; None or an empty list means nothing is
                monitored. Entries that are not SDWatchdogTask instances are
                skipped with a warning. Duplicates collapse because the
                tasks are kept in a set.
            update_status: when True, run() reports each stuck task through
                the systemd STATUS field.
            period: number of seconds run() sleeps between two checks.
        """
        self.tasks = cast(Set[SDWatchdogTask], set())
        self.update_status = update_status
        self.period = period
        for task in tasks or ():
            if isinstance(task, SDWatchdogTask):
                self.tasks.add(task)
            else:
                logging.warning(
                    "'%s' is not a 'SDWatchdogTask', skipping", repr(task),
                )

    @staticmethod
    def has_notify() -> bool:
        """
        Return True when the NOTIFY_SOCKET environment variable is set.
        """
        return os.getenv("NOTIFY_SOCKET") is not None

    async def run(self) -> None:
        """
        Coroutine that checks every monitored task each self.period seconds.
        A task counts as stuck when task.not_completed(current_time) is
        truthy (presumably based on the job's time_last_completed_loop and
        its timeout -- confirm against Job). Only when no task is stuck is
        WATCHDOG=1 sent to systemd; otherwise the keep-alive is withheld for
        that round.
        Returns immediately, after logging a warning, when NOTIFY_SOCKET is
        not set; otherwise it loops forever.
        """
        if not self.has_notify():
            logging.warning("Missing 'NOTIFY_SOCKET' for SDWatchdog, skipping")
            return
        logging.info("Starting SDWatchdog...")
        while True:
            current_time = time.time()
            # Same for every task in this round, so format it only once.
            timestamp = time.asctime(time.gmtime(current_time))
            any_stuck = False
            for task in self.tasks:
                if not task.not_completed(current_time):
                    continue
                any_stuck = True
                errmsg = "SDWatchdog service '%s' has not completed %s" % (
                    repr(task), timestamp,
                )
                if self.update_status:
                    systemd.daemon.notify("STATUS=%s\n" % errmsg)
                logging.info(errmsg)
            if not any_stuck:
                # NOTE(review): this success STATUS is sent regardless of
                # self.update_status, unlike the per-task failure STATUS
                # above -- confirm that this is intended.
                systemd.daemon.notify(
                    'STATUS=SDWatchdog success %s\n' % timestamp,
                )
                systemd.daemon.notify("WATCHDOG=1")
                systemd.daemon.notify("READY=1")  # only active if Type=notify
            await asyncio.sleep(self.period)