#!/usr/bin/env python
# Copyright 2017-present Open Networking Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import errno
import os
import signal
import sys
import time
20
# Counter of restarts performed so far. fork_and_exec() exports the current value to each new
# child through the RESTART_EPOCH environment variable and then increments it.
restart_epoch = 0
# PIDs of the children forked by fork_and_exec() that have not yet been reaped by
# sigchld_handler() or cleared by force_kill_all_children().
pid_list = []
23
def force_kill_all_children():
    """Send SIGKILL to every tracked child process and then forget all of them.

    Children are given no time to exit on their own; that may be reconsidered in the future. If
    someone force kills the restarter itself without cleaning up the process tree, this function
    never runs and the children stay around unless they choose to end themselves when their
    parent dies.

    The SIGCHLD handler is reset to the default before any child is killed so that the deaths
    caused here do not call back into the SIGCHLD handler. A failure to signal an individual PID
    (for example because that process is already gone) is reported and the loop carries on with
    the remaining PIDs.
    """
    global pid_list

    # First uninstall the SIGCHLD handler so that we don't get called again.
    signal.signal(signal.SIGCHLD, signal.SIG_DFL)

    for pid in pid_list:
        print("force killing PID={}".format(pid))
        try:
            os.kill(pid, signal.SIGKILL)
        except OSError:
            # Only OS-level failures are tolerated here. A bare "except:" would also swallow
            # SystemExit or KeyboardInterrupt raised by another signal handler that happens to
            # run while we are inside this try block, silently cancelling the requested exit.
            print("error force killing PID={} continuing".format(pid))

    pid_list = []
42
43
def sigterm_handler(signum, frame):
    """SIGTERM handler: force kill every child process and exit with status 0.

    The signal number and stack frame passed in by the signal module are not used. See
    force_kill_all_children() for how the children are brought down.
    """
    print("got SIGTERM")
    force_kill_all_children()
    raise SystemExit(0)
50
51
def sighup_handler(signum, frame):
    """SIGHUP handler: make the restarter fork and exec one more child process.

    The signal number and stack frame passed in by the signal module are not used; all of the
    work is delegated to fork_and_exec().
    """
    print("got SIGHUP")
    fork_and_exec()
58
def sigusr1_handler(signum, frame):
    """SIGUSR1 handler: propagate SIGUSR1 to all of the tracked child processes.

    The signal number and stack frame passed in by the signal module are not used. A failure to
    signal an individual PID (for example because that process is already gone) is reported and
    the remaining PIDs are still signalled.
    """
    # Walk a snapshot rather than the live list: another handler may run in the middle of this
    # loop and remove entries from pid_list, which would make a live iteration skip children.
    for pid in list(pid_list):
        print("sending SIGUSR1 to PID={}".format(pid))
        try:
            os.kill(pid, signal.SIGUSR1)
        except OSError:
            # Only OS-level failures are tolerated here. A bare "except:" would also swallow
            # SystemExit or KeyboardInterrupt raised by another signal handler that happens to
            # run while we are inside this try block.
            print("error in SIGUSR1 to PID={} continuing".format(pid))
69
70
def sigchld_handler(signum, frame):
    """SIGCHLD handler: reap exited children and decide whether the restarter keeps running.

    Python does not hand the child's details to the signal handler, so every tracked PID is
    polled with a non-blocking waitpid() to find out what happened.

    * A child that exited with code 0 is assumed to have exited on purpose.
    * A child that exited with a non-zero code, was killed by a signal, or reported a status
      that is neither of those makes the restarter force kill all remaining children so that
      whoever started the restarter notices and can restart the whole thing.

    When no tracked children remain the process exits: status 1 after an abnormal child exit,
    status 0 otherwise. The signal number and stack frame arguments are not used.

    This handler can be entered again while it is already running (a second SIGCHLD arriving
    between two statements). A PID that a nested invocation has already reaped or removed is
    skipped instead of crashing the restarter.
    """
    print("got SIGCHLD")

    kill_all_and_exit = False
    # Poll a snapshot because entries are removed from pid_list while looping.
    for pid in list(pid_list):
        if pid not in pid_list:
            # A nested invocation of this handler already dealt with this child.
            continue

        try:
            ret_pid, exit_status = os.waitpid(pid, os.WNOHANG)
        except OSError as exc:
            if exc.errno != errno.ECHILD:
                raise
            # The child was already reaped elsewhere, so its exit status is not available.
            # Stop tracking it rather than letting the exception take the restarter down
            # while other children are still running.
            print("PID={} is no longer a child of this process, forgetting it".format(pid))
            if pid in pid_list:
                pid_list.remove(pid)
            continue

        if ret_pid == 0 and exit_status == 0:
            # This child is still running.
            continue

        # Guarded because a nested invocation may have removed the PID in the meantime.
        if pid in pid_list:
            pid_list.remove(pid)

        # Now we see how the child exited.
        if os.WIFEXITED(exit_status):
            exit_code = os.WEXITSTATUS(exit_status)
            print("PID={} exited with code={}".format(ret_pid, exit_code))
            if exit_code != 0:
                # Something bad happened. We need to tear everything down so that whoever
                # started the restarter can know about this situation and restart the whole
                # thing. (Exit code 0 is a normal exit that we assume was on purpose.)
                kill_all_and_exit = True
        elif os.WIFSIGNALED(exit_status):
            print("PID={} was killed with signal={}".format(ret_pid, os.WTERMSIG(exit_status)))
            kill_all_and_exit = True
        else:
            kill_all_and_exit = True

    if kill_all_and_exit:
        print("Due to abnormal exit, force killing all child processes and exiting")
        force_kill_all_children()

    # Our last child died, so we have no purpose. Exit.
    if not pid_list:
        print("exiting due to lack of child processes")
        sys.exit(1 if kill_all_and_exit else 0)
115
116
def fork_and_exec():
    """Fork, exec the target program in the child, and record the child's PID.

    Before forking, the current restart epoch is stored in the RESTART_EPOCH environment
    variable so that child processes can read it if they care; the epoch is then incremented
    for the next restart. The program to run is taken from sys.argv[1] and is executed with no
    additional arguments.

    If the exec fails, the child reports the error on stderr and terminates immediately with
    status 1. It must never return from this function: it is a copy of the restarter, signal
    handlers included, and would otherwise unwind through the restarter's own call stack.
    """
    global restart_epoch
    os.environ['RESTART_EPOCH'] = str(restart_epoch)
    print("forking and execing new child process at epoch {}".format(restart_epoch))
    restart_epoch += 1

    child_pid = os.fork()
    if child_pid == 0:
        # Child process. On success execl() replaces this process and never returns.
        try:
            os.execl(sys.argv[1], sys.argv[1])
        except Exception as exc:
            sys.stderr.write("failed to exec child process: {}\n".format(exc))
            sys.stderr.flush()
        finally:
            # os._exit() skips interpreter cleanup, so buffers and exit hooks inherited from
            # the parent are not run a second time in the failed child.
            os._exit(1)
    else:
        # Parent process
        print("forked new child process with PID={}".format(child_pid))
        pid_list.append(child_pid)
134
135
def main():
    """Script entry point.

    This script is designed so that a process watcher like runit or monit can watch this
    process and take corrective action if it ever goes away.

    The path of the program to supervise is read from sys.argv[1]. If it is missing, a usage
    message is written to stderr and the process exits with status 1 before any signal handler
    is installed or any child is started. Otherwise the SIGTERM, SIGHUP, SIGCHLD and SIGUSR1
    handlers are installed, the first child is started, and the function sleeps forever.
    """
    if len(sys.argv) < 2:
        program = sys.argv[0] if sys.argv else "hot-restarter"
        sys.stderr.write("usage: {} <path-to-executable>\n".format(program))
        sys.exit(1)

    print("starting hot-restarter with target: {}".format(sys.argv[1]))

    signal.signal(signal.SIGTERM, sigterm_handler)
    signal.signal(signal.SIGHUP, sighup_handler)
    signal.signal(signal.SIGCHLD, sigchld_handler)
    signal.signal(signal.SIGUSR1, sigusr1_handler)

    # Start the first child process and then go into an endless loop since everything else
    # happens via signals.
    fork_and_exec()
    while True:
        time.sleep(60)
152
# Run the restarter only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()