#!/bin/bash

#Copyright 2020-present Open Networking Foundation
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.

# This script is a watchdog that monitors the openolt and dev_mgmt_daemon
# processes. When a monitored process is found to be not running, it reboots
# the OLT so that everything recovers in the right state.
# When the OLT is working in in-band mode, it monitors only the
# dev_mgmt_daemon process to start with. Once the openolt process is also up,
# it starts monitoring both processes.
# When the OLT is working in out-of-band mode, it monitors both processes from
# the start. When either of the processes is not running, the OLT is rebooted.

# Polling period of the monitoring loops, in seconds
PID_MONITOR_INTERVAL=1
# Maximum time, in seconds, to wait for the openolt process to show up before
# the OLT is rebooted
MAX_WAIT_TIMER_FOR_OPENOLT_PROCESS_TO_START=120

WATCHDOG_LOG_FILE="/var/log/openolt_process_watchdog.log"

# inband config file
# BRCM_DIR is not set anywhere in this file; fall back to /broadcom, the
# directory used for the monitored binaries, when the environment does not
# provide it (otherwise the path would silently become /inband.config).
INBAND_CONFIG_FILE="${BRCM_DIR:-/broadcom}/inband.config"

# Board name as reported through DMI, used to tell the OLT models apart
OLT_MODEL=$(< /sys/devices/virtual/dmi/id/board_name)

# Board name that selects the asfvolt16 code paths
ASF16_MODEL="ASXvOLT16"

# vlan id for asfvolt16
ASFVOLT16_VLAN_ID_ETH2=

# vlan id for asgvolt64
ASGVOLT64_VLAN_ID_ETH1=


#------------------------------------------------------------------------------
# Function Name: LOG
# Description:
#    Appends one line to WATCHDOG_LOG_FILE, made of the current output of
#    `date`, a space, and the given message
#
# Globals:
#    WATCHDOG_LOG_FILE (read)
#
# Arguments:
#    $1 - string to be logged
#
# Returns:
#    Exit status of the write to WATCHDOG_LOG_FILE
#------------------------------------------------------------------------------
LOG() {
  # Everything is quoted so that neither the timestamp nor the log file path
  # is subject to word splitting.
  printf '%s %s\n' "$(date)" "$1" >> "${WATCHDOG_LOG_FILE}"
}

#------------------------------------------------------------------------------
# Function Name: get_vlan_ids
# Description:
#    Fetches the in-band vlan id of the current OLT model from the in-band
#    configuration file (INBAND_CONFIG_FILE). Only the variable that belongs
#    to the current model is populated and validated.
#    When the configuration file does not exist, the default vlan id 4093
#    announced by the log message is applied to that variable if it is still
#    empty.
#
# Globals:
#    INBAND_CONFIG_FILE, OLT_MODEL, ASF16_MODEL (read)
#    ASFVOLT16_VLAN_ID_ETH2, ASGVOLT64_VLAN_ID_ETH1 (written)
#
# Arguments:
#    None
#
# Returns:
#    0 on success; exits the script with status 1 when the configuration file
#    exists but yields no vlan id for the current model
#------------------------------------------------------------------------------
get_vlan_ids() {
  local -r default_vlan_id=4093
  local vlan_id

  if [[ ! -f "${INBAND_CONFIG_FILE}" ]]; then
    LOG "ERROR: ${INBAND_CONFIG_FILE} not found, using default value 4093"
    if [[ "${OLT_MODEL}" == "${ASF16_MODEL}" ]]; then
      ASFVOLT16_VLAN_ID_ETH2=${ASFVOLT16_VLAN_ID_ETH2:-${default_vlan_id}}
    else
      ASGVOLT64_VLAN_ID_ETH1=${ASGVOLT64_VLAN_ID_ETH1:-${default_vlan_id}}
    fi
    return 0
  fi

  # Read inband.config file to fetch vlan id information: print what follows
  # the first "=" separator on every line mentioning the key.
  if [[ "${OLT_MODEL}" == "${ASF16_MODEL}" ]]; then
    vlan_id=$(awk -F '=' '/asfvolt16_vlan_id_eth2/ {print $2}' "${INBAND_CONFIG_FILE}")
    ASFVOLT16_VLAN_ID_ETH2=${vlan_id}
  else
    vlan_id=$(awk -F '=' '/asgvolt64_vlan_id_eth1/ {print $2}' "${INBAND_CONFIG_FILE}")
    ASGVOLT64_VLAN_ID_ETH1=${vlan_id}
  fi

  # Validate only the id of the current model; the other model's variable is
  # never populated and must not fail the check.
  if [[ -z "${vlan_id}" ]]; then
    LOG "ERROR: vlan ids not valid"
    exit 1
  fi

  return 0
}

#------------------------------------------------------------------------------
# Function Name: monitor_openolt_and_dev_mgmt_daemon_process
# Description:
#    Monitors openolt and dev_mgmt_daemon processes in an infinite loop.
#    Reboots the OLT if either of them is not running. In in-band mode it also
#    reboots the OLT when the in-band interface has had no IPv4 address for
#    more than 60 seconds.
#
# Globals:
#    PID_MONITOR_INTERVAL, OLT_MODEL, ASF16_MODEL,
#    ASFVOLT16_VLAN_ID_ETH2, ASGVOLT64_VLAN_ID_ETH1 (read)
#    SECONDS (written)
#
# Arguments:
#    None
#
# Returns:
#    Does not return; exits the script with status 1 after requesting a reboot
#------------------------------------------------------------------------------
monitor_openolt_and_dev_mgmt_daemon_process() {
  local dev_mgmt_daemon_pid openolt_pid inband_if inband_if_ip_addr

  LOG "start monitor openolt and dev_mgmt_daemon processes"

  # This is builtin bash variable that tracks the number of seconds
  # elapsed since the shell started. We can reset to 0 and the timer
  # starts from here
  SECONDS=0

  while true; do
    # pidof may print several PIDs separated by spaces, so the results are
    # always tested quoted.
    dev_mgmt_daemon_pid=$(pidof /broadcom/dev_mgmt_daemon)
    openolt_pid=$(pidof /broadcom/openolt)

    if [[ -z "${openolt_pid}" || -z "${dev_mgmt_daemon_pid}" ]]; then
      if [[ -z "${dev_mgmt_daemon_pid}" ]]; then
        LOG "dev_mgmt_daemon processes not available"
      fi

      if [[ -z "${openolt_pid}" ]]; then
        LOG "openolt processes not available"
      fi

      # Reboot OLT so that everything recovers in the right state
      reboot -n
      exit 1
    fi

    sleep "${PID_MONITOR_INTERVAL}"

    # is_olt_in_inband_mode returns a non-zero status when the in-band
    # startup script is present, i.e. when the OLT works in in-band mode.
    if ! is_olt_in_inband_mode; then
      # Fall back to vlan id 4093 (the default named in the watchdog's own
      # log messages) when no id has been configured; an empty id would
      # query the non-existent interface "eth2." / "eth1.".
      if [[ "${OLT_MODEL}" == "${ASF16_MODEL}" ]]; then
        inband_if="eth2.${ASFVOLT16_VLAN_ID_ETH2:-4093}"
      else
        inband_if="eth1.${ASGVOLT64_VLAN_ID_ETH1:-4093}"
      fi
      inband_if_ip_addr=$(ip -o -4 addr list "${inband_if}" | awk '{print $4}' | cut -d/ -f1)

      if [[ -n "${inband_if_ip_addr}" ]]; then
        # Reset timer if the in-band interface has an IP address
        SECONDS=0
      elif [[ ${SECONDS} -gt 60 ]]; then
        # If the in-band interface does not have an IP for more than 60s, reboot the OLT
        LOG "in-band interface does not have IP for more than 60s, rebooting OLT"
        reboot -n
        exit 1
      fi
    fi
  done
}

#------------------------------------------------------------------------------
# Function Name: monitor_dev_mgmt_daemon_process
# Description:
#    Monitors dev_mgmt_daemon in an infinite loop. The loop breaks if openolt
#    agent process starts. The OLT is rebooted when dev_mgmt_daemon is not
#    running, or when openolt has not started within
#    MAX_WAIT_TIMER_FOR_OPENOLT_PROCESS_TO_START seconds.
#
# Globals:
#    PID_MONITOR_INTERVAL, MAX_WAIT_TIMER_FOR_OPENOLT_PROCESS_TO_START (read)
#    SECONDS (written)
#
# Arguments:
#    None
#
# Returns:
#    0 once the openolt process is running; exits the script with status 1
#    after requesting a reboot
#------------------------------------------------------------------------------
monitor_dev_mgmt_daemon_process() {
  local dev_mgmt_daemon_pid openolt_pid

  # This is builtin bash variable that tracks the number of seconds
  # elapsed since the shell started. We can reset to 0 and the timer
  # starts from here
  SECONDS=0

  while true; do
    dev_mgmt_daemon_pid=$(pidof /broadcom/dev_mgmt_daemon)
    if [[ -z "${dev_mgmt_daemon_pid}" ]]; then
      # Reboot OLT so that everything recovers in the right state
      LOG "dev_mgmt_daemon process crashed or not available in in-band mode of OLT, rebooting OLT"
      reboot -n
      # Stop here, like every other reboot path of the watchdog, instead of
      # looping and requesting another reboot every interval.
      exit 1
    fi

    sleep "${PID_MONITOR_INTERVAL}"

    # pidof may print several PIDs separated by spaces; test the quoted
    # value so that this still counts as "started".
    openolt_pid=$(pidof /broadcom/openolt)
    if [[ -n "${openolt_pid}" ]]; then
      LOG "openolt process has started. start monitoring both dev_mgmt_daemon and openolt processes"
      break
    elif [[ ${SECONDS} -ge ${MAX_WAIT_TIMER_FOR_OPENOLT_PROCESS_TO_START} ]]; then
      # Typically the openolt process is supposed to start within MAX_WAIT_TIMER_FOR_OPENOLT_PROCESS_TO_START
      # If that does not happen, reboot the OLT.
      LOG "openolt process did not start for more than ${MAX_WAIT_TIMER_FOR_OPENOLT_PROCESS_TO_START} seconds, rebooting OLT"
      reboot -n
      exit 1
    fi
  done
}

#------------------------------------------------------------------------------
# Function Name: is_olt_in_inband_mode
# Description:
#    Checks if OLT is working in in-band mode.
#    Presence of the in-band startup script indicates in-band mode. That script
#    is not packaged as part of the debian package or the ONL when OLT is
#    working in out-of-band mode.
#
# Globals:
#    None
#
# Arguments:
#    None
#
# Returns:
#    1 when /etc/init.d/start_inband_oltservices.sh exists (in-band mode)
#    0 when it does not exist (out-of-band mode)
#    Note that this is the reverse of the usual shell success convention;
#    callers compare the status against 1 to detect in-band mode.
#------------------------------------------------------------------------------
is_olt_in_inband_mode() {
  if [[ -f "/etc/init.d/start_inband_oltservices.sh" ]]; then
    return 1
  fi
  return 0
}


# Execution starts here
LOG "---- watchdog started ----"

# Source watchdog configuration. enable_watchdog and watchdog_startup_timer
# are not assigned anywhere in this script, so they are presumably provided
# by this file.
if [[ -r /opt/openolt/watchdog.config ]]; then
  . /opt/openolt/watchdog.config
else
  LOG "ERROR: /opt/openolt/watchdog.config not found or not readable"
fi

# An unset or empty enable_watchdog counts as "disabled"
if [[ "${enable_watchdog:-}" == "yes" ]]; then
  LOG "watchdog is enabled"
else
  LOG "watchdog is disabled - exiting"
  exit 0
fi

# NOTE(review): get_vlan_ids is not invoked anywhere in this script, so the
# vlan id variables keep their initial values unless watchdog.config sets
# them - confirm whether it should be called here for in-band mode.

# If OLT is working in in-band mode, start with monitoring only the
# dev_mgmt_daemon process. is_olt_in_inband_mode returns a non-zero status
# when the in-band startup script is present.
if ! is_olt_in_inband_mode; then
  LOG "openolt working in in-band mode, starting to monitor dev_mgmt_daemon process first"
  monitor_dev_mgmt_daemon_process
  # If we are here, that means the openolt process has just started and we now need to
  # monitor both dev_mgmt_daemon and openolt process
else
  # Allow some time for openolt and dev_mgmt_daemon process to start
  sleep "${watchdog_startup_timer}"
  LOG "openolt working in out-of-band mode"
fi

monitor_openolt_and_dev_mgmt_daemon_process
253monitor_openolt_and_dev_mgmt_daemon_process