Girish Gowdra | 6e74552 | 2020-07-15 15:52:13 -0700 | [diff] [blame] | 1 | #!/bin/bash |
| 2 | |
| 3 | #Copyright 2020-present Open Networking Foundation |
| 4 | # |
| 5 | #Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | #you may not use this file except in compliance with the License. |
| 7 | #You may obtain a copy of the License at |
| 8 | # |
| 9 | #http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | #Unless required by applicable law or agreed to in writing, software |
| 12 | #distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | #See the License for the specific language governing permissions and |
| 15 | #limitations under the License. |
| 16 | |
# This script is a watchdog that monitors the openolt and dev_mgmt_daemon
# processes. It reboots the OLT when a monitored process is no longer running.
# When the OLT is working in in-band mode, it monitors only the dev_mgmt_daemon
# process to start with. Once the openolt process is also up, it starts
# monitoring both processes.
# When the OLT is working in out-of-band mode, it monitors both processes from
# the start. When either of the processes is missing, it reboots the OLT.
| 24 | |
| 25 | PID_MONITOR_INTERVAL=1 # in seconds |
| 26 | MAX_WAIT_TIMER_FOR_OPENOLT_PROCESS_TO_START=120 |
| 27 | |
| 28 | WATCHDOG_LOG_FILE="/var/log/openolt_process_watchdog.log" |
| 29 | |
Girish Gowdra | 70feafd | 2020-08-06 20:08:26 -0700 | [diff] [blame] | 30 | BRCM_DIR="/broadcom" |
| 31 | |
Girish Gowdra | 6e74552 | 2020-07-15 15:52:13 -0700 | [diff] [blame] | 32 | # inband config file |
| 33 | INBAND_CONFIG_FILE="${BRCM_DIR}/inband.config" |
| 34 | |
| 35 | OLT_MODEL=$(cat /sys/devices/virtual/dmi/id/board_name) |
| 36 | |
| 37 | ASF16_MODEL="ASXvOLT16" |
| 38 | |
| 39 | # vlan id for asfvolt16 |
| 40 | ASFVOLT16_VLAN_ID_ETH2= |
| 41 | |
| 42 | # vlan id for asgvolt64 |
| 43 | ASGVOLT64_VLAN_ID_ETH1= |
| 44 | |
| 45 | |
| 46 | #------------------------------------------------------------------------------ |
| 47 | # Function Name: LOG |
| 48 | # Description: |
| 49 | # Logs to WATCHDOG_LOG_FILE |
| 50 | # |
| 51 | # Globals: |
| 52 | # None |
| 53 | # |
| 54 | # Arguments: |
| 55 | # string to be logged |
| 56 | # |
| 57 | # Returns: |
| 58 | # None |
| 59 | #------------------------------------------------------------------------------ |
| 60 | LOG() { |
| 61 | echo `date`" $1" >> ${WATCHDOG_LOG_FILE} |
| 62 | } |
| 63 | |
| 64 | #------------------------------------------------------------------------------ |
| 65 | # Function Name: get_vlan_ids |
| 66 | # Description: |
| 67 | # This function facilitates to fetch vlan id from inband configuration file |
| 68 | # located at /broadcom/inband.config |
| 69 | # |
| 70 | # Globals: |
| 71 | # INBAND_CONFIG_FILE, ASFVOLT16_VLAN_ID_ETH2, ASFVOLT16_VLAN_ID_ETH3, |
| 72 | # ASGVOLT64_VLAN_ID_ETH1, ASGVOLT64_VLAN_ID_ETH2 |
| 73 | # |
| 74 | # Arguments: |
| 75 | # None |
| 76 | # |
| 77 | # Returns: |
| 78 | # None |
| 79 | #------------------------------------------------------------------------------ |
| 80 | get_vlan_ids() { |
| 81 | # Read inband.config file to fetch vlan id information |
| 82 | if [ -f ${INBAND_CONFIG_FILE} ]; then |
| 83 | if [ "${OLT_MODEL}" = ${ASF16_MODEL} ]; then |
| 84 | ASFVOLT16_VLAN_ID_ETH2=$(awk '/asfvolt16_vlan_id_eth2/{print $0}' ${INBAND_CONFIG_FILE} | awk -F "=" '{print $2}') |
| 85 | else |
| 86 | ASGVOLT64_VLAN_ID_ETH1=$(awk '/asgvolt64_vlan_id_eth1/{print $0}' ${INBAND_CONFIG_FILE} | awk -F "=" '{print $2}') |
| 87 | fi |
| 88 | |
Girish Gowdra | 70feafd | 2020-08-06 20:08:26 -0700 | [diff] [blame] | 89 | if [ -z ${ASFVOLT16_VLAN_ID_ETH2} ] && [ -z ${ASGVOLT64_VLAN_ID_ETH1} ]; then |
Girish Gowdra | 6e74552 | 2020-07-15 15:52:13 -0700 | [diff] [blame] | 90 | LOG "ERROR: vlan ids not valid" |
| 91 | exit 1 |
| 92 | fi |
| 93 | else |
| 94 | LOG "ERROR: ${INBAND_CONFIG_FILE} not found, using default value 4093" |
| 95 | fi |
| 96 | } |
| 97 | |
| 98 | #------------------------------------------------------------------------------ |
| 99 | # Function Name: monitor_openolt_and_dev_mgmt_daemon_process |
| 100 | # Description: |
| 101 | # Monitors openolt and dev_mgmt_daemon processes in an infinite loop. |
| 102 | # Restarts both the processes if either of them exits |
| 103 | # |
| 104 | # Globals: |
| 105 | # None |
| 106 | # |
| 107 | # Arguments: |
| 108 | # None |
| 109 | # |
| 110 | # Returns: |
| 111 | # |
| 112 | #------------------------------------------------------------------------------ |
| 113 | monitor_openolt_and_dev_mgmt_daemon_process() { |
| 114 | LOG "start monitor openolt and dev_mgmt_daemon processes" |
| 115 | |
Girish Gowdra | 3b0ae45 | 2020-09-28 10:53:18 -0700 | [diff] [blame] | 116 | get_vlan_ids |
Girish Gowdra | 70feafd | 2020-08-06 20:08:26 -0700 | [diff] [blame] | 117 | if [ "${OLT_MODEL}" = ${ASF16_MODEL} ]; then |
| 118 | INBAND_IF="eth2.${ASFVOLT16_VLAN_ID_ETH2}" |
| 119 | else |
| 120 | INBAND_IF="eth1.${ASGVOLT64_VLAN_ID_ETH1}" |
| 121 | fi |
| 122 | |
Girish Gowdra | 6e74552 | 2020-07-15 15:52:13 -0700 | [diff] [blame] | 123 | # This is builtin bash variable that tracks the number of seconds |
| 124 | # elapsed since the shell started. We can reset to 0 and the timer |
| 125 | # starts from here |
| 126 | SECONDS=0 |
Girish Gowdra | 6e74552 | 2020-07-15 15:52:13 -0700 | [diff] [blame] | 127 | while true; do |
| 128 | |
| 129 | DEV_MGMT_DAEMON_PID=`pidof /broadcom/dev_mgmt_daemon` |
| 130 | OPENOLT_PID=$(pidof /broadcom/openolt) |
| 131 | if [ -z ${OPENOLT_PID} ] || [ -z ${DEV_MGMT_DAEMON_PID} ];then |
| 132 | |
| 133 | if [ -z ${DEV_MGMT_DAEMON_PID} ]; then |
| 134 | LOG "dev_mgmt_daemon processes not available" |
| 135 | fi |
| 136 | |
| 137 | if [ -z ${OPENOLT_PID} ]; then |
| 138 | LOG "openolt processes not available" |
| 139 | fi |
| 140 | |
| 141 | # Reboot OLT so that everything recovers in the right state |
| 142 | reboot -n |
| 143 | exit 1 |
| 144 | fi |
| 145 | |
| 146 | sleep ${PID_MONITOR_INTERVAL} |
| 147 | |
| 148 | if [ $(is_olt_in_inband_mode; echo $?) -eq 1 ]; then |
Girish Gowdra | 70feafd | 2020-08-06 20:08:26 -0700 | [diff] [blame] | 149 | INBAND_IF_IP_ADDR=`ip -o -4 addr list ${INBAND_IF} | awk '{print $4}' | cut -d/ -f1` |
Girish Gowdra | 6e74552 | 2020-07-15 15:52:13 -0700 | [diff] [blame] | 150 | |
| 151 | if [ -z ${INBAND_IF_IP_ADDR} ] && [ ${SECONDS} -gt 60 ]; then |
| 152 | # If the in-band interface does not have an IP for more than 60s, reboot the OLT |
Girish Gowdra | 70feafd | 2020-08-06 20:08:26 -0700 | [diff] [blame] | 153 | LOG "in-band interface ${INBAND_IF} does not have IP for more than 60s, rebooting OLT" |
Girish Gowdra | 6e74552 | 2020-07-15 15:52:13 -0700 | [diff] [blame] | 154 | reboot -n |
| 155 | exit 1 |
| 156 | elif [ ${#INBAND_IF_IP_ADDR} -gt 0 ]; then |
| 157 | # This is case where we have an IP address on the in-band interface |
| 158 | |
| 159 | # Reset timer if the in-band interface has an IP address |
| 160 | SECONDS=0 |
| 161 | fi |
| 162 | fi |
| 163 | done |
| 164 | } |
| 165 | |
| 166 | #------------------------------------------------------------------------------ |
| 167 | # Function Name: monitor_dev_mgmt_daemon_process |
| 168 | # Description: |
| 169 | # Monitors dev_mgmt_daemon in an infinite loop. The loop breaks if openolt |
| 170 | # agent process starts |
| 171 | # |
| 172 | # Globals: |
| 173 | # None |
| 174 | # |
| 175 | # Arguments: |
| 176 | # None |
| 177 | # |
| 178 | # Returns: |
| 179 | # |
| 180 | #------------------------------------------------------------------------------ |
| 181 | monitor_dev_mgmt_daemon_process() { |
| 182 | |
| 183 | # This is builtin bash variable that tracks the number of seconds |
| 184 | # elapsed since the shell started. We can reset to 0 and the timer |
| 185 | # starts from here |
| 186 | SECONDS=0 |
| 187 | while true; do |
| 188 | DEV_MGMT_DAEMON_PID=$(pidof /broadcom/dev_mgmt_daemon) |
| 189 | if [ -z ${DEV_MGMT_DAEMON_PID} ];then |
| 190 | # Reboot OLT so that everything recovers in the right state |
| 191 | LOG "dev_mgmt_daemon process crashed or not available in in-band mode of OLT, rebooting OLT" |
| 192 | reboot -n |
| 193 | fi |
| 194 | |
| 195 | sleep ${PID_MONITOR_INTERVAL} |
| 196 | |
| 197 | OPENOLT_PID=$(pidof /broadcom/openolt) |
| 198 | if [ ${OPENOLT_PID} ]; then |
| 199 | LOG "openolt process has started. start monitoring both dev_mgmt_daemon and openolt processes" |
| 200 | break |
| 201 | elif [ ${SECONDS} -ge ${MAX_WAIT_TIMER_FOR_OPENOLT_PROCESS_TO_START} ]; then |
| 202 | # Typically the openolt process is supposed to start within MAX_WAIT_TIMER_FOR_OPENOLT_PROCESS_TO_START |
| 203 | # If that does not happen, reboot the OLT. |
| 204 | LOG "openolt process did not start for more than ${MAX_WAIT_TIMER_FOR_OPENOLT_PROCESS_TO_START} seconds, rebooting OLT" |
| 205 | reboot -n |
| 206 | exit 1 |
| 207 | fi |
| 208 | done |
| 209 | } |
| 210 | |
| 211 | #------------------------------------------------------------------------------ |
| 212 | # Function Name: is_olt_in_inband_mode |
| 213 | # Description: |
| 214 | # Checks if OLT is working in in-band mode |
| 215 | # Presence of in-band startup scripts indicates in-band mode. These scripts |
| 216 | # are not packaged as part of the debian package or the ONL when OLT is working |
| 217 | # in out-of-band mode. |
| 218 | # |
| 219 | # Globals: |
| 220 | # None |
| 221 | # |
| 222 | # Arguments: |
| 223 | # None |
| 224 | # |
| 225 | # Returns: |
| 226 | # |
| 227 | #------------------------------------------------------------------------------ |
| 228 | is_olt_in_inband_mode() { |
| 229 | ! [ -f "/etc/init.d/start_inband_oltservices.sh" ] |
| 230 | } |
| 231 | |
| 232 | |
| 233 | # Execution starts here |
| 234 | LOG "---- watchdog started ----" |
| 235 | |
| 236 | # Source watchdog configuration |
| 237 | . /opt/openolt/watchdog.config |
| 238 | |
| 239 | if [ ${enable_watchdog} = "yes" ]; then |
| 240 | LOG "watchdog is enabled" |
| 241 | else |
| 242 | LOG "watchdog is disabled - exiting" |
| 243 | exit 0 |
| 244 | fi |
| 245 | |
| 246 | # If OLT is working in out-of-band start with monitoring only dev_mgmt_daemon process |
| 247 | if [ $(is_olt_in_inband_mode; echo $?) -eq 1 ]; then |
| 248 | LOG "openolt working in in-band mode, starting to monitor dev_mgmt_daemon process first" |
| 249 | monitor_dev_mgmt_daemon_process |
| 250 | # If we are here, that means the openolt process has just started and we now need to |
| 251 | # monitor both dev_mgmt_daemon and openolt process |
| 252 | else |
| 253 | sleep ${watchdog_startup_timer} # Allow some time for openolt and dev_mgmt_daemon process to start |
| 254 | LOG "openolt working in out-of-band mode" |
| 255 | fi |
| 256 | |
| 257 | monitor_openolt_and_dev_mgmt_daemon_process |