#!/bin/bash
# blob: 6c627cbf9babde4e45cc81691e60dacf84532966

# This script will collect all of the pertinent logs from a voltha
# HA swarm cluster host and place them in replicated storage.

# Base directory under which the collected logs are stored.
volthaDir="/cord/incubator/voltha"
# Name of this host; embedded in every output file name.
hName=$(hostname)
# Bookkeeping for the background log collectors, all keyed by container name.
declare -A lNames   # name -> name; the set of collectors still outstanding
declare -A lPids    # name -> PID of the background `docker logs` process
declare -A lSizes   # name -> last observed size (bytes) of the log file

#######################################
# Checks if a value is not in a list of values.
# Arguments:
#   $1   - the value to look for
#   $2.. - the candidate values to compare against (may be empty)
# Returns:
#   0 if $1 equals none of the candidates, 1 if it equals at least one.
#######################################
notIn() {
    local needle=$1 candidate
    shift
    for candidate in "$@"; do
        # The right-hand side is quoted, so this is a literal string
        # comparison rather than a glob match.
        if [[ "$candidate" == "$needle" ]]; then
            return 1
        fi
    done
    return 0
}

# Work inside the log staging directory. Abort if it cannot be entered so
# the output files are never scattered into the directory we started from.
pushd "${volthaDir}/registry_data/registry_volume/log_tmp" || {
    echo "Cannot enter ${volthaDir}/registry_data/registry_volume/log_tmp" >&2
    exit 1
}

# Get the image list from this host
echo "Getting docker image ls from ${hName}"
docker image ls > "docker_image_ls_${hName}" 2>&1
# Get the memory info for this host
echo "Getting memory info from ${hName}"
cat /proc/meminfo > "meminfo_${hName}" 2>&1
# Get the disk info for this host
echo "Getting disk info from ${hName}"
df -h > "df_${hName}" 2>&1

#
# If too many logs are generated it's not unusual that docker service logs
# hangs and never produces the totality of logs for a service. In order
# to get as much information as possible, get the individual container logs
# for each container on each host.
#

# Start one background `docker logs` per running container on this host and
# record its name, PID and initial log size (0) for the wait loop.
# The ID and name are taken from a single `docker ps` snapshot so that a
# container exiting mid-loop cannot produce an empty or multi-line name.
while read -r cid cont; do
    # Ignore blank or incomplete rows; an empty name is not a valid array key.
    [[ -n "$cid" && -n "$cont" ]] || continue
    lNames[$cont]=$cont
    lSizes[$cont]=0
    echo "Getting logs for ${cont} on host ${hName}"
    # stdin is detached so the collector can never consume the loop's input.
    docker logs "$cid" > "docker_logs_${hName}_${cont}" 2>&1 < /dev/null &
    lPids[$cont]=$!
done < <(docker ps --format '{{.ID}} {{.Names}}')

# Wait for the background collectors to finish. The loop polls every 10
# seconds; `patience` counts down on each poll and is reset whenever any
# outstanding log file has grown. When it reaches 0 the remaining collectors
# are considered stuck and are sent SIGTERM.
patience=5
while [ "${#lNames[@]}" -ne 0 ]; do
    echo "*** Waiting on log collection to complete (patience = ${patience}). Outstanding jobs: ${#lNames[*]} (${lNames[*]})"
    sleep 10
    # Check which collectors are done and remove them from the list
    jobs > /dev/null # Don't delete this useless line or the next one will erroneously report a PID
    pids=$(jobs -p)
    for i in "${lNames[@]}"; do
        # Word-splitting of $pids into one argument per PID is intentional.
        # shellcheck disable=SC2086
        if notIn "${lPids[$i]}" $pids; then
            unset "lPids[$i]"
            unset "lNames[$i]"
            unset "lSizes[$i]"
        fi
    done
    unset pids
    # Now for all remaining jobs check the file size of the log file for growth
    # and reset the timeout if the file is still growing. If no files are still
    # growing then don't touch the timeout.
    for i in "${lNames[@]}"; do
        # A log file that cannot be stat'ed is treated as size 0 (no growth)
        # instead of feeding an empty operand to the comparison below.
        fsz=$(stat --format=%s "docker_logs_${hName}_${i}" 2>/dev/null) || fsz=0
        if [ "${lSizes[$i]}" -lt "${fsz:-0}" ]; then
            patience=5
            lSizes[$i]=$fsz
        fi
    done
    patience=$((patience - 1))
    if [ "$patience" -eq 0 ]; then
        echo "Log collection stuck, killing any active collectors"
        for i in "${lNames[@]}"; do
            echo "${i}:${lNames[$i]}:${lSizes[$i]}:${lPids[$i]}"
            kill -s TERM "${lPids[$i]}"
        done
        break
    fi
done


popd