#!/bin/bash
# Copyright 2017-present Open Networking Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Sergio Slobodrianbcd30b12017-08-22 22:32:00 -040015
16# This script will collect all of the pertinent logs from a voltha
17# HA swarm cluster host and place them in replicated storage.
18
19volthaDir="/cord/incubator/voltha"
20hName=`hostname`
21declare -A lNames
22declare -A lPids
23declare -A lSizes
24
25# Checks if a value is not in an array.
26notIn() {
27 local e match=$1
28 shift
29 for e; do [[ "$e" == "$match" ]] && return 1; done
30 return 0
31}
32
33pushd ${volthaDir}/registry_data/registry_volume/log_tmp
34
35# Get the image list from this host
36echo "Getting docker image ls from ${hName}"
37docker image ls > docker_image_ls_${hName} 2>&1
38# Get the memory info for this host
39echo "Getting memory info from ${hName}"
40cat /proc/meminfo > meminfo_${hName} 2>&1
41# Get the disk info for this host
42echo "Getting disk info from ${hName}"
43df -h > df_${hName} 2>&1
44
45#
46# If too many logs are generated it's not unusual that docker service logs
47# hangs and never produces the totality of logs for a service. In order
48# to get as much information as possible get the individual container logs
49# for each container on each host
50#
51
52# Get the container logs for this host
53# Start of cut range
54st=`docker ps | head -n 1 | sed -e 's/NAMES.*//' | wc -c`
55ed=`expr $st + 100`
56containers=`docker ps | tail -n +2 | awk '{print $1}'`
57for i in $containers
58do
59 cont=`docker ps | grep $i | cut -c ${st}-${ed}`
60 lNames[$cont]=$cont
61 lSizes[$cont]=0
62 echo "Getting logs for ${cont} on host ${hName}"
63 docker logs $i > "docker_logs_${hName}_${cont}" 2>&1 &
64 lPids[$cont]=$!
65done
66
67patience=5
68while [ "${#lNames[*]}" -ne 0 ]
69do
Sergio Slobodrian8725ea82017-08-27 23:47:41 -040070 echo "*** Waiting on log collection to complete (patience = ${patience}). Outstanding jobs: ${#lNames[*]} (${lNames[@]})"
Sergio Slobodrianbcd30b12017-08-22 22:32:00 -040071 sleep 10
72 # Check which collectors are done are remove them from the list
73 jobs > /dev/null # Don't delete this useless line or the next one will eroniously report a PID
74 pids=`jobs -p`
75 for i in "${lNames[@]}"
76 do
77 if notIn "${lPids[$i]}" $pids; then
78 unset lPids[$i]
79 unset lNames[$i]
80 unset lSizes[$i]
81 fi
82 done
83 unset pids
84 # Now for all remaining jobs check the file size of the log file for growth
85 # reset the timeout if the file is still growing. If no files are still growing
86 # then don't touch the timeout.
87 for i in "${lNames[@]}"
88 do
89 fsz=`stat --format=%s "docker_logs_${hName}_${i}"`
90 if [ ${lSizes[$i]} -lt $fsz ]; then
91 patience=5
92 lSizes[$i]=$fsz
93 fi
94 done
95 patience=`expr $patience - 1`
96 if [ $patience -eq 0 ]; then
97 echo "Log collection stuck, killing any active collectors"
98 for i in "${lNames[@]}"
99 do
100 echo "${i}:${lNames[$i]}:${lSizes[$i]}:${lPids[$i]}"
101 kill -s TERM ${lPids[$i]}
102 done
103 break
104 fi
105done
106
107
108popd