#!/bin/bash

# This script will collect all of the pertinent logs from a voltha
# HA swarm cluster host and place them in replicated storage.

volthaDir="/cord/incubator/voltha"
hName=$(hostname)
declare -A lNames
declare -A lPids
declare -A lSizes
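# lNames tracks the containers whose logs are still being collected,
# lPids holds the PID of each background "docker logs" job, and lSizes
# holds the last observed size of each container's log file.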

# Returns success (0) if the first argument does not appear among the
# remaining arguments, failure (1) otherwise.
notIn() {
    local e match=$1
    shift
    for e; do [[ "$e" == "$match" ]] && return 1; done
    return 0
}
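# For example, the call `notIn "${lPids[$i]}" $pids` below succeeds only
# when the recorded collector PID is no longer among the running jobs.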

pushd "${volthaDir}/registry_data/registry_volume/log_tmp"

# Get the image list from this host
echo "Getting docker image ls from ${hName}"
docker image ls > "docker_image_ls_${hName}" 2>&1
# Get the memory info for this host
echo "Getting memory info from ${hName}"
cat /proc/meminfo > "meminfo_${hName}" 2>&1
# Get the disk info for this host
echo "Getting disk info from ${hName}"
df -h > "df_${hName}" 2>&1

#
# When a service produces a large volume of logs, "docker service logs"
# can hang and never return the complete log for the service. To capture
# as much information as possible, collect the individual container logs
# for each container on each host instead.
#
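# Each container's logs are therefore collected in a background job and the
# resulting files are monitored for growth until every collector finishes
# or stalls.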

# Get the container logs for this host.
# Compute the cut range that isolates the NAMES column of "docker ps":
# st is the character offset where the NAMES header begins and ed bounds
# the field 100 characters further on.
st=$(docker ps | head -n 1 | sed -e 's/NAMES.*//' | wc -c)
ed=$((st + 100))
containers=$(docker ps | tail -n +2 | awk '{print $1}')
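# Launch one background "docker logs" collector per running container and
# record its name, PID, and initial (zero) log size for tracking below.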
for i in $containers
do
    cont=$(docker ps | grep "$i" | cut -c ${st}-${ed})
    lNames[$cont]=$cont
    lSizes[$cont]=0
    echo "Getting logs for ${cont} on host ${hName}"
    docker logs "$i" > "docker_logs_${hName}_${cont}" 2>&1 &
    lPids[$cont]=$!
done
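# Wait for the collectors to finish. Every 10 seconds any collector whose
# background job has exited is dropped from the tracking arrays. The
# "patience" counter is reset whenever a remaining log file grows; if it
# ever reaches zero the surviving collectors are assumed to be stuck and
# are terminated.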

patience=5
while [ "${#lNames[*]}" -ne 0 ]
do
    echo "*** Waiting on log collection to complete (patience = ${patience}). Outstanding jobs: ${#lNames[*]} (${lNames[@]})"
    sleep 10
    # Check which collectors are done and remove them from the list.
    # The bare "jobs" call below reaps finished background jobs; without it,
    # "jobs -p" on the next line would erroneously report the PID of a
    # completed collector as still running.
    jobs > /dev/null
    pids=$(jobs -p)
    for i in "${lNames[@]}"
    do
        if notIn "${lPids[$i]}" $pids; then
            unset lPids[$i]
            unset lNames[$i]
            unset lSizes[$i]
        fi
    done
    unset pids
    # For all remaining jobs, check the size of the log file and reset the
    # timeout if the file is still growing. If no file is growing, leave
    # the timeout alone so it continues to count down.
    for i in "${lNames[@]}"
    do
        fsz=$(stat --format=%s "docker_logs_${hName}_${i}")
        if [ ${lSizes[$i]} -lt $fsz ]; then
            patience=5
            lSizes[$i]=$fsz
        fi
    done
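    # One 10-second interval has passed; once the patience counter reaches
    # zero without any log file having grown, the remaining collectors are
    # assumed to be stuck and are killed below.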
    patience=$((patience - 1))
    if [ $patience -eq 0 ]; then
        echo "Log collection stuck, killing any active collectors"
        for i in "${lNames[@]}"
        do
            echo "${i}:${lNames[$i]}:${lSizes[$i]}:${lPids[$i]}"
            kill -s TERM ${lPids[$i]}
        done
        break
    fi
done


popd