install/get-logs.sh - voltha - Gitiles

 #!/bin/bash

 # This script will collect all of the pertinent logs from a voltha
 # HA swarm cluster, tar and bzip2 them to facilitate sending them
 # to the suspected issue owner. The replicated storage is used to
 # allow all hosts to place the logs in a single place.

 # Base directory under which the helper script and the registry volume
 # used by this script are found.
 volthaDir="/cord/incubator/voltha"
 # Bookkeeping for the background log collectors, keyed by service name:
 # the name itself, the collector PID and the last observed log file size.
 declare -A lNames lPids lSizes

 # Checks that a value is absent from a list.
 # Arguments: $1 - value to look for; remaining arguments - the list.
 # Returns:   0 when no list entry equals the value, 1 on the first exact match.
 notIn() {
 	local needle=$1 candidate
 	shift
 	for candidate in "$@"; do
 		if [[ "$candidate" == "$needle" ]]; then
 			return 1
 		fi
 	done
 	return 0
 }

 # Get the list of the other hosts that make up the cluster: drop the header
 # row, every line containing "*" (docker flags the current node that way)
 # and every line mentioning "Down", then keep the second column.
 hosts=$(docker node ls | tail -n +2 | grep -v "*" | grep -v "Down" | awk '{print $2}')

 echo "Collecting logs for hosts: $(hostname) ${hosts}"

 # Create a temporary directory for temporary storage of all the logs.
 # -p keeps a directory left over from an earlier run from being an error.
 mkdir -p "${volthaDir}/registry_data/registry_volume/log_tmp"
 # Stop here if the directory cannot be entered; otherwise every log file
 # below would be written into whatever directory the script was started in.
 pushd "${volthaDir}/registry_data/registry_volume/log_tmp" || exit 1

 # Docker health in general: snapshot the node list and the service list.
 echo "Getting docker node ls"
 docker node ls > docker_node_ls.log 2>&1
 echo "Getting docker service ls"
 docker service ls > docker_service_ls.log 2>&1

 # Service names are taken from the second column of the service listing,
 # header row skipped. The list is used for the ps below and for the log
 # collection that follows.
 svcs=$(docker service ls | tail -n +2 | awk '{print $2}')

 # Store the ps information of every service in its own file.
 for svc in $svcs; do
 	echo "Getting docker service ps $svc"
 	docker service ps "${svc}" > "docker_service_ps_${svc}" 2>&1
 done

 # Start one background "docker service logs" collector per service and
 # record, keyed by service name, its PID, its name and a starting size of 0.
 for svc in $svcs; do
 	echo "Getting docker service logs $svc"
 	docker service logs "${svc}" > "docker_service_logs_${svc}" 2>&1 &
 	lPids[$svc]=$!
 	lNames[$svc]=$svc
 	lSizes[$svc]=0
 done

 # Wait for the background collectors started above. Every pass sleeps 10
 # seconds, drops finished collectors from the bookkeeping arrays and checks
 # the remaining log files for growth. "patience" is set back to 5 whenever
 # a file grew and is decremented on every pass; when it reaches 0 the
 # remaining collectors are sent TERM and the loop ends.
 patience=5
 while [ "${#lNames[*]}" -ne 0  ]
 do
 	echo "*** Waiting on log collection to complete (patience = ${patience}). Outstanding jobs: ${#lNames[*]} (${lNames[@]})"
 	sleep 10
 	# Check which collectors are done and remove them from the list
 	jobs > /dev/null # Don't delete this useless line or the next one will erroneously report a PID
 	pids=$(jobs -p)
 	for i in "${lNames[@]}"
 	do
 		# $pids is deliberately unquoted: each PID must become its own argument.
 		if notIn "${lPids[$i]}" $pids; then
 			unset "lPids[$i]"
 			unset "lNames[$i]"
 			unset "lSizes[$i]"
 		fi
 	done
 	unset pids
 	# Everything finished during this pass: leave before the patience
 	# bookkeeping so that a clean completion is never reported as "stuck".
 	if [ "${#lNames[*]}" -eq 0 ]; then
 		break
 	fi
 	# Now for all remaining jobs check the file size of the log file for growth
 	# reset the timeout if the file is still growing. If no files are still growing
 	# then don't touch the timeout.
 	for i in "${lNames[@]}"
 	do
 		fsz=$(stat --format=%s "docker_service_logs_${i}")
 		# If stat produced nothing, treat the size as 0 so the numeric
 		# comparison below stays well-formed.
 		fsz=${fsz:-0}
 		if [ "${lSizes[$i]}" -lt "$fsz" ]; then
 			patience=5
 			lSizes[$i]=$fsz
 		fi
 	done
 	patience=$((patience - 1))
 	if [ "$patience" -eq 0 ]; then
 		echo "Log collection stuck, killing any active collectors"
 		for i in "${lNames[@]}"
 		do
 			echo "${i}:${lNames[$i]}:${lSizes[$i]}:${lPids[$i]}"
 			kill -s TERM "${lPids[$i]}"
 		done
 		break
 	fi
 done

 # Get the image list from this host
 #echo "Getting docker image ls from `hostname`"
 #docker image ls > docker_image_ls_`hostname` 2>&1
 # Get the memory info for this host
 #echo "Getting memory info from `hostname`"
 #cat /proc/meminfo > meminfo_`hostname` 2>&1
 # Get the disk info for this host
 #echo "Getting disk info from `hostname`"
 #df -h > df_`hostname` 2>&1

 #
 # If too many logs are generated it's not unusual that docker service logs
 # hangs and never produces the totality of logs for a service. In order
 # to get as much information as possible get the individual container logs
 # for each container on each host
 #

 # Run the per-host collection script locally first ...
 "${volthaDir}/get-host-logs.sh"


 # ... then on each of the other cluster hosts over ssh, as user voltha.
 for node in $hosts; do
 	ssh "voltha@${node}" "${volthaDir}/get-host-logs.sh"
 done

 # Leave the temporary directory and pack its contents into a timestamped,
 # bzip2-compressed tarball under ${volthaDir}.
 popd
 # The tar member paths and the rm below are relative to the registry
 # volume, so do not go on if it cannot be entered.
 pushd "${volthaDir}/registry_data/registry_volume" || exit 1
 # Remove the collected logs only after the archive was written, so that a
 # failed tar does not throw the logs away.
 if tar cjvf "${volthaDir}/logs.tar$(date "+%Y%m%d-%H:%M:%S").bz2" log_tmp/*; then
 	rm -fr log_tmp
 else
 	echo "Archive creation failed, leaving the logs in $(pwd)/log_tmp" >&2
 fi
 popd
	#!/bin/bash

	# This script will collect all of the pertinent logs from a voltha
	# HA swarm cluster, tar and bzip2 them to facilitate sending them
	# to the suspected issue owner. The replicated storage is used to
	# allow all hosts to place the logs in a single place.

	# Base directory under which the helper script and the registry volume
	# used by this script are found.
	volthaDir="/cord/incubator/voltha"
	# Bookkeeping for the background log collectors, keyed by service name:
	# the name itself, the collector PID and the last observed log file size.
	declare -A lNames lPids lSizes

	# Checks that a value is absent from a list.
	# Arguments: $1 - value to look for; remaining arguments - the list.
	# Returns:   0 when no list entry equals the value, 1 on the first exact match.
	notIn() {
		local needle=$1 candidate
		shift
		for candidate in "$@"; do
			if [[ "$candidate" == "$needle" ]]; then
				return 1
			fi
		done
		return 0
	}

	# Get the list of the other hosts that make up the cluster: drop the header
	# row, every line containing "*" (docker flags the current node that way)
	# and every line mentioning "Down", then keep the second column.
	# The pipes must be real pipes; written as \| they are handed to docker
	# as literal arguments and nothing is filtered.
	hosts=$(docker node ls | tail -n +2 | grep -v "*" | grep -v "Down" | awk '{print $2}')

	echo "Collecting logs for hosts: $(hostname) ${hosts}"

	# Create a temporary directory for temporary storage of all the logs.
	# -p keeps a directory left over from an earlier run from being an error.
	mkdir -p "${volthaDir}/registry_data/registry_volume/log_tmp"
	# Stop here if the directory cannot be entered; otherwise every log file
	# below would be written into whatever directory the script was started in.
	pushd "${volthaDir}/registry_data/registry_volume/log_tmp" || exit 1

	# Docker health in general: snapshot the node list and the service list.
	echo "Getting docker node ls"
	docker node ls > docker_node_ls.log 2>&1
	echo "Getting docker service ls"
	docker service ls > docker_service_ls.log 2>&1

	# Service names are taken from the second column of the service listing,
	# header row skipped. The pipes must be real pipes; written as \| they
	# are handed to docker as literal arguments and the list comes out wrong.
	svcs=$(docker service ls | tail -n +2 | awk '{print $2}')

	# Store the ps information of every service in its own file.
	for i in $svcs
	do
		echo "Getting docker service ps $i"
		docker service ps "${i}" > "docker_service_ps_${i}" 2>&1
	done

	# Start one background "docker service logs" collector per service and
	# record, keyed by service name, its PID, its name and a starting size of 0.
	for svc in $svcs; do
		echo "Getting docker service logs $svc"
		docker service logs "${svc}" > "docker_service_logs_${svc}" 2>&1 &
		lPids[$svc]=$!
		lNames[$svc]=$svc
		lSizes[$svc]=0
	done

	# Wait for the background collectors started above. Every pass sleeps 10
	# seconds, drops finished collectors from the bookkeeping arrays and checks
	# the remaining log files for growth. "patience" is set back to 5 whenever
	# a file grew and is decremented on every pass; when it reaches 0 the
	# remaining collectors are sent TERM and the loop ends.
	patience=5
	while [ "${#lNames[*]}" -ne 0 ]
	do
		echo "*** Waiting on log collection to complete (patience = ${patience}). Outstanding jobs: ${#lNames[*]} (${lNames[@]})"
		sleep 10
		# Check which collectors are done and remove them from the list
		jobs > /dev/null # Don't delete this useless line or the next one will erroneously report a PID
		pids=$(jobs -p)
		for i in "${lNames[@]}"
		do
			# $pids is deliberately unquoted: each PID must become its own argument.
			if notIn "${lPids[$i]}" $pids; then
				unset "lPids[$i]"
				unset "lNames[$i]"
				unset "lSizes[$i]"
			fi
		done
		unset pids
		# Everything finished during this pass: leave before the patience
		# bookkeeping so that a clean completion is never reported as "stuck".
		if [ "${#lNames[*]}" -eq 0 ]; then
			break
		fi
		# Now for all remaining jobs check the file size of the log file for growth
		# reset the timeout if the file is still growing. If no files are still growing
		# then don't touch the timeout.
		for i in "${lNames[@]}"
		do
			fsz=$(stat --format=%s "docker_service_logs_${i}")
			# If stat produced nothing, treat the size as 0 so the numeric
			# comparison below stays well-formed.
			fsz=${fsz:-0}
			if [ "${lSizes[$i]}" -lt "$fsz" ]; then
				patience=5
				lSizes[$i]=$fsz
			fi
		done
		patience=$((patience - 1))
		if [ "$patience" -eq 0 ]; then
			echo "Log collection stuck, killing any active collectors"
			for i in "${lNames[@]}"
			do
				echo "${i}:${lNames[$i]}:${lSizes[$i]}:${lPids[$i]}"
				kill -s TERM "${lPids[$i]}"
			done
			break
		fi
	done

	# Get the image list from this host
	#echo "Getting docker image ls from `hostname`"
	#docker image ls > docker_image_ls_`hostname` 2>&1
	# Get the memory info for this host
	#echo "Getting memory info from `hostname`"
	#cat /proc/meminfo > meminfo_`hostname` 2>&1
	# Get the disk info for this host
	#echo "Getting disk info from `hostname`"
	#df -h > df_`hostname` 2>&1

	#
	# If too many logs are generated it's not unusual that docker service logs
	# hangs and never produces the totality of logs for a service. In order
	# to get as much information as possible get the individual container logs
	# for each container on each host
	#

	# Run the per-host collection script locally first ...
	"${volthaDir}/get-host-logs.sh"


	# ... then on each of the other cluster hosts over ssh, as user voltha.
	for node in $hosts; do
		ssh "voltha@${node}" "${volthaDir}/get-host-logs.sh"
	done

	# Leave the temporary directory and pack its contents into a timestamped,
	# bzip2-compressed tarball under ${volthaDir}.
	popd
	# The tar member paths and the rm below are relative to the registry
	# volume, so do not go on if it cannot be entered.
	pushd "${volthaDir}/registry_data/registry_volume" || exit 1
	# Remove the collected logs only after the archive was written, so that a
	# failed tar does not throw the logs away.
	if tar cjvf "${volthaDir}/logs.tar$(date "+%Y%m%d-%H:%M:%S").bz2" log_tmp/*; then
		rm -fr log_tmp
	else
		echo "Archive creation failed, leaving the logs in $(pwd)/log_tmp" >&2
	fi
	popd