Using kail to capture logs during the scale test
Creating parameters for ETCD and Kafka scaling
Avoid disabling ONOS apps (the liveness/readiness probe check for them)
Change-Id: Icbb6fa2e32d2eaaeeb0bd74aade04a7cf2dce9a3
diff --git a/jjb/pipeline/voltha-scale-test.groovy b/jjb/pipeline/voltha-scale-test.groovy
index 0cf443c..dc4d12e 100644
--- a/jjb/pipeline/voltha-scale-test.groovy
+++ b/jjb/pipeline/voltha-scale-test.groovy
@@ -62,6 +62,8 @@
VOLTHA_BBSIM_CHART="${bbsimChart}"
VOLTHA_ADAPTER_OPEN_OLT_CHART="${openoltAdapterChart}"
VOLTHA_ADAPTER_OPEN_ONU_CHART="${openonuAdapterChart}"
+
+ APPS_TO_LOG="etcd kafka onos-onos-classic adapter-open-onu adapter-open-olt rw-core ofagent bbsim radius"
}
stages {
@@ -147,12 +149,12 @@
// includes monitoring, kafka, etcd
steps {
sh '''
- helm install kafka incubator/kafka --set replicas=3 --set persistence.enabled=false --set zookeeper.replicaCount=3 --set zookeeper.persistence.enabled=false
+ helm install kafka incubator/kafka --set replicas=${kafkaReplicas} --set persistence.enabled=false --set zookeeper.replicaCount=${kafkaReplicas} --set zookeeper.persistence.enabled=false
# the ETCD chart use "auth" for resons different than BBsim, so strip that away
ETCD_FLAGS=$(echo ${extraHelmFlags} | sed -e 's/--set auth=false / /g') | sed -e 's/--set auth=true / /g'
ETCD_FLAGS+=" --set memoryMode=${inMemoryEtcdStorage} "
- helm install -f $WORKSPACE/kind-voltha/values.yaml --set etcd.replicas=3 etcd etcd/etcd $ETCD_FLAGS
+ helm install -f $WORKSPACE/kind-voltha/values.yaml --set replicas=${etcdReplicas} etcd etcd/etcd $ETCD_FLAGS
if [ ${withMonitoring} = true ] ; then
helm install nem-monitoring cord/nem-monitoring \
@@ -231,6 +233,19 @@
_TAG=etcd-port-forward kubectl port-forward --address 0.0.0.0 -n default service/etcd $VOLTHA_ETCD_PORT:2379&
"""
}
+ sh returnStdout: false, script: '''
+ # start logging with kail
+
+ LOG_FOLDER=$WORKSPACE/logs
+ mkdir -p $LOG_FOLDER
+
+ list=($APPS_TO_LOG)
+ for app in "${list[@]}"
+ do
+ echo "Starting logs for: ${app}"
+ _TAG=kail-$app kail -l app=$app --since 1h > $LOG_FOLDER/$app.log&
+ done
+ '''
// bbsim-sadis server takes a while to cache the subscriber entries
// wait for that before starting the tests
sleep(120)
@@ -239,9 +254,6 @@
stage('Configuration') {
steps {
sh '''
- # Always deactivate org.opencord.kafka
- sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 app deactivate org.opencord.kafka
-
#Setting link discovery
sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 cfg set org.onosproject.provider.lldp.impl.LldpLinkProvider enabled ${withLLDP}
@@ -253,20 +265,6 @@
sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 cfg set org.onosproject.provider.of.flow.impl.OpenFlowRuleProvider flowPollFrequency ${onosStatInterval}
sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 cfg set org.onosproject.provider.of.device.impl.OpenFlowDeviceProvider portStatsPollFrequency ${onosStatInterval}
- if [ ${withEapol} = false ] || [ ${withFlows} = false ]; then
- sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 app deactivate org.opencord.aaa
- fi
-
- if [ ${withDhcp} = false ] || [ ${withFlows} = false ]; then
- sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 app deactivate org.opencord.dhcpl2relay
- fi
-
- if [ ${withIgmp} = false ] || [ ${withFlows} = false ]; then
- # FIXME will actually affected the tests only after VOL-3054 is addressed
- sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 app deactivate org.opencord.igmpproxy
- sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 app deactivate org.opencord.mcast
- fi
-
if [ ${withFlows} = false ]; then
sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 app deactivate org.opencord.olt
fi
@@ -376,6 +374,21 @@
// event of a timeout in the tests
sh '''
+ # stop the kail processes
+ list=($APPS_TO_LOG)
+ for app in "${list[@]}"
+ do
+ echo "Stopping logs for: ${app}"
+ _TAG="kail-$app"
+ P_IDS="$(ps e -ww -A | grep "_TAG=$_TAG" | grep -v grep | awk '{print $1}')"
+ if [ -n "$P_IDS" ]; then
+ echo $P_IDS
+ for P_ID in $P_IDS; do
+ kill -9 $P_ID
+ done
+ fi
+ done
+
if [ ${withPcap} = true ] ; then
# stop ofAgent tcpdump
P_ID="\$(ps e -ww -A | grep "_TAG=ofagent-tcpdump" | grep -v grep | awk '{print \$1}')"
@@ -390,7 +403,7 @@
cd voltha-system-tests
source ./vst_venv/bin/activate
- python tests/scale/collect-result.py -r $WORKSPACE/RobotLogs/output.xml -p $WORKSPACE/plots > $WORKSPACE/execution-time.txt
+ python tests/scale/collect-result.py -r $WORKSPACE/RobotLogs/output.xml -p $WORKSPACE/plots > $WORKSPACE/execution-time.txt || true
cat $WORKSPACE/execution-time.txt
'''
sh '''
@@ -432,8 +445,6 @@
unstableThreshold: 0]);
// get all the logs from kubernetes PODs
sh returnStdout: false, script: '''
- LOG_FOLDER=$WORKSPACE/logs
- mkdir -p $LOG_FOLDER
# store information on running charts
helm ls > $LOG_FOLDER/helm-list.txt || true
@@ -443,18 +454,6 @@
kubectl get pods --all-namespaces -o jsonpath="{range .items[*].status.containerStatuses[*]}{.image}{'\\n'}" | sort | uniq | tee $LOG_FOLDER/pod-images.txt || true
kubectl get pods --all-namespaces -o jsonpath="{range .items[*].status.containerStatuses[*]}{.imageID}{'\\n'}" | sort | uniq | tee $LOG_FOLDER/pod-imagesId.txt || true
- # log in individual files for all the container that match the selector app=$APP_TO_LOG
- APPS_TO_LOG=(etcd kafka onos adapter-open-onu adapter-open-olt rw-core ofagent bbsim radius)
- for app in "${APPS_TO_LOG[@]}"
- do
- echo "Getting logs for: ${app}"
- kubectl get pods -l app=${app} -o=jsonpath=\"{.items[*]['metadata.name']}\"
- printf '%s\n' $(kubectl get pods -l app=$app -o=jsonpath="{.items[*]['metadata.name']}") | xargs -I# bash -c "kubectl logs # > $LOG_FOLDER/#.log" || true
-
- # Get the logs from the previous POD if any (useful in case of restarts)
- printf '%s\n' $(kubectl get pods -l app=$app -o=jsonpath="{.items[*]['metadata.name']}") | xargs -I# bash -c "kubectl logs -p # > $LOG_FOLDER/#-previous.log" || true
- done
-
# copy the ONOS logs directly from the container to avoid the color codes
printf '%s\n' $(kubectl get pods -l app=onos-onos-classic -o=jsonpath="{.items[*]['metadata.name']}") | xargs -I# bash -c "kubectl cp #:${karafHome}/data/log/karaf.log $LOG_FOLDER/#.log" || true
'''
@@ -523,10 +522,12 @@
}
// get cpu usage by container
sh '''
- cd $WORKSPACE/voltha-system-tests
- source ./vst_venv/bin/activate
- sleep 60 # we have to wait for prometheus to collect all the information
- python tests/scale/sizing.py -o $WORKSPACE/plots || true
+ if [ ${withMonitoring} = true ] ; then
+ cd $WORKSPACE/voltha-system-tests
+ source ./vst_venv/bin/activate
+ sleep 60 # we have to wait for prometheus to collect all the information
+ python tests/scale/sizing.py -o $WORKSPACE/plots || true
+ fi
'''
archiveArtifacts artifacts: 'kind-voltha/install-minimal.log,execution-time.txt,logs/*,logs/pprof/*,RobotLogs/*,plots/*.txt,plots/*.pdf,etcd-metrics/*'
}
diff --git a/jjb/voltha-scale.yaml b/jjb/voltha-scale.yaml
index eb97cfb..8321ee5 100644
--- a/jjb/voltha-scale.yaml
+++ b/jjb/voltha-scale.yaml
@@ -342,6 +342,16 @@
description: 'How many Atomix instances to run'
- string:
+ name: kafkaReplicas
+ default: '{kafkaReplicas}'
+ description: 'How many Kafka instances to run'
+
+ - string:
+ name: etcdReplicas
+ default: '{etcdReplicas}'
+ description: 'How many ETCD instances to run'
+
+ - string:
name: onosStatInterval
default: '{onosStatInterval}'
description: 'How often ONOS should poll for ports, flows and meters'
@@ -470,6 +480,8 @@
openonuAdapterReplicas: 1
onosReplicas: 1
atomixReplicas: 0
+ kafkaReplicas: 3
+ etcdReplicas: 3
extraHelmFlags: ''
onosStatInterval: 5
volthaSystemTestsChange: ''
@@ -543,6 +555,8 @@
openonuAdapterReplicas: 1
onosReplicas: 1
atomixReplicas: 0
+ kafkaReplicas: 1
+ etcdReplicas: 1
extraHelmFlags: ''
onosStatInterval: 5
volthaSystemTestsChange: ''