Merge "Using kail to capture logs during the scale test; creating parameters for ETCD and Kafka scaling; avoid disabling ONOS apps (the liveness/readiness probes check for them)"
diff --git a/jjb/pipeline/voltha-scale-test.groovy b/jjb/pipeline/voltha-scale-test.groovy
index 0cf443c..dc4d12e 100644
--- a/jjb/pipeline/voltha-scale-test.groovy
+++ b/jjb/pipeline/voltha-scale-test.groovy
@@ -62,6 +62,8 @@
     VOLTHA_BBSIM_CHART="${bbsimChart}"
     VOLTHA_ADAPTER_OPEN_OLT_CHART="${openoltAdapterChart}"
     VOLTHA_ADAPTER_OPEN_ONU_CHART="${openonuAdapterChart}"
+
+    APPS_TO_LOG="etcd kafka onos-onos-classic adapter-open-onu adapter-open-olt rw-core ofagent bbsim radius"
   }
 
   stages {
@@ -147,12 +149,12 @@
       // includes monitoring, kafka, etcd
       steps {
         sh '''
-        helm install kafka incubator/kafka --set replicas=3 --set persistence.enabled=false --set zookeeper.replicaCount=3 --set zookeeper.persistence.enabled=false
+        helm install kafka incubator/kafka --set replicas=${kafkaReplicas} --set persistence.enabled=false --set zookeeper.replicaCount=${kafkaReplicas} --set zookeeper.persistence.enabled=false
 
         # the ETCD chart use "auth" for resons different than BBsim, so strip that away
         ETCD_FLAGS=$(echo ${extraHelmFlags} | sed -e 's/--set auth=false / /g') | sed -e 's/--set auth=true / /g'
         ETCD_FLAGS+=" --set memoryMode=${inMemoryEtcdStorage} "
-        helm install -f $WORKSPACE/kind-voltha/values.yaml --set etcd.replicas=3 etcd etcd/etcd $ETCD_FLAGS
+        helm install -f $WORKSPACE/kind-voltha/values.yaml --set replicas=${etcdReplicas} etcd etcd/etcd $ETCD_FLAGS
 
         if [ ${withMonitoring} = true ] ; then
           helm install nem-monitoring cord/nem-monitoring \
@@ -231,6 +233,19 @@
             _TAG=etcd-port-forward kubectl port-forward --address 0.0.0.0 -n default service/etcd $VOLTHA_ETCD_PORT:2379&
           """
         }
+        sh returnStdout: false, script: '''
+        # start logging with kail
+
+        LOG_FOLDER=$WORKSPACE/logs
+        mkdir -p $LOG_FOLDER
+
+        list=($APPS_TO_LOG)
+        for app in "${list[@]}"
+        do
+          echo "Starting logs for: ${app}"
+          _TAG=kail-$app kail -l app=$app --since 1h > $LOG_FOLDER/$app.log&
+        done
+        '''
         // bbsim-sadis server takes a while to cache the subscriber entries
         // wait for that before starting the tests
         sleep(120)
@@ -239,9 +254,6 @@
     stage('Configuration') {
       steps {
         sh '''
-          # Always deactivate org.opencord.kafka
-          sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 app deactivate org.opencord.kafka
-
           #Setting link discovery
           sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 cfg set org.onosproject.provider.lldp.impl.LldpLinkProvider enabled ${withLLDP}
 
@@ -253,20 +265,6 @@
           sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 cfg set org.onosproject.provider.of.flow.impl.OpenFlowRuleProvider flowPollFrequency ${onosStatInterval}
           sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 cfg set org.onosproject.provider.of.device.impl.OpenFlowDeviceProvider portStatsPollFrequency ${onosStatInterval}
 
-          if [ ${withEapol} = false ] || [ ${withFlows} = false ]; then
-            sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 app deactivate org.opencord.aaa
-          fi
-
-          if [ ${withDhcp} = false ] || [ ${withFlows} = false ]; then
-            sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 app deactivate org.opencord.dhcpl2relay
-          fi
-
-          if [ ${withIgmp} = false ] || [ ${withFlows} = false ]; then
-            # FIXME will actually affected the tests only after VOL-3054 is addressed
-            sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 app deactivate org.opencord.igmpproxy
-            sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 app deactivate org.opencord.mcast
-          fi
-
           if [ ${withFlows} = false ]; then
             sshpass -e ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -p 8101 karaf@127.0.0.1 app deactivate org.opencord.olt
           fi
@@ -376,6 +374,21 @@
       // event of a timeout in the tests
       sh '''
 
+        # stop the kail processes
+        list=($APPS_TO_LOG)
+        for app in "${list[@]}"
+        do
+          echo "Stopping logs for: ${app}"
+          _TAG="kail-$app"
+          P_IDS="$(ps e -ww -A | grep "_TAG=$_TAG" | grep -v grep | awk '{print $1}')"
+          if [ -n "$P_IDS" ]; then
+            echo $P_IDS
+            for P_ID in $P_IDS; do
+              kill -9 $P_ID
+            done
+          fi
+        done
+
         if [ ${withPcap} = true ] ; then
           # stop ofAgent tcpdump
           P_ID="\$(ps e -ww -A | grep "_TAG=ofagent-tcpdump" | grep -v grep | awk '{print \$1}')"
@@ -390,7 +403,7 @@
 
         cd voltha-system-tests
         source ./vst_venv/bin/activate
-        python tests/scale/collect-result.py -r $WORKSPACE/RobotLogs/output.xml -p $WORKSPACE/plots > $WORKSPACE/execution-time.txt
+        python tests/scale/collect-result.py -r $WORKSPACE/RobotLogs/output.xml -p $WORKSPACE/plots > $WORKSPACE/execution-time.txt || true
         cat $WORKSPACE/execution-time.txt
       '''
       sh '''
@@ -432,8 +445,6 @@
         unstableThreshold: 0]);
       // get all the logs from kubernetes PODs
       sh returnStdout: false, script: '''
-        LOG_FOLDER=$WORKSPACE/logs
-        mkdir -p $LOG_FOLDER
 
         # store information on running charts
         helm ls > $LOG_FOLDER/helm-list.txt || true
@@ -443,18 +454,6 @@
         kubectl get pods --all-namespaces -o jsonpath="{range .items[*].status.containerStatuses[*]}{.image}{'\\n'}" | sort | uniq | tee $LOG_FOLDER/pod-images.txt || true
         kubectl get pods --all-namespaces -o jsonpath="{range .items[*].status.containerStatuses[*]}{.imageID}{'\\n'}" | sort | uniq | tee $LOG_FOLDER/pod-imagesId.txt || true
 
-        # log in individual files for all the container that match the selector app=$APP_TO_LOG
-        APPS_TO_LOG=(etcd kafka onos adapter-open-onu adapter-open-olt rw-core ofagent bbsim radius)
-        for app in "${APPS_TO_LOG[@]}"
-        do
-          echo "Getting logs for: ${app}"
-          kubectl get pods -l app=${app} -o=jsonpath=\"{.items[*]['metadata.name']}\"
-          printf '%s\n' $(kubectl get pods -l app=$app -o=jsonpath="{.items[*]['metadata.name']}") | xargs -I# bash -c "kubectl logs # > $LOG_FOLDER/#.log" || true
-
-          # Get the logs from the previous POD if any (useful in case of restarts)
-          printf '%s\n' $(kubectl get pods -l app=$app -o=jsonpath="{.items[*]['metadata.name']}") | xargs -I# bash -c "kubectl logs -p # > $LOG_FOLDER/#-previous.log" || true
-        done
-
         # copy the ONOS logs directly from the container to avoid the color codes
         printf '%s\n' $(kubectl get pods -l app=onos-onos-classic -o=jsonpath="{.items[*]['metadata.name']}") | xargs -I# bash -c "kubectl cp #:${karafHome}/data/log/karaf.log $LOG_FOLDER/#.log" || true
       '''
@@ -523,10 +522,12 @@
       }
       // get cpu usage by container
       sh '''
-      cd $WORKSPACE/voltha-system-tests
-      source ./vst_venv/bin/activate
-      sleep 60 # we have to wait for prometheus to collect all the information
-      python tests/scale/sizing.py -o $WORKSPACE/plots || true
+      if [ ${withMonitoring} = true ] ; then
+        cd $WORKSPACE/voltha-system-tests
+        source ./vst_venv/bin/activate
+        sleep 60 # we have to wait for prometheus to collect all the information
+        python tests/scale/sizing.py -o $WORKSPACE/plots || true
+      fi
       '''
       archiveArtifacts artifacts: 'kind-voltha/install-minimal.log,execution-time.txt,logs/*,logs/pprof/*,RobotLogs/*,plots/*.txt,plots/*.pdf,etcd-metrics/*'
     }
diff --git a/jjb/voltha-scale.yaml b/jjb/voltha-scale.yaml
index eb97cfb..8321ee5 100644
--- a/jjb/voltha-scale.yaml
+++ b/jjb/voltha-scale.yaml
@@ -342,6 +342,16 @@
           description: 'How many Atomix instances to run'
 
       - string:
+          name: kafkaReplicas
+          default: '{kafkaReplicas}'
+          description: 'How many Kafka instances to run'
+
+      - string:
+          name: etcdReplicas
+          default: '{etcdReplicas}'
+          description: 'How many ETCD instances to run'
+
+      - string:
           name: onosStatInterval
           default: '{onosStatInterval}'
           description: 'How often ONOS should poll for ports, flows and meters'
@@ -470,6 +480,8 @@
     openonuAdapterReplicas: 1
     onosReplicas: 1
     atomixReplicas: 0
+    kafkaReplicas: 3
+    etcdReplicas: 3
     extraHelmFlags: ''
     onosStatInterval: 5
     volthaSystemTestsChange: ''
@@ -543,6 +555,8 @@
     openonuAdapterReplicas: 1
     onosReplicas: 1
     atomixReplicas: 0
+    kafkaReplicas: 1
+    etcdReplicas: 1
     extraHelmFlags: ''
     onosStatInterval: 5
     volthaSystemTestsChange: ''