# Copyright 2017-present Open Networking Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import csv
import glob
import re

STATS_TO_AGGREGATE = [
    'memory',
    'etcd_stats',
    'kafka_msg_per_topic',
    'cpu',
]
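
# main() below looks each of these stats up recursively under the source
# directory, one CSV file per run named <stat>.csv; the directory layout
# itself is produced by whatever tool collected the per-run stats.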


def data_to_csv(data, output=None):
    """
    Takes a dictionary of lists and saves it as a CSV file, one row per key
    :param data: the input dictionary
    :type data: {metric: []values}
    :param output: the destination file
    :type output: str
    """
    with open(output, "w+") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for k, v in data.items():
            csv_writer.writerow([k] + v)
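
# Illustrative only (keys and values are made up):
#   data_to_csv({"etcd": ["10", "12"]}, output="out.csv")
# writes a single row "etcd,10,12" to out.csv.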


def aggregateCpu(files):
    """
    Aggregates CPU consumption from multiple runs of the pipeline
    :param files: The list of files to aggregate
    :type files: []str
    :return: a dictionary of aggregate stats
    :rtype: {podName: []values}
    """
    cpu_values = {}
    for file in files:
        with open(file) as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            for row in csv_reader:
                # for each row remove the random suffix from the pod name
                # and concatenate the values to the existing ones
                regex = r"(.*)-[a-z0-9]{9,10}-[a-z0-9]{5}$"
                x = re.search(regex, row[0])
                if x is not None:
                    podName = x.group(1)
                    if podName not in cpu_values:
                        cpu_values[podName] = []
                    cpu_values[podName] = cpu_values[podName] + row[1:]
    return cpu_values


def aggregateMemory(files):
    """
    Aggregates memory consumption from multiple runs of the pipeline
    :param files: The list of files to aggregate
    :type files: []str
    :return: a dictionary of aggregate stats
    :rtype: {podName: []values}
    """
    # this function assumes that the files are ordered by time
    mem_values = {}
    for file in files:
        with open(file) as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            for row in csv_reader:
                # for each row remove the random suffix from the pod name
                # and concatenate the values to the existing ones
                regex = r"(.*)-[a-z0-9]{9,10}-[a-z0-9]{5}$"
                x = re.search(regex, row[0])
                if x is not None:
                    podName = x.group(1)
                    if podName not in mem_values:
                        mem_values[podName] = []
                    mem_values[podName] = mem_values[podName] + row[1:]
    return mem_values
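
# Both aggregators above key each row by the pod name with its trailing random
# suffixes stripped, e.g. a (hypothetical) pod named
# "voltha-adapter-openonu-55f6d77b5f-7x9km" is aggregated under
# "voltha-adapter-openonu".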


def aggregateEtcd(files):
    """
    Aggregates etcd key counts and store sizes from multiple runs of the pipeline,
    keyed by the topology identifier found in each file path
    :param files: The list of files to aggregate
    :type files: []str
    :return: the aggregated key counts and sizes
    :rtype: [{topology: []keys}, {topology: []sizes}]
    """
    etcd_size = {}
    etcd_keys = {}
    for file in files:
        with open(file) as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            # the topology identifier is a 5-character path segment of digits and dashes
            regex = r".*/([0-9-]{5})/.*"
            topology = re.search(regex, file).group(1)
            for row in csv_reader:
                if row[0] == "keys":
                    if topology not in etcd_keys:
                        etcd_keys[topology] = []
                    etcd_keys[topology].append(row[1])
                if row[0] == "size":
                    if topology not in etcd_size:
                        etcd_size[topology] = []
                    etcd_size[topology].append(row[1])
    return [etcd_keys, etcd_size]


def aggregateKafka(files):
    """
    Aggregates the per-topic Kafka message counts from multiple runs of the pipeline
    :param files: The list of files to aggregate
    :type files: []str
    :return: a dictionary of aggregate stats
    :rtype: {topic: []counts}
    """
    kafka = {}
    for file in files:
        with open(file) as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            for row in csv_reader:
                topic = row[0]
                count = row[1]
                if topic not in kafka:
                    kafka[topic] = []
                kafka[topic].append(count)
    return kafka


def aggregateStats(stat, files, out_dir):
    # sort files in alphabetical order;
    # we assume that we always run the topologies in incremental order
    files.sort()
    if stat == "memory":
        agg = aggregateMemory(files)
        data_to_csv(agg, output="%s/aggregated-memory.csv" % out_dir)
    if stat == "cpu":
        agg = aggregateCpu(files)
        data_to_csv(agg, output="%s/aggregated-cpu.csv" % out_dir)
    if stat == "etcd_stats":
        [keys, size] = aggregateEtcd(files)
        data_to_csv(keys, output="%s/aggregated-etcd-keys.csv" % out_dir)
        data_to_csv(size, output="%s/aggregated-etcd-size.csv" % out_dir)
    if stat == "kafka_msg_per_topic":
        agg = aggregateKafka(files)
        data_to_csv(agg, output="%s/aggregated-kafka-msg-count.csv" % out_dir)


def main(source, out_dir):
    for stat in STATS_TO_AGGREGATE:
        files = glob.glob('%s/**/%s.csv' % (source, stat), recursive=True)
        aggregateStats(stat, files, out_dir)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(prog="stats-aggregation")
    parser.add_argument("-o", "--output", help="Where to output the generated files", default="plots")
    parser.add_argument("-s", "--source", help="Directory in which to look for stats", required=True)
    args = parser.parse_args()
    main(args.source, args.output)
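
# Example invocation (script filename and paths are illustrative):
#   python3 stats_aggregation.py --source ./stats --output ./plots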