Blame - tests/scale/sizing.py - voltha-system-tests

blob: f00dbe6108e67e464fb40d47fbbbba96053ad330 [file] [log] [blame]

Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	1	# Copyright 2017-present Open Networking Foundation
				2	#
				3	# Licensed under the Apache License, Version 2.0 (the "License");
				4	# you may not use this file except in compliance with the License.
				5	# You may obtain a copy of the License at
				6	#
				7	# http://www.apache.org/licenses/LICENSE-2.0
				8	#
				9	# Unless required by applicable law or agreed to in writing, software
				10	# distributed under the License is distributed on an "AS IS" BASIS,
				11	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	# See the License for the specific language governing permissions and
				13	# limitations under the License.
				14
				15	# This tool collects CPU and Memory information for each container in the VOLTHA stack
				16
				17	# NOTE
				18	# Collecting the info for all containers in the same chart can be confusing,
				19	# we may want to create subcharts for the different groups, eg: infra, ONOS, core, adapters
				20
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	21	import csv
				22	from sys import platform as sys_pf
				23
				24	if sys_pf == 'darwin':
				25	import matplotlib
				26
				27	matplotlib.use("TkAgg")
				28
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	29	import argparse
				30	import requests
				31	import matplotlib.pyplot as plt
				32	import matplotlib.dates as mdates
				33	from datetime import datetime
				34	import time
				35
# Substrings identifying the PODs that are left out of the report: a POD is
# dropped when any of these appears anywhere in its name.
EXCLUDED_POD_NAMES = [
    "kube", "coredns", "kind", "grafana",
    "prometheus", "tiller", "control-plane",
    "calico", "nginx", "registry", "cattle", "canal", "metrics",
]

# Formatter applied to the x axis (timestamps) of the generated plots.
DATE_FORMATTER_FN = mdates.DateFormatter('%Y-%m-%d %H:%M:%S')

# Substrings of the Kafka topic names whose current offset is exported:
# a topic is reported when any of these appears anywhere in its name.
KAFKA_TOPICS = [
    "openolt",
    "brcm_openomci_onu",
    "voltha",
    "adapters",
    "rwcore"
]
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	51
				52	def main(address, out_folder, since):
				53	"""
				54	Query Prometheus and generate .pdf files for CPU and Memory consumption for each POD
				55	:param address: string The address of the Prometheus instance to query
				56	:param out_folder: string The output folder (where to save the .pdf files)
				57	:param since: int When to start collection data (minutes in the past)
				58	:return: void
				59	"""
				60	time_delta = int(since) * 60
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	61
Matteo Scandolo	86334f5	2020-08-28 10:56:25 -0700	[diff] [blame]	62	container_mem_query = "sum by(pod) (container_memory_working_set_bytes{namespace='default',container!='',container!='POD'})"
				63
				64	container_cpu_query = "sum by(pod) (rate(container_cpu_usage_seconds_total{namespace='default',container!='',container!='POD'}[%sm])) * 100" % since
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	65
				66	now = time.time()
				67	cpu_params = {
				68	"query": container_cpu_query,
				69	"start": now - time_delta,
				70	"end": now,
				71	"step": "30",
				72	}
Matteo Scandolo	86334f5	2020-08-28 10:56:25 -0700	[diff] [blame]	73
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	74	r = requests.get("http://%s/api/v1/query_range" % address, cpu_params)
				75	print("Downloading CPU info from: %s" % r.url)
				76	container_cpu = r.json()["data"]["result"]
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	77	containers = remove_unwanted_containers(container_cpu)
				78	plot_cpu_consumption(containers,
Matteo Scandolo	806637d	2020-07-30 02:07:06 +0000	[diff] [blame]	79	output="%s/cpu.pdf" % out_folder)
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	80	data_to_csv(containers, output="%s/cpu.csv" % out_folder,
Matteo Scandolo	86334f5	2020-08-28 10:56:25 -0700	[diff] [blame]	81	convert_values=lambda values: ["{:.2f}".format(v) for v in values])
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	82
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	83	mem_params = {
				84	"query": container_mem_query,
				85	"start": now - time_delta,
				86	"end": now,
				87	"step": "30",
				88	}
				89
				90	r = requests.get("http://%s/api/v1/query_range" % address, mem_params)
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	91	print("Downloading Memory info from: %s" % r.url)
				92	container_mem = r.json()["data"]["result"]
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	93	containers = remove_unwanted_containers(container_mem)
				94	plot_memory_consumption(containers, output="%s/memory.pdf" % out_folder)
				95	data_to_csv(containers, output="%s/memory.csv" % out_folder,
Matteo Scandolo	86334f5	2020-08-28 10:56:25 -0700	[diff] [blame]	96	convert_values=lambda values: ["{:.2f}".format(bytesto(v, "m")) for v in values])
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	97
Matteo Scandolo	88d01c1	2020-11-02 17:11:26 -0800	[diff] [blame]	98	print("Downloading KAFKA stats")
				99	get_kafka_stats(address, out_folder)
				100	print("Downloading ETCD stats")
				101	get_etcd_stats(address, out_folder)
				102
				103
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	104
				105	def data_to_csv(containers, output=None, convert_values=None):
				106	"""
				107	Get a list of prometheus metrics and dumps them in a csv
				108	:param containers: Prometheus metrics
				109	:param output: Destination file
				110	:param convert_values: Function to convert the valus, take a list on numbers
				111	"""
				112	csv_file = open(output, "w+")
				113	csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
				114
				115	# we assume all the containers have the same timestamps
Matteo Scandolo	7e3dd12	2020-11-04 15:24:00 -0800	[diff] [blame]	116	# FIXME pods may have different timestamps depending on when the collection started
				117	# - find the longest list in containers
				118	# - add empty values at the beginning of the other list
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	119	dates = [datetime.fromtimestamp(x[0]) for x in containers[0]["values"]]
				120	csv_writer.writerow([''] + dates)
				121
				122	for c in containers:
				123	name = c["metric"]["pod"]
				124	data = c["values"]
				125
				126	values = [float(x[1]) for x in data]
				127
				128	if convert_values:
				129	values = convert_values(values)
				130	csv_writer.writerow([name] + values)
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	131
				132
				133	def plot_cpu_consumption(containers, output=None):
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	134	plt.figure('cpu')
				135	fig, ax = plt.subplots()
				136	ax.xaxis.set_major_formatter(DATE_FORMATTER_FN)
				137	ax.xaxis_date()
				138	fig.autofmt_xdate()
				139
				140	plt.title("CPU Usage per POD")
				141	plt.xlabel("Timestamp")
				142	plt.ylabel("% used")
				143
				144	for c in containers:
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	145	name = c["metric"]["pod"]
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	146	data = c["values"]
				147
				148	dates = [datetime.fromtimestamp(x[0]) for x in data]
				149
				150	values = [float(x[1]) for x in data]
				151
				152	plt.plot(dates, values, label=name, lw=2, color=get_line_color(name))
				153	# plt.plot(dates[1:], get_diff(values), label=name, lw=2, color=get_line_color(name))
				154
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	155	plt.legend(loc='upper left', title="CPU Consumption", bbox_to_anchor=(1.05, 1))
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	156
				157	fig = plt.gcf()
				158	fig.set_size_inches(20, 11)
				159
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	160	plt.savefig(output, bbox_inches="tight")
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	161
				162
				163	def plot_memory_consumption(containers, output=None):
				164	plt.figure("memory")
				165	fig, ax = plt.subplots()
				166	ax.xaxis.set_major_formatter(DATE_FORMATTER_FN)
				167	ax.xaxis_date()
				168	fig.autofmt_xdate()
				169	plt.title("Memory Usage")
				170	plt.xlabel("Timestamp")
				171	plt.ylabel("MB")
				172
				173	for c in containers:
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	174	name = c["metric"]["pod"]
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	175	data = c["values"]
				176
				177	dates = [datetime.fromtimestamp(x[0]) for x in data]
				178	values = [bytesto(float(x[1]), "m") for x in data]
				179
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	180	# plt.plot(dates[1:], get_diff(values), label=name, lw=2, color=get_line_color(name))
				181	plt.plot(dates[1:], values[1:], label=name, lw=2, color=get_line_color(name))
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	182
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	183	plt.legend(loc='upper left', title="Memory Usage", bbox_to_anchor=(1.05, 1))
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	184
				185	fig = plt.gcf()
				186	fig.set_size_inches(20, 11)
				187
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	188	plt.savefig(output, bbox_inches="tight")
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	189
				190
				191	def remove_unwanted_containers(cpus):
				192	res = []
				193	for c in cpus:
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	194
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	195	if "pod" in c["metric"]:
				196	pod_name = c["metric"]["pod"]
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	197	if any(x in pod_name for x in EXCLUDED_POD_NAMES):
				198	continue
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	199	res.append(c)
Matteo Scandolo	806637d	2020-07-30 02:07:06 +0000	[diff] [blame]	200
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	201	return res
				202
				203
				204	def get_line_color(container_name):
				205	colors = {
				206	"bbsim0": "#884EA0",
				207	"bbsim1": "#9B59B6",
				208	"bbsim-sadis-server": "#D2B4DE",
				209	"onos-atomix-0": "#85C1E9",
				210	"onos-atomix-1": "#7FB3D5",
				211	"onos-atomix-2": "#3498DB",
				212	"onos-onos-classic-0": "#1A5276",
				213	"onos-onos-classic-1": "#1B4F72",
				214	"onos-onos-classic-2": "#154360",
				215	"etcd-0": "#7D6608",
				216	"etcd-1": "#9A7D0A",
				217	"etcd-2": "#B7950B",
				218	"open-olt-voltha-adapter-openolt": "#7E5109",
				219	"open-onu-voltha-adapter-openonu-0": "#6E2C00",
				220	"open-onu-voltha-adapter-openonu-1": "#873600",
				221	"open-onu-voltha-adapter-openonu-2": "#A04000",
				222	"open-onu-voltha-adapter-openonu-3": "#BA4A00",
				223	"open-onu-voltha-adapter-openonu-4": "#D35400",
				224	"open-onu-voltha-adapter-openonu-5": "#D35400",
				225	"open-onu-voltha-adapter-openonu-6": "#E59866",
				226	"open-onu-voltha-adapter-openonu-7": "#EDBB99",
				227	"kafka-0": "#4D5656",
				228	"kafka-1": "#5F6A6A",
				229	"kafka-2": "#717D7E",
				230	"kafka-zookeeper-0": "#839192",
				231	"kafka-zookeeper-1": "#95A5A6",
				232	"kafka-zookeeper-2": "#717D7E",
				233	"radius": "#82E0AA",
				234	"voltha-voltha-ofagent": "#641E16",
				235	"voltha-voltha-rw-core": "#7B241C",
				236	}
				237
				238	if container_name in colors:
				239	return colors[container_name]
				240	elif "openolt" in container_name:
				241	return colors["open-olt-voltha-adapter-openolt"]
				242	elif "ofagent" in container_name:
				243	return colors["voltha-voltha-ofagent"]
				244	elif "rw-core" in container_name:
				245	return colors["voltha-voltha-rw-core"]
				246	elif "bbsim0" in container_name:
				247	return colors["bbsim0"]
				248	elif "bbsim1" in container_name:
				249	return colors["bbsim1"]
				250	elif "bbsim-sadis-server" in container_name:
				251	return colors["bbsim-sadis-server"]
				252	elif "radius" in container_name:
				253	return colors["radius"]
				254	else:
				255	return "black"
				256
				257
				258	def get_diff(data):
Matteo Scandolo	7274b43	2020-08-27 14:28:43 -0700	[diff] [blame]	259	# get the delta between the current data and the previous point
Matteo Scandolo	3ed8987	2020-07-15 17:01:02 -0700	[diff] [blame]	260	return [x - data[i - 1] for i, x in enumerate(data)][1:]
				261
				262
				263	def bytesto(b, to, bsize=1024):
				264	"""convert bytes to megabytes, etc.
				265	sample code:
				266	print('mb= ' + str(bytesto(314575262000000, 'm')))
				267	sample output:
				268	mb= 300002347.946
				269	"""
				270
				271	a = {'k': 1, 'm': 2, 'g': 3, 't': 4, 'p': 5, 'e': 6}
				272	r = float(b)
				273	for i in range(a[to]):
				274	r = r / bsize
				275
				276	return r
				277
				278
Matteo Scandolo	88d01c1	2020-11-02 17:11:26 -0800	[diff] [blame]	279
				280	def get_etcd_stats(address, out_folder):
				281	"""
				282	:param address: The prometheus address
				283	:param out_folder: The folder in which store the output files
				284	"""
				285
				286	etcd_stats = {
				287	"size":"etcd_debugging_mvcc_db_total_size_in_bytes",
				288	"keys":"etcd_debugging_mvcc_keys_total"
				289	}
				290
				291	etcd = {}
				292
				293	time_delta = 80
				294	for stat,query in etcd_stats.items():
				295	now = time.time()
				296	etcd_params = {
				297	"query": "%s{}" % query,
				298	"start": now - time_delta,
				299	"end": now,
				300	"step": "30",
				301	}
				302	r = requests.get("http://%s/api/v1/query_range" % address, etcd_params)
				303
				304	i = r.json()["data"]["result"][0]
				305	etcd[stat] = i["values"][-1][1]
				306
				307	csv_file = open("%s/etcd_stats.csv" % out_folder, "w+")
				308	csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
				309
				310	for k,v in etcd.items():
				311	csv_writer.writerow([k, v])
				312
				313	def get_kafka_stats(address, out_folder):
				314	"""
				315	:param address: The prometheus address
				316	:param out_folder: The folder in which store the output files
				317	"""
				318	# get the last information for all topics, we only care about the last value so a short interval is fine
				319	now = time.time()
				320	time_delta = 80
				321	kafka_params = {
				322	"query": "kafka_topic_partition_current_offset{}",
				323	"start": now - time_delta,
				324	"end": now,
				325	"step": "30",
				326	}
				327
				328	r = requests.get("http://%s/api/v1/query_range" % address, kafka_params)
				329
				330	msg_per_topic = {}
				331
				332	for t in r.json()["data"]["result"]:
				333	# we only care about some topics
				334	topic_name = t["metric"]["topic"]
				335
				336	if any(x in topic_name for x in KAFKA_TOPICS):
				337	# get only the value at the last timestamp
				338	msg_per_topic[t["metric"]["topic"]] = t["values"][-1][1]
				339
				340	csv_file = open("%s/kafka_msg_per_topic.csv" % out_folder, "w+")
				341	csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
				342
				343	for k,v in msg_per_topic.items():
				344	csv_writer.writerow([k, v])
				345
# Command line entry point: parse the options and run the collection.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(prog="sizing")
    parser.add_argument("-a", "--address", help="The address of the Prometheus instance we're targeting",
                        default="127.0.0.1:31301")
    parser.add_argument("-o", "--output", help="Where to output the generated files",
                        default="plots")
    # type=int lets argparse reject a non numeric value with a usage message
    # instead of failing later with a traceback
    parser.add_argument("-s", "--since", help="When to start sampling the data (in minutes before now)",
                        type=int, default=10)

    args = parser.parse_args()
    main(args.address, args.output, args.since)