# -*- python -*-
# -----------------------------------------------------------------------
# Copyright 2022-2024 Open Networking Foundation (ONF) and the ONF Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -----------------------------------------------------------------------
# This tool collects CPU and Memory information for each container in the VOLTHA stack
# NOTE
# Collecting the info for all containers in the same chart can be confusing;
# we may want to create subcharts for the different groups, e.g.: infra, ONOS, core, adapters
import csv
from sys import platform as sys_pf
if sys_pf == 'darwin':
    import matplotlib
    matplotlib.use("TkAgg")
import argparse
import requests
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime
import time
EXCLUDED_POD_NAMES = [
    "kube", "coredns", "kind", "grafana",
    "prometheus", "tiller", "control-plane",
    "calico", "nginx", "registry", "cattle", "canal", "metrics",
]
DATE_FORMATTER_FN = mdates.DateFormatter('%Y-%m-%d %H:%M:%S')
KAFKA_TOPICS = [
    "openolt",
    "brcm_openomci_onu",
    "voltha",
    "adapters",
    "rwcore",
]
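# All queries below go through the Prometheus range-query API
# (GET /api/v1/query_range), which returns JSON shaped like the following
# (illustrative sample, not real VOLTHA data):
#
#   {"status": "success",
#    "data": {"resultType": "matrix",
#             "result": [{"metric": {"pod": "voltha-voltha-rw-core"},
#                         "values": [[1640995200, "0.42"], [1640995230, "0.45"]]}]}}
#
# The helpers in this file operate on the "result" list of such responses.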
def main(address, out_folder, since, namespace="default", ratePeriod="5m", step=30):
    """
    Query Prometheus and generate .pdf files for the CPU and Memory consumption of each POD
    :param address: string The address of the Prometheus instance to query
    :param out_folder: string The output folder (where to save the .pdf files)
    :param since: int When to start collecting data (minutes in the past)
    :param namespace: string The Kubernetes namespace to collect metrics for
    :param ratePeriod: string The range used to compute the CPU usage rate, e.g. "5m"
    :param step: int The query resolution step, in seconds
    :return: void
    """
    time_delta = int(since) * 60
    container_mem_query = 'sum by(pod) (container_memory_working_set_bytes{namespace="%s",container!="",container!="POD"})' % namespace
    container_cpu_query = 'sum by(pod) (rate(container_cpu_usage_seconds_total{namespace="%s",container!="",container!="POD"}[%s]))' % (namespace, ratePeriod)
    now = time.time()
    cpu_params = {
        "query": container_cpu_query,
        "start": now - time_delta,
        "end": now,
        "step": step,
    }
    print("CPU usage query: %s" % cpu_params)
    r = requests.get("http://%s/api/v1/query_range" % address, cpu_params)
    print("Downloading CPU info from: %s" % r.url)
    container_cpu = r.json()["data"]["result"]
    containers = remove_unwanted_containers(container_cpu)
    plot_cpu_consumption(containers, output="%s/cpu.pdf" % out_folder)
    data_to_csv(containers, output="%s/cpu.csv" % out_folder,
                convert_values=lambda values: ["{:.2f}".format(v) for v in values])
    mem_params = {
        "query": container_mem_query,
        "start": now - time_delta,
        "end": now,
        "step": step,
    }
    print("Memory query: %s" % mem_params)
    r = requests.get("http://%s/api/v1/query_range" % address, mem_params)
    print("Downloading Memory info from: %s" % r.url)
    container_mem = r.json()["data"]["result"]
    containers = remove_unwanted_containers(container_mem)
    plot_memory_consumption(containers, output="%s/memory.pdf" % out_folder)
    data_to_csv(containers, output="%s/memory.csv" % out_folder,
                convert_values=lambda values: ["{:.2f}".format(bytesto(v, "m")) for v in values])
    print("Downloading KAFKA stats")
    get_kafka_stats(address, out_folder)
    print("Downloading ETCD stats")
    get_etcd_stats(address, out_folder)
def data_to_csv(containers, output=None, convert_values=None):
    """
    Take a list of Prometheus metrics and dump them to a CSV file
    :param containers: Prometheus metrics
    :param output: Destination file
    :param convert_values: Function to convert the values, takes a list of numbers
    """
    if not containers:
        return
    # we assume all the containers have the same timestamps
    # FIXME pods may have different timestamps depending on when the collection started
    # - find the longest list in containers
    # - add empty values at the beginning of the other lists
    container_index_longest_row = 0
    longest_row = 0
    for i, c in enumerate(containers):
        cur_row_len = len(c["values"])
        if cur_row_len > longest_row:
            longest_row = cur_row_len
            container_index_longest_row = i
    dates = [datetime.fromtimestamp(x[0]) for x in containers[container_index_longest_row]["values"]]
    with open(output, "w+", newline="") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow([''] + dates)
        for c in containers:
            name = c["metric"]["pod"]
            data = c["values"]
            values = [float(x[1]) for x in data]
            if convert_values:
                values = convert_values(values)
            csv_writer.writerow([name] + values)
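# A minimal sketch for the FIXME above (hypothetical helper, not wired into
# data_to_csv): left-pad shorter rows with blanks so every row aligns with the
# longest timestamp list.
def _pad_row(values, target_len):
    # prepend empty cells until the row reaches target_len
    return [""] * (target_len - len(values)) + values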
def plot_cpu_consumption(containers, output=None):
    """
    Plot the CPU consumption of each POD over time and save it as a .pdf
    :param containers: Prometheus metrics
    :param output: Destination file
    """
    fig, ax = plt.subplots()
    ax.xaxis.set_major_formatter(DATE_FORMATTER_FN)
    ax.xaxis_date()
    fig.autofmt_xdate()
    plt.title("CPU Usage per POD")
    plt.xlabel("Timestamp")
    plt.ylabel("CPU cores used")
    for i, c in enumerate(containers):
        name = c["metric"]["pod"]
        data = c["values"]
        dates = [datetime.fromtimestamp(x[0]) for x in data]
        values = [float(x[1]) for x in data]
        plt.plot(dates, values, label=name, lw=2, color=get_line_color(name, i))
        # plt.plot(dates[1:], get_diff(values), label=name, lw=2, color=get_line_color(name))
    plt.legend(loc='upper left', title="CPU Consumption", bbox_to_anchor=(1.05, 1))
    fig.set_size_inches(20, 11)
    plt.savefig(output, bbox_inches="tight")
def plot_memory_consumption(containers, output=None):
    """
    Plot the memory usage of each POD over time and save it as a .pdf
    :param containers: Prometheus metrics
    :param output: Destination file
    """
    fig, ax = plt.subplots()
    ax.xaxis.set_major_formatter(DATE_FORMATTER_FN)
    ax.xaxis_date()
    fig.autofmt_xdate()
    plt.title("Memory Usage")
    plt.xlabel("Timestamp")
    plt.ylabel("MB")
    for i, c in enumerate(containers):
        name = c["metric"]["pod"]
        data = c["values"]
        dates = [datetime.fromtimestamp(x[0]) for x in data]
        values = [bytesto(float(x[1]), "m") for x in data]
        # plt.plot(dates[1:], get_diff(values), label=name, lw=2, color=get_line_color(name))
        plt.plot(dates[1:], values[1:], label=name, lw=2, color=get_line_color(name, i))
    plt.legend(loc='upper left', title="Memory Usage", bbox_to_anchor=(1.05, 1))
    fig.set_size_inches(20, 11)
    plt.savefig(output, bbox_inches="tight")
def remove_unwanted_containers(cpus):
    """
    Filter out the series whose pod name matches any entry in EXCLUDED_POD_NAMES
    :param cpus: Prometheus metrics
    :return: the filtered list of metrics
    """
    res = []
    for c in cpus:
        if "pod" in c["metric"]:
            pod_name = c["metric"]["pod"]
            if any(x in pod_name for x in EXCLUDED_POD_NAMES):
                continue
            res.append(c)
    return res
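# Illustrative example (made-up pod names): a series with
# {"pod": "coredns-5d78c9869d-abcde"} is dropped because "coredns" appears in
# EXCLUDED_POD_NAMES, while {"pod": "voltha-voltha-rw-core-0"} is kept.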
def get_line_color(container_name, i):
    """
    Return a fixed color for well-known containers, falling back to a
    rotating palette (indexed by i) for everything else
    """
    colors = {
        "bbsim0": "#884EA0",
        "bbsim1": "#9B59B6",
        "bbsim-sadis-server": "#D2B4DE",
        "onos-atomix-0": "#85C1E9",
        "onos-atomix-1": "#7FB3D5",
        "onos-atomix-2": "#3498DB",
        "onos-onos-classic-0": "#1A5276",
        "onos-onos-classic-1": "#1B4F72",
        "onos-onos-classic-2": "#154360",
        "etcd-0": "#7D6608",
        "etcd-1": "#9A7D0A",
        "etcd-2": "#B7950B",
        "open-olt-voltha-adapter-openolt": "#7E5109",
        "open-onu-voltha-adapter-openonu-0": "#6E2C00",
        "open-onu-voltha-adapter-openonu-1": "#873600",
        "open-onu-voltha-adapter-openonu-2": "#A04000",
        "open-onu-voltha-adapter-openonu-3": "#BA4A00",
        "open-onu-voltha-adapter-openonu-4": "#D35400",
        "open-onu-voltha-adapter-openonu-5": "#D35400",
        "open-onu-voltha-adapter-openonu-6": "#E59866",
        "open-onu-voltha-adapter-openonu-7": "#EDBB99",
        "kafka-0": "#4D5656",
        "kafka-1": "#5F6A6A",
        "kafka-2": "#717D7E",
        "kafka-zookeeper-0": "#839192",
        "kafka-zookeeper-1": "#95A5A6",
        "kafka-zookeeper-2": "#717D7E",
        "radius": "#82E0AA",
        "voltha-voltha-ofagent": "#641E16",
        "voltha-voltha-rw-core": "#7B241C",
    }
    colorsToPickup = [
        "#f44336",
        "#4bde31",
        "#31dea7",
        "#31a5de",
        "#313dde",
        "#ffac2c",
        "#f16443",
        "#8cff00",
        "#990000",
        "#b8ce85",
        "#5662f6",
        "#e42491",
        "#5b4f5b",
        "#df1019",
        "#b9faf8",
        "#1d903f",
        "#56c7f2",
        "#40dfa0",
        "#5662f6",
        "#400080",
        "#b73e34",
    ]
    if container_name in colors:
        return colors[container_name]
    elif "openolt" in container_name:
        return colors["open-olt-voltha-adapter-openolt"]
    elif "ofagent" in container_name:
        return colors["voltha-voltha-ofagent"]
    elif "rw-core" in container_name:
        return colors["voltha-voltha-rw-core"]
    elif "bbsim0" in container_name:
        return colors["bbsim0"]
    elif "bbsim1" in container_name:
        return colors["bbsim1"]
    elif "bbsim-sadis-server" in container_name:
        return colors["bbsim-sadis-server"]
    elif "radius" in container_name:
        return colors["radius"]
    else:
        # unknown container: pick a color from the fallback palette
        return colorsToPickup[i % len(colorsToPickup)]
def get_diff(data):
    # get the delta between each data point and the previous one
    return [x - data[i - 1] for i, x in enumerate(data)][1:]
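# Worked example: get_diff([1, 3, 6]) returns [2, 3]; the first element has no
# predecessor, so it is dropped.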
def bytesto(b, to, bsize=1024):
    """
    Convert bytes to kilobytes, megabytes, etc.
    :param b: the value in bytes
    :param to: the target unit: 'k', 'm', 'g', 't', 'p' or 'e'
    :param bsize: the divisor between units (1024 for binary prefixes)
    sample code:
        print('mb= ' + str(bytesto(314575262000000, 'm')))
    sample output:
        mb= 300002347.946
    """
    units = {'k': 1, 'm': 2, 'g': 3, 't': 4, 'p': 5, 'e': 6}
    r = float(b)
    for _ in range(units[to]):
        r = r / bsize
    return r
def get_etcd_stats(address, out_folder):
    """
    Collect the ETCD database size and key count and dump them to a CSV file
    :param address: The Prometheus address
    :param out_folder: The folder in which to store the output files
    """
    etcd_stats = {
        "size": "etcd_debugging_mvcc_db_total_size_in_bytes",
        "keys": "etcd_debugging_mvcc_keys_total",
    }
    etcd = {}
    # we only care about the latest value, so a short interval is enough
    time_delta = 80
    for stat, query in etcd_stats.items():
        now = time.time()
        etcd_params = {
            "query": "%s{}" % query,
            "start": now - time_delta,
            "end": now,
            "step": "30",
        }
        r = requests.get("http://%s/api/v1/query_range" % address, etcd_params)
        etcdStats = r.json()["data"]["result"]
        if etcdStats:
            # keep only the value at the last timestamp
            etcd[stat] = etcdStats[0]["values"][-1][1]
    with open("%s/etcd_stats.csv" % out_folder, "w+", newline="") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for k, v in etcd.items():
            csv_writer.writerow([k, v])
def get_kafka_stats(address, out_folder):
    """
    Collect the current offset per Kafka topic and dump it to a CSV file
    :param address: The Prometheus address
    :param out_folder: The folder in which to store the output files
    """
    # get the latest information for all topics; we only care about the last
    # value, so a short interval is fine
    now = time.time()
    time_delta = 80
    kafka_params = {
        "query": "kafka_topic_partition_current_offset{}",
        "start": now - time_delta,
        "end": now,
        "step": "30",
    }
    r = requests.get("http://%s/api/v1/query_range" % address, kafka_params)
    msg_per_topic = {}
    for t in r.json()["data"]["result"]:
        # we only care about some topics
        topic_name = t["metric"]["topic"]
        if any(x in topic_name for x in KAFKA_TOPICS):
            # keep only the value at the last timestamp
            msg_per_topic[topic_name] = t["values"][-1][1]
    with open("%s/kafka_msg_per_topic.csv" % out_folder, "w+", newline="") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for k, v in msg_per_topic.items():
            csv_writer.writerow([k, v])
if __name__ == "__main__":
    parser = argparse.ArgumentParser(prog="sizing")
    parser.add_argument("-a", "--address", help="The address of the Prometheus instance we're targeting",
                        default="127.0.0.1:31301")
    parser.add_argument("-o", "--output", help="Where to output the generated files",
                        default="plots")
    parser.add_argument("-s", "--since", help="When to start sampling the data (in minutes before now)",
                        type=int, default=10)
    parser.add_argument("-n", "--namespace", help="Kubernetes namespace for collecting metrics",
                        default="default")
    parser.add_argument("-r", "--rate", help="Rate period for the CPU usage query, e.g. 5m",
                        default="5m")
    parser.add_argument("-t", "--step", help="Query resolution step in seconds",
                        type=int, default=30)
    args = parser.parse_args()
    main(args.address, args.output, args.since, args.namespace, args.rate, args.step)
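# Example invocation (assuming this file is saved as sizing.py, with a VOLTHA
# stack deployed in the illustrative "voltha" namespace and Prometheus
# reachable on the default address):
#   python sizing.py -a 127.0.0.1:31301 -o plots -s 30 -n voltha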