# -*- python -*-
# -----------------------------------------------------------------------
# Copyright 2022-2024 Open Networking Foundation (ONF) and the ONF Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -----------------------------------------------------------------------

# This tool collects CPU and memory information for each container in the VOLTHA stack.

# NOTE
# Collecting the info for all containers in the same chart can be confusing;
# we may want to create subcharts for the different groups, e.g. infra, ONOS, core, adapters.

import argparse
import csv
import time
from datetime import datetime
from sys import platform as sys_pf

if sys_pf == 'darwin':
    # Select the TkAgg backend on macOS; this has to happen before
    # matplotlib.pyplot is imported below.
    import matplotlib

    matplotlib.use("TkAgg")

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import requests

# Substrings of pod names that are filtered out of the reports: a pod is
# skipped when its name contains any of these.
EXCLUDED_POD_NAMES = [
    "kube", "coredns", "kind", "grafana",
    "prometheus", "tiller", "control-plane",
    "calico", "nginx", "registry", "cattle", "canal", "metrics",
]

# Formatter applied to the x axis (timestamps) of the generated plots.
DATE_FORMATTER_FN = mdates.DateFormatter('%Y-%m-%d %H:%M:%S')

# Substrings of the Kafka topic names we report on: a topic is kept when its
# name contains any of these.
KAFKA_TOPICS = [
    "openolt",
    "brcm_openomci_onu",
    "voltha",
    "adapters",
    "rwcore",
]

def _query_range(address, params, what):
    """
    Run one Prometheus range query and return its list of result series
    :param address: string The address (host:port) of the Prometheus instance
    :param params: dict The query parameters (query, start, end, step)
    :param what: string Label used in the progress message
    :return: list The "result" entries of the response
    :raises requests.HTTPError: if Prometheus answers with an error status
    """
    response = requests.get("http://%s/api/v1/query_range" % address, params)
    print("Downloading %s info from: %s" % (what, response.url))
    # Surface HTTP failures as such instead of failing later while indexing
    # the response payload.
    response.raise_for_status()
    return response.json()["data"]["result"]


def main(address, out_folder, since, namespace="default", ratePeriod="5m", step=30):
    """
    Query Prometheus and generate .pdf and .csv files for CPU and Memory consumption for each POD,
    then dump the Kafka and ETCD statistics
    :param address: string The address of the Prometheus instance to query
    :param out_folder: string The output folder (where to save the generated files)
    :param since: int When to start collection data (minutes in the past)
    :param namespace: string The Kubernetes namespace the metrics are collected from
    :param ratePeriod: string The range used in the rate() of the CPU query (e.g. "5m")
    :param step: The query resolution step passed to Prometheus (seconds)
    :return: void
    """
    time_delta = int(since) * 60

    container_mem_query = 'sum by(pod) (container_memory_working_set_bytes{namespace="%s",container!="",container!="POD"})' % namespace

    container_cpu_query = 'sum by(pod) (rate(container_cpu_usage_seconds_total{namespace="%s",container!="",container!="POD"}[%s]))' % (namespace, ratePeriod)

    # Both queries cover exactly the same time window.
    now = time.time()
    window = {
        "start": now - time_delta,
        "end": now,
        "step": step,
    }

    cpu_params = dict(window, query=container_cpu_query)
    print("CPU usage query: %s" % cpu_params)
    containers = remove_unwanted_containers(_query_range(address, cpu_params, "CPU"))
    plot_cpu_consumption(containers, output="%s/cpu.pdf" % out_folder)
    data_to_csv(containers, output="%s/cpu.csv" % out_folder,
                convert_values=lambda values: ["{:.2f}".format(v) for v in values])

    mem_params = dict(window, query=container_mem_query)
    print("Memory query: %s" % mem_params)
    containers = remove_unwanted_containers(_query_range(address, mem_params, "Memory"))
    plot_memory_consumption(containers, output="%s/memory.pdf" % out_folder)
    # Memory samples are reported in bytes; the CSV carries them in MB.
    data_to_csv(containers, output="%s/memory.csv" % out_folder,
                convert_values=lambda values: ["{:.2f}".format(bytesto(v, "m")) for v in values])

    print("Downloading KAFKA stats")
    get_kafka_stats(address, out_folder)
    print("Downloading ETCD stats")
    get_etcd_stats(address, out_folder)


def data_to_csv(containers, output=None, convert_values=None):
    """
    Get a list of prometheus metrics and dump them in a csv

    The first row holds the timestamps, every following row holds the pod name
    followed by its values. Pods may have started reporting at different times,
    so each value is placed in the column of its own timestamp and the columns
    for which a pod has no sample are left empty.

    :param containers: Prometheus metrics (each with a "metric" holding the "pod" name
                       and a list of [timestamp, value] "values")
    :param output: Destination file
    :param convert_values: Function to convert the values, takes a list of numbers and
                           is expected to return one converted entry per number
    """
    # newline="" is what the csv module asks for; the context manager makes
    # sure the file is closed, including on the early return below.
    with open(output, "w", newline="") as csv_file:
        if not containers:
            # Nothing to report: leave an empty file behind.
            return

        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        # Header: every timestamp seen in any of the series, in order.
        timestamps = sorted({sample[0] for c in containers for sample in c["values"]})
        csv_writer.writerow([''] + [datetime.fromtimestamp(ts) for ts in timestamps])

        for c in containers:
            name = c["metric"]["pod"]
            samples = c["values"]

            values = [float(sample[1]) for sample in samples]
            if convert_values:
                values = convert_values(values)

            # Align the values with the header columns by timestamp.
            value_at = dict(zip((sample[0] for sample in samples), values))
            csv_writer.writerow([name] + [value_at.get(ts, '') for ts in timestamps])


def plot_cpu_consumption(containers, output=None):
    """
    Plot the CPU usage of every pod over time and save the chart
    :param containers: Prometheus metrics (each with a "metric" holding the "pod" name
                       and a list of [timestamp, value] "values")
    :param output: Destination file of the chart
    """
    # A single named figure; it is closed once saved so figures do not pile up
    # in pyplot's registry.
    fig, ax = plt.subplots(num="cpu", figsize=(20, 11))
    try:
        ax.xaxis.set_major_formatter(DATE_FORMATTER_FN)
        ax.xaxis_date()
        fig.autofmt_xdate()

        ax.set_title("CPU Usage per POD")
        ax.set_xlabel("Timestamp")
        ax.set_ylabel("CPU cores used")

        for i, c in enumerate(containers):
            name = c["metric"]["pod"]
            data = c["values"]

            dates = [datetime.fromtimestamp(x[0]) for x in data]
            values = [float(x[1]) for x in data]

            ax.plot(dates, values, label=name, lw=2, color=get_line_color(name, i))

        # The legend is anchored outside the axes, on the right of the chart.
        ax.legend(loc='upper left', title="CPU Consumption", bbox_to_anchor=(1.05, 1))

        fig.savefig(output, bbox_inches="tight")
    finally:
        plt.close(fig)


def plot_memory_consumption(containers, output=None):
    """
    Plot the memory usage (in MB) of every pod over time and save the chart
    :param containers: Prometheus metrics (each with a "metric" holding the "pod" name
                       and a list of [timestamp, value] "values", the values being bytes)
    :param output: Destination file of the chart
    """
    # A single named figure; it is closed once saved so figures do not pile up
    # in pyplot's registry.
    fig, ax = plt.subplots(num="memory", figsize=(20, 11))
    try:
        ax.xaxis.set_major_formatter(DATE_FORMATTER_FN)
        ax.xaxis_date()
        fig.autofmt_xdate()

        ax.set_title("Memory Usage")
        ax.set_xlabel("Timestamp")
        ax.set_ylabel("MB")

        for i, c in enumerate(containers):
            name = c["metric"]["pod"]
            data = c["values"]

            dates = [datetime.fromtimestamp(x[0]) for x in data]
            values = [bytesto(float(x[1]), "m") for x in data]

            # NOTE(review): the first sample is skipped here while the CPU chart
            # plots every sample -- looks like a leftover, confirm before changing.
            ax.plot(dates[1:], values[1:], label=name, lw=2, color=get_line_color(name, i))

        # The legend is anchored outside the axes, on the right of the chart.
        ax.legend(loc='upper left', title="Memory Usage", bbox_to_anchor=(1.05, 1))

        fig.savefig(output, bbox_inches="tight")
    finally:
        plt.close(fig)


def remove_unwanted_containers(cpus):
    """
    Filter out the metrics of the pods we are not interested in
    :param cpus: Prometheus metrics (each with a "metric" dict of labels)
    :return: list The metrics that carry a "pod" label whose value does not
             contain any of the EXCLUDED_POD_NAMES
    """
    return [
        c for c in cpus
        # Metrics without a pod label cannot be attributed to a pod, skip them.
        if "pod" in c["metric"]
        and not any(x in c["metric"]["pod"] for x in EXCLUDED_POD_NAMES)
    ]


# Fixed colors for the well-known pods, keyed by exact pod name.
_POD_COLORS = {
    "bbsim0": "#884EA0",
    "bbsim1": "#9B59B6",
    "bbsim-sadis-server": "#D2B4DE",
    "onos-atomix-0": "#85C1E9",
    "onos-atomix-1": "#7FB3D5",
    "onos-atomix-2": "#3498DB",
    "onos-onos-classic-0": "#1A5276",
    "onos-onos-classic-1": "#1B4F72",
    "onos-onos-classic-2": "#154360",
    "etcd-0": "#7D6608",
    "etcd-1": "#9A7D0A",
    "etcd-2": "#B7950B",
    "open-olt-voltha-adapter-openolt": "#7E5109",
    "open-onu-voltha-adapter-openonu-0": "#6E2C00",
    "open-onu-voltha-adapter-openonu-1": "#873600",
    "open-onu-voltha-adapter-openonu-2": "#A04000",
    "open-onu-voltha-adapter-openonu-3": "#BA4A00",
    "open-onu-voltha-adapter-openonu-4": "#D35400",
    "open-onu-voltha-adapter-openonu-5": "#D35400",
    "open-onu-voltha-adapter-openonu-6": "#E59866",
    "open-onu-voltha-adapter-openonu-7": "#EDBB99",
    "kafka-0": "#4D5656",
    "kafka-1": "#5F6A6A",
    "kafka-2": "#717D7E",
    "kafka-zookeeper-0": "#839192",
    "kafka-zookeeper-1": "#95A5A6",
    "kafka-zookeeper-2": "#717D7E",
    "radius": "#82E0AA",
    "voltha-voltha-ofagent": "#641E16",
    "voltha-voltha-rw-core": "#7B241C",
}

# (substring, pod whose color is reused) pairs, tried in this order when the
# pod name has no exact entry above.
_POD_COLOR_BY_SUBSTRING = (
    ("openolt", "open-olt-voltha-adapter-openolt"),
    ("ofagent", "voltha-voltha-ofagent"),
    ("rw-core", "voltha-voltha-rw-core"),
    ("bbsim0", "bbsim0"),
    ("bbsim1", "bbsim1"),
    ("bbsim-sadis-server", "bbsim-sadis-server"),
    ("radius", "radius"),
)

# Colors handed out, by position, to the pods that match nothing above.
_FALLBACK_COLORS = (
    "#f44336",
    "#4bde31",
    "#31dea7",
    "#31a5de",
    "#313dde",
    "#ffac2c",
    "#f16443",
    "#8cff00",
    "#990000",
    "#b8ce85",
    "#5662f6",
    "#e42491",
    "#5b4f5b",
    "#df1019",
    "#b9faf8",
    "#1d903f",
    "#56c7f2",
    "#40dfa0",
    "#5662f6",
    "#400080",
    "#b73e34",
)


def get_line_color(container_name, i):
    """
    Pick the color of the line that represents a pod in the charts
    :param container_name: string The name of the pod
    :param i: int The position of the pod among the plotted ones, used to pick
              a fallback color when the pod is not a known one
    :return: string The color as an hex "#RRGGBB" value
    """
    if container_name in _POD_COLORS:
        return _POD_COLORS[container_name]

    for fragment, known_pod in _POD_COLOR_BY_SUBSTRING:
        if fragment in container_name:
            return _POD_COLORS[known_pod]

    # Unknown pod: cycle through the fallback palette.
    return _FALLBACK_COLORS[i % len(_FALLBACK_COLORS)]


def get_diff(data):
    """
    Get the delta between each data point and the previous one
    :param data: list The numbers to diff
    :return: list The deltas, one entry shorter than data (empty when data
             has fewer than two entries)
    """
    return [current - previous for previous, current in zip(data, data[1:])]


def bytesto(b, to, bsize=1024):
    """convert bytes to megabytes, etc.
    sample code:
        print('mb= ' + str(bytesto(314575262000000, 'm')))
    sample output:
        mb= 300002347.946

    :param b: The amount of bytes to convert
    :param to: string The target unit, one of 'k', 'm', 'g', 't', 'p', 'e'
    :param bsize: The size of a block, i.e. how many units make up the next bigger one
    :return: float The converted amount
    :raises KeyError: if the target unit is not a known one
    """
    # How many times the value has to be divided by bsize to reach each unit.
    divisions = {'k': 1, 'm': 2, 'g': 3, 't': 4, 'p': 5, 'e': 6}

    r = float(b)
    for _ in range(divisions[to]):
        r = r / bsize

    return r


def get_etcd_stats(address, out_folder):
    """
    Download the latest ETCD database size and number of keys and dump them
    in etcd_stats.csv, one "stat,value" row each
    :param address: The prometheus address
    :param out_folder: The folder in which store the output files
    :raises requests.HTTPError: if Prometheus answers with an error status
    """

    etcd_stats = {
        "size": "etcd_debugging_mvcc_db_total_size_in_bytes",
        "keys": "etcd_debugging_mvcc_keys_total",
    }

    etcd = {}

    # we only care about the last value so a short interval is fine
    time_delta = 80
    for stat, query in etcd_stats.items():
        now = time.time()
        etcd_params = {
            "query": "%s{}" % query,
            "start": now - time_delta,
            "end": now,
            "step": "30",
        }
        r = requests.get("http://%s/api/v1/query_range" % address, etcd_params)
        # Surface HTTP failures as such instead of failing while indexing the
        # response payload.
        r.raise_for_status()
        series = r.json()["data"]["result"]
        if series:
            # Only the first returned series is considered; keep the value of
            # its last sample.
            etcd[stat] = series[0]["values"][-1][1]

    # newline="" is what the csv module asks for; the context manager closes the file.
    with open("%s/etcd_stats.csv" % out_folder, "w", newline="") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        for k, v in etcd.items():
            csv_writer.writerow([k, v])

def get_kafka_stats(address, out_folder):
    """
    Download the latest offset of the Kafka topics we care about and dump them
    in kafka_msg_per_topic.csv, one "topic,offset" row each
    :param address: The prometheus address
    :param out_folder: The folder in which store the output files
    :raises requests.HTTPError: if Prometheus answers with an error status
    """
    # get the last information for all topics, we only care about the last value so a short interval is fine
    now = time.time()
    time_delta = 80
    kafka_params = {
        "query": "kafka_topic_partition_current_offset{}",
        "start": now - time_delta,
        "end": now,
        "step": "30",
    }

    r = requests.get("http://%s/api/v1/query_range" % address, kafka_params)
    # Surface HTTP failures as such instead of failing while indexing the
    # response payload.
    r.raise_for_status()

    msg_per_topic = {}

    for t in r.json()["data"]["result"]:
        # we only care about some topics
        topic_name = t["metric"]["topic"]

        if any(x in topic_name for x in KAFKA_TOPICS):
            # get only the value at the last timestamp
            # NOTE(review): several series with the same topic (presumably one
            # per partition) overwrite each other here -- confirm whether the
            # offsets should be summed instead.
            msg_per_topic[topic_name] = t["values"][-1][1]

    # newline="" is what the csv module asks for; the context manager closes the file.
    with open("%s/kafka_msg_per_topic.csv" % out_folder, "w", newline="") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        for k, v in msg_per_topic.items():
            csv_writer.writerow([k, v])


if __name__ == "__main__":
    # Command line entry point: parse the options and run the collection.
    parser = argparse.ArgumentParser(prog="sizing")
    parser.add_argument("-a", "--address", help="The address of the Prometheus instance we're targeting",
                        default="127.0.0.1:31301")
    parser.add_argument("-o", "--output", help="Where to output the generated files",
                        default="plots")
    # main() converts this value to an int; validating it here reports a bad
    # value as a usage error.
    parser.add_argument("-s", "--since", help="When to start sampling the data (in minutes before now)",
                        type=int, default=10)
    parser.add_argument("-n", "--namespace", help="Kubernetes namespace for collecting metrics",
                        default="default")
    parser.add_argument("-r", "--rate", help="Rate period",
                        default="5m")
    # Forwarded as-is to Prometheus as the query step.
    parser.add_argument("-t", "--step", help="Step in seconds",
                        default=30)

    args = parser.parse_args()
    main(args.address, args.output, args.since, args.namespace, args.rate, args.step)