blob: 649fd2e5400183f54286b85351da8bf3e7be66e0 [file] [log] [blame]
Matteo Scandolo3ed89872020-07-15 17:01:02 -07001# Copyright 2017-present Open Networking Foundation
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
# This tool collects CPU and Memory information for each container in the VOLTHA stack
16
17# NOTE
18# Collecting the info for all containers in the same chart can be confusing,
19# we may want to create subcharts for the different groups, eg: infra, ONOS, core, adapters
20
Matteo Scandolo7274b432020-08-27 14:28:43 -070021import csv
22from sys import platform as sys_pf
23
24if sys_pf == 'darwin':
25 import matplotlib
26
27 matplotlib.use("TkAgg")
28
Matteo Scandolo3ed89872020-07-15 17:01:02 -070029import argparse
30import requests
31import matplotlib.pyplot as plt
32import matplotlib.dates as mdates
33from datetime import datetime
34import time
35
# Pods whose name contains any of these substrings are dropped from the
# CPU and memory reports (see remove_unwanted_containers)
EXCLUDED_POD_NAMES = [
    "kube", "coredns", "kind", "grafana",
    "prometheus", "tiller", "control-plane",
    "calico", "nginx", "registry", "cattle", "canal", "metrics",
]

# Formatter applied to the x axis (timestamps) of the generated plots
DATE_FORMATTER_FN = mdates.DateFormatter('%Y-%m-%d %H:%M:%S')

# A Kafka topic is exported by get_kafka_stats when its name contains
# any of these substrings
KAFKA_TOPICS = [
    "openolt",
    "brcm_openomci_onu",
    "voltha",
    "adapters",
    "rwcore"
]
Matteo Scandolo3ed89872020-07-15 17:01:02 -070051
def main(address, out_folder, since, namespace="default"):
    """
    Query Prometheus and generate the CPU and Memory reports (.pdf chart and .csv dump)
    for each POD, then export the KAFKA and ETCD stats
    :param address: string The address of the Prometheus instance to query
    :param out_folder: string The output folder (where to save the generated files)
    :param since: int When to start collecting data (minutes in the past)
    :param namespace: string The Kubernetes namespace the PODs are selected from
    :return: void
    """
    window_seconds = 60 * int(since)

    cpu_query = "sum by(pod) (rate(container_cpu_usage_seconds_total{namespace='%s',container!='',container!='POD'}[%sm]))" % (namespace, since)
    mem_query = "sum by(pod) (container_memory_working_set_bytes{namespace='%s',container!='',container!='POD'})" % namespace

    # both queries cover exactly the same time window
    end = time.time()
    start = end - window_seconds
    endpoint = "http://%s/api/v1/query_range" % address

    def range_params(query):
        # parameters of a range query sampled every 30 seconds
        return {
            "query": query,
            "start": start,
            "end": end,
            "step": "30",
        }

    def two_decimals(values):
        return ["{:.2f}".format(v) for v in values]

    def megabytes_two_decimals(values):
        return ["{:.2f}".format(bytesto(v, "m")) for v in values]

    cpu_response = requests.get(endpoint, range_params(cpu_query))
    print("Downloading CPU info from: %s" % cpu_response.url)
    cpu_series = remove_unwanted_containers(cpu_response.json()["data"]["result"])
    plot_cpu_consumption(cpu_series, output="%s/cpu.pdf" % out_folder)
    data_to_csv(cpu_series, output="%s/cpu.csv" % out_folder,
                convert_values=two_decimals)

    mem_response = requests.get(endpoint, range_params(mem_query))
    print("Downloading Memory info from: %s" % mem_response.url)
    mem_series = remove_unwanted_containers(mem_response.json()["data"]["result"])
    plot_memory_consumption(mem_series, output="%s/memory.pdf" % out_folder)
    data_to_csv(mem_series, output="%s/memory.csv" % out_folder,
                convert_values=megabytes_two_decimals)

    print("Downloading KAFKA stats")
    get_kafka_stats(address, out_folder)
    print("Downloading ETCD stats")
    get_etcd_stats(address, out_folder)
102
103
Matteo Scandolo7274b432020-08-27 14:28:43 -0700104
def data_to_csv(containers, output=None, convert_values=None):
    """
    Get a list of prometheus metrics and dump them in a csv.

    The first row holds an empty cell followed by the timestamps of the first
    series; every following row holds a pod name followed by its values.
    When there are no series the file is still created, but left empty.

    :param containers: Prometheus metrics, a list of series each with
        ``metric.pod`` and ``values`` (a list of ``[timestamp, value]`` pairs)
    :param output: Destination file
    :param convert_values: Optional function to convert the values, takes the
        list of numbers of one series and returns the list of cells to write
    """
    # the context manager guarantees the file is flushed and closed on every
    # exit path, including the early return below
    with open(output, "w+") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        # we assume all the containers have the same timestamps
        # FIXME pods may have different timestamps depending on when the collection started
        # - find the longest list in containers
        # - add empty values at the beginning of the other list
        if not containers:
            return
        dates = [datetime.fromtimestamp(x[0]) for x in containers[0]["values"]]
        csv_writer.writerow([''] + dates)

        for c in containers:
            name = c["metric"]["pod"]
            values = [float(x[1]) for x in c["values"]]

            if convert_values:
                values = convert_values(values)
            csv_writer.writerow([name] + values)
Matteo Scandolo3ed89872020-07-15 17:01:02 -0700133
134
def plot_cpu_consumption(containers, output=None):
    """
    Plot the CPU usage over time of each POD and save the chart to a file.

    :param containers: Prometheus metrics, a list of series each with
        ``metric.pod`` and ``values`` (a list of ``[timestamp, value]`` pairs)
    :param output: Destination file of the chart
    """
    # plt.subplots() creates its own figure; a separate plt.figure() call
    # would only leave an empty figure behind that is never closed
    fig, ax = plt.subplots()
    try:
        ax.xaxis.set_major_formatter(DATE_FORMATTER_FN)
        ax.xaxis_date()
        fig.autofmt_xdate()

        ax.set_title("CPU Usage per POD")
        ax.set_xlabel("Timestamp")
        ax.set_ylabel("CPU cores used")

        for c in containers:
            name = c["metric"]["pod"]
            data = c["values"]

            dates = [datetime.fromtimestamp(x[0]) for x in data]
            values = [float(x[1]) for x in data]

            ax.plot(dates, values, label=name, lw=2, color=get_line_color(name))

        # the legend is anchored outside of the plotting area, on the right
        ax.legend(loc='upper left', title="CPU Consumption", bbox_to_anchor=(1.05, 1))

        fig.set_size_inches(20, 11)
        fig.savefig(output, bbox_inches="tight")
    finally:
        # release the figure once it has been written (or if plotting failed)
        plt.close(fig)
Matteo Scandolo3ed89872020-07-15 17:01:02 -0700163
164
def plot_memory_consumption(containers, output=None):
    """
    Plot the memory usage (in MB) over time of each POD and save the chart to a file.

    :param containers: Prometheus metrics, a list of series each with
        ``metric.pod`` and ``values`` (a list of ``[timestamp, bytes]`` pairs)
    :param output: Destination file of the chart
    """
    # plt.subplots() creates its own figure; a separate plt.figure() call
    # would only leave an empty figure behind that is never closed
    fig, ax = plt.subplots()
    try:
        ax.xaxis.set_major_formatter(DATE_FORMATTER_FN)
        ax.xaxis_date()
        fig.autofmt_xdate()

        ax.set_title("Memory Usage")
        ax.set_xlabel("Timestamp")
        ax.set_ylabel("MB")

        for c in containers:
            name = c["metric"]["pod"]
            data = c["values"]

            dates = [datetime.fromtimestamp(x[0]) for x in data]
            values = [bytesto(float(x[1]), "m") for x in data]

            # NOTE(review): the first sample of every series is skipped; this looks like
            # a leftover from plotting deltas (get_diff) - confirm whether it is intended
            ax.plot(dates[1:], values[1:], label=name, lw=2, color=get_line_color(name))

        # the legend is anchored outside of the plotting area, on the right
        ax.legend(loc='upper left', title="Memory Usage", bbox_to_anchor=(1.05, 1))

        fig.set_size_inches(20, 11)
        fig.savefig(output, bbox_inches="tight")
    finally:
        # release the figure once it has been written (or if plotting failed)
        plt.close(fig)
Matteo Scandolo3ed89872020-07-15 17:01:02 -0700191
192
def remove_unwanted_containers(cpus):
    """
    Filter a list of Prometheus series, keeping only the ones that carry a
    "pod" label whose value contains none of the EXCLUDED_POD_NAMES substrings.

    :param cpus: list of Prometheus series (each one with a "metric" dict)
    :return: a new list with the wanted series, in their original order
    """

    def is_wanted(series):
        labels = series["metric"]
        if "pod" not in labels:
            return False
        pod_name = labels["pod"]
        return not any(excluded in pod_name for excluded in EXCLUDED_POD_NAMES)

    return [series for series in cpus if is_wanted(series)]
204
205
def get_line_color(container_name):
    """
    Pick the color used to draw the line of a container.

    An exact match on the name wins; otherwise the first matching name fragment
    (checked in a fixed order) selects the color; anything else is drawn in black.

    :param container_name: name of the container (pod) to plot
    :return: the color, as a hex string or "black"
    """
    palette = {
        "bbsim0": "#884EA0",
        "bbsim1": "#9B59B6",
        "bbsim-sadis-server": "#D2B4DE",
        "onos-atomix-0": "#85C1E9",
        "onos-atomix-1": "#7FB3D5",
        "onos-atomix-2": "#3498DB",
        "onos-onos-classic-0": "#1A5276",
        "onos-onos-classic-1": "#1B4F72",
        "onos-onos-classic-2": "#154360",
        "etcd-0": "#7D6608",
        "etcd-1": "#9A7D0A",
        "etcd-2": "#B7950B",
        "open-olt-voltha-adapter-openolt": "#7E5109",
        "open-onu-voltha-adapter-openonu-0": "#6E2C00",
        "open-onu-voltha-adapter-openonu-1": "#873600",
        "open-onu-voltha-adapter-openonu-2": "#A04000",
        "open-onu-voltha-adapter-openonu-3": "#BA4A00",
        "open-onu-voltha-adapter-openonu-4": "#D35400",
        "open-onu-voltha-adapter-openonu-5": "#D35400",
        "open-onu-voltha-adapter-openonu-6": "#E59866",
        "open-onu-voltha-adapter-openonu-7": "#EDBB99",
        "kafka-0": "#4D5656",
        "kafka-1": "#5F6A6A",
        "kafka-2": "#717D7E",
        "kafka-zookeeper-0": "#839192",
        "kafka-zookeeper-1": "#95A5A6",
        "kafka-zookeeper-2": "#717D7E",
        "radius": "#82E0AA",
        "voltha-voltha-ofagent": "#641E16",
        "voltha-voltha-rw-core": "#7B241C",
    }

    if container_name in palette:
        return palette[container_name]

    # (name fragment, palette entry) pairs; the order matters, the first hit wins
    fallbacks = (
        ("openolt", "open-olt-voltha-adapter-openolt"),
        ("ofagent", "voltha-voltha-ofagent"),
        ("rw-core", "voltha-voltha-rw-core"),
        ("bbsim0", "bbsim0"),
        ("bbsim1", "bbsim1"),
        ("bbsim-sadis-server", "bbsim-sadis-server"),
        ("radius", "radius"),
    )
    for fragment, palette_key in fallbacks:
        if fragment in container_name:
            return palette[palette_key]

    return "black"
258
259
def get_diff(data):
    """
    Get the delta between each data point and the previous one.

    :param data: list of numbers
    :return: list of deltas, one element shorter than data (empty for 0 or 1 points)
    """
    return [current - previous for previous, current in zip(data, data[1:])]
263
264
def bytesto(b, to, bsize=1024):
    """convert bytes to megabytes, etc.
    sample code:
        print('mb= ' + str(bytesto(314575262000000, 'm')))
    sample output:
        mb= 300002347.946

    :param b: the amount of bytes to convert
    :param to: the target unit, one of 'k', 'm', 'g', 't', 'p', 'e'
    :param bsize: the size of each unit step (1024 by default)
    :return: the converted amount, as a float
    """
    # how many times the amount has to be divided by bsize for each unit
    exponents = {'k': 1, 'm': 2, 'g': 3, 't': 4, 'p': 5, 'e': 6}

    amount = float(b)
    remaining_steps = exponents[to]
    while remaining_steps > 0:
        amount = amount / bsize
        remaining_steps -= 1

    return amount
279
280
Matteo Scandolo88d01c12020-11-02 17:11:26 -0800281
def get_etcd_stats(address, out_folder):
    """
    Query Prometheus for the latest value of the ETCD stats (database size and
    number of keys) and save them in <out_folder>/etcd_stats.csv,
    one "<stat>,<value>" row per stat. Stats with no data are omitted.

    :param address: The prometheus address
    :param out_folder: The folder in which store the output files
    """

    etcd_stats = {
        "size": "etcd_debugging_mvcc_db_total_size_in_bytes",
        "keys": "etcd_debugging_mvcc_keys_total",
    }

    etcd = {}

    # only the last value is used, so a short interval is enough
    time_delta = 80
    for stat, query in etcd_stats.items():
        now = time.time()
        etcd_params = {
            "query": "%s{}" % query,
            "start": now - time_delta,
            "end": now,
            "step": "30",
        }
        r = requests.get("http://%s/api/v1/query_range" % address, etcd_params)
        series = r.json()["data"]["result"]
        if series:
            # only the first returned series is considered: keep its latest sample
            etcd[stat] = series[0]["values"][-1][1]

    # the context manager guarantees the file is flushed and closed
    with open("%s/etcd_stats.csv" % out_folder, "w+") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        for k, v in etcd.items():
            csv_writer.writerow([k, v])
315
def get_kafka_stats(address, out_folder):
    """
    Query Prometheus for the current offset of the Kafka topics listed in KAFKA_TOPICS
    and save them in <out_folder>/kafka_msg_per_topic.csv, one "<topic>,<offset>" row per topic.

    :param address: The prometheus address
    :param out_folder: The folder in which store the output files
    """
    # get the last information for all topics, we only care about the last value so a short interval is fine
    now = time.time()
    time_delta = 80
    kafka_params = {
        "query": "kafka_topic_partition_current_offset{}",
        "start": now - time_delta,
        "end": now,
        "step": "30",
    }

    r = requests.get("http://%s/api/v1/query_range" % address, kafka_params)

    msg_per_topic = {}

    for t in r.json()["data"]["result"]:
        # we only care about some topics
        topic_name = t["metric"]["topic"]

        if any(x in topic_name for x in KAFKA_TOPICS):
            # get only the value at the last timestamp
            # NOTE(review): series sharing the same topic label (e.g. one per partition)
            # overwrite each other here, only the last one is kept - confirm this is intended
            msg_per_topic[topic_name] = t["values"][-1][1]

    # the context manager guarantees the file is flushed and closed
    with open("%s/kafka_msg_per_topic.csv" % out_folder, "w+") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        for k, v in msg_per_topic.items():
            csv_writer.writerow([k, v])
348
if __name__ == "__main__":
    # command line entry point: parse the options and generate the reports
    parser = argparse.ArgumentParser(prog="sizing")
    parser.add_argument("-a", "--address", help="The address of the Prometheus instance we're targeting",
                        default="127.0.0.1:31301")
    parser.add_argument("-o", "--output", help="Where to output the generated files",
                        default="plots")
    # type=int makes argparse reject a malformed value with a usage error,
    # instead of failing later inside main()
    parser.add_argument("-s", "--since", help="When to start sampling the data (in minutes before now)",
                        type=int, default=10)
    parser.add_argument("-n", "--namespace", help="Kubernetes namespace for collecting metrics",
                        default="default")

    args = parser.parse_args()
    main(args.address, args.output, args.since, args.namespace)