Matteo Scandolo | 3ed8987 | 2020-07-15 17:01:02 -0700 | [diff] [blame] | 1 | # Copyright 2017-present Open Networking Foundation |
| 2 | # |
| 3 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | # you may not use this file except in compliance with the License. |
| 5 | # You may obtain a copy of the License at |
| 6 | # |
| 7 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | # |
| 9 | # Unless required by applicable law or agreed to in writing, software |
| 10 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | # See the License for the specific language governing permissions and |
| 13 | # limitations under the License. |
| 14 | |
| 15 | # This tool collects CPU and Memory information for each container in the VOLTHA stack |
| 16 | |
| 17 | # NOTE |
| 18 | # Collecting the info for all containers in the same chart can be confusing, |
| 19 | # we may want to create subcharts for the different groups, eg: infra, ONOS, core, adapters |
| 20 | |
Matteo Scandolo | 7274b43 | 2020-08-27 14:28:43 -0700 | [diff] [blame] | 21 | import csv |
| 22 | from sys import platform as sys_pf |
| 23 | |
| 24 | if sys_pf == 'darwin': |
| 25 | import matplotlib |
| 26 | |
| 27 | matplotlib.use("TkAgg") |
| 28 | |
Matteo Scandolo | 3ed8987 | 2020-07-15 17:01:02 -0700 | [diff] [blame] | 29 | import argparse |
| 30 | import requests |
| 31 | import matplotlib.pyplot as plt |
| 32 | import matplotlib.dates as mdates |
| 33 | from datetime import datetime |
| 34 | import time |
| 35 | |
# Name fragments of the pods that are left out of the reports: a series is
# dropped when its pod name contains any of these substrings.
EXCLUDED_POD_NAMES = [
    "kube",
    "coredns",
    "kind",
    "grafana",
    "prometheus",
    "tiller",
    "control-plane",
    "calico",
    "nginx",
    "registry",
    "cattle",
    "canal",
    "metrics",
]
| 41 | |
# Formatter shared by all the charts to render the x-axis ticks as full timestamps.
DATE_FORMATTER_FN = mdates.DateFormatter("%Y-%m-%d %H:%M:%S")
| 43 | |
# Name fragments of the Kafka topics reported by get_kafka_stats: a topic is
# kept when its name contains any of these substrings.
KAFKA_TOPICS = [
    "openolt",
    "brcm_openomci_onu",
    "voltha",
    "adapters",
    "rwcore",
]
Matteo Scandolo | 3ed8987 | 2020-07-15 17:01:02 -0700 | [diff] [blame] | 51 | |
def main(address, out_folder, since):
    """
    Query Prometheus and generate the .pdf charts and .csv dumps of the CPU and Memory
    consumption of each POD, then dump the Kafka and ETCD statistics
    :param address: string The address of the Prometheus instance to query
    :param out_folder: string The output folder (where to save the generated files)
    :param since: int When to start collecting data (minutes in the past)
    :return: void
    """
    # normalise once: the value may arrive as a string from the command line
    minutes = int(since)
    time_delta = minutes * 60

    container_mem_query = "sum by(pod) (container_memory_working_set_bytes{namespace='default',container!='',container!='POD'})"

    container_cpu_query = "sum by(pod) (rate(container_cpu_usage_seconds_total{namespace='default',container!='',container!='POD'}[%sm]))" % minutes

    # both queries cover exactly the same time window
    now = time.time()
    start = now - time_delta

    containers = _download_pod_series(address, "CPU", container_cpu_query, start, now)
    plot_cpu_consumption(containers,
                         output="%s/cpu.pdf" % out_folder)
    data_to_csv(containers, output="%s/cpu.csv" % out_folder,
                convert_values=lambda values: ["{:.2f}".format(v) for v in values])

    containers = _download_pod_series(address, "Memory", container_mem_query, start, now)
    plot_memory_consumption(containers, output="%s/memory.pdf" % out_folder)
    data_to_csv(containers, output="%s/memory.csv" % out_folder,
                convert_values=lambda values: ["{:.2f}".format(bytesto(v, "m")) for v in values])

    print("Downloading KAFKA stats")
    get_kafka_stats(address, out_folder)
    print("Downloading ETCD stats")
    get_etcd_stats(address, out_folder)


def _download_pod_series(address, label, query, start, end):
    """
    Run a range query (30s step) against Prometheus and return the series of the PODs we care about
    :param address: string The address of the Prometheus instance to query
    :param label: string Human readable name of the metric, only used in the progress message
    :param query: string The PromQL expression to evaluate
    :param start: float Beginning of the time window (unix timestamp)
    :param end: float End of the time window (unix timestamp)
    :return: list The series returned by Prometheus, minus the excluded PODs
    """
    params = {
        "query": query,
        "start": start,
        "end": end,
        "step": "30",
    }

    r = requests.get("http://%s/api/v1/query_range" % address, params)
    print("Downloading %s info from: %s" % (label, r.url))
    # fail with the HTTP error itself rather than with a KeyError on the error payload
    r.raise_for_status()
    return remove_unwanted_containers(r.json()["data"]["result"])
| 102 | |
| 103 | |
Matteo Scandolo | 7274b43 | 2020-08-27 14:28:43 -0700 | [diff] [blame] | 104 | |
def data_to_csv(containers, output=None, convert_values=None):
    """
    Get a list of prometheus metrics and dump them in a csv, one row per POD and
    one column per timestamp
    :param containers: Prometheus metrics, each one carrying a "pod" label and a
                       list of [timestamp, value] samples
    :param output: Destination file
    :param convert_values: Function to convert the values, takes a list of numbers
                           and returns a list of the same length
    """
    # PODs may have different timestamps depending on when the collection started,
    # so the header is the union of all of them and every row is aligned on it
    timestamps = sorted({sample[0] for c in containers for sample in c["values"]})

    # newline="" is what the csv module expects from the files it writes to,
    # and the context manager guarantees the file is flushed and closed
    with open(output, "w", newline="") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        csv_writer.writerow([''] + [datetime.fromtimestamp(ts) for ts in timestamps])

        for c in containers:
            name = c["metric"]["pod"]
            data = c["values"]

            values = [float(x[1]) for x in data]

            if convert_values:
                values = convert_values(values)

            # samples the POD does not have are left as empty cells
            by_timestamp = {sample[0]: value for sample, value in zip(data, values)}
            csv_writer.writerow([name] + [by_timestamp.get(ts, '') for ts in timestamps])
Matteo Scandolo | 3ed8987 | 2020-07-15 17:01:02 -0700 | [diff] [blame] | 131 | |
| 132 | |
def plot_cpu_consumption(containers, output=None):
    """
    Plot the CPU usage of each POD over time and save the chart
    :param containers: Prometheus metrics, each one carrying a "pod" label and a
                       list of [timestamp, value] samples
    :param output: Destination file
    """
    # create the named figure together with its axes: calling plt.figure() and then
    # plt.subplots() would leave an extra, empty figure behind
    fig, ax = plt.subplots(num="cpu", figsize=(20, 11))
    try:
        ax.xaxis.set_major_formatter(DATE_FORMATTER_FN)
        ax.xaxis_date()
        fig.autofmt_xdate()

        ax.set_title("CPU Usage per POD")
        ax.set_xlabel("Timestamp")
        ax.set_ylabel("CPU cores used")

        for c in containers:
            name = c["metric"]["pod"]
            data = c["values"]

            dates = [datetime.fromtimestamp(x[0]) for x in data]
            values = [float(x[1]) for x in data]

            ax.plot(dates, values, label=name, lw=2, color=get_line_color(name))

        # the legend is anchored outside of the plotting area, on the right
        ax.legend(loc='upper left', title="CPU Consumption", bbox_to_anchor=(1.05, 1))

        fig.savefig(output, bbox_inches="tight")
    finally:
        # release the figure, pyplot keeps every figure alive until it is closed
        plt.close(fig)
Matteo Scandolo | 3ed8987 | 2020-07-15 17:01:02 -0700 | [diff] [blame] | 161 | |
| 162 | |
def plot_memory_consumption(containers, output=None):
    """
    Plot the memory usage (in MB) of each POD over time and save the chart
    :param containers: Prometheus metrics, each one carrying a "pod" label and a
                       list of [timestamp, value] samples, the values being bytes
    :param output: Destination file
    """
    # create the named figure together with its axes: calling plt.figure() and then
    # plt.subplots() would leave an extra, empty figure behind
    fig, ax = plt.subplots(num="memory", figsize=(20, 11))
    try:
        ax.xaxis.set_major_formatter(DATE_FORMATTER_FN)
        ax.xaxis_date()
        fig.autofmt_xdate()

        ax.set_title("Memory Usage")
        ax.set_xlabel("Timestamp")
        ax.set_ylabel("MB")

        for c in containers:
            name = c["metric"]["pod"]
            data = c["values"]

            dates = [datetime.fromtimestamp(x[0]) for x in data]
            values = [bytesto(float(x[1]), "m") for x in data]

            # NOTE(review): the first sample is not plotted, this looks like a leftover
            # of the delta-based chart; kept as is, confirm whether it is intended
            ax.plot(dates[1:], values[1:], label=name, lw=2, color=get_line_color(name))

        # the legend is anchored outside of the plotting area, on the right
        ax.legend(loc='upper left', title="Memory Usage", bbox_to_anchor=(1.05, 1))

        fig.savefig(output, bbox_inches="tight")
    finally:
        # release the figure, pyplot keeps every figure alive until it is closed
        plt.close(fig)
Matteo Scandolo | 3ed8987 | 2020-07-15 17:01:02 -0700 | [diff] [blame] | 189 | |
| 190 | |
def remove_unwanted_containers(cpus):
    """
    Filter a list of Prometheus series, keeping only the PODs we want to report
    :param cpus: Prometheus metrics (despite the name, any list of series works)
    :return: list The series that carry a "pod" label whose value does not contain
             any of the EXCLUDED_POD_NAMES fragments
    """
    kept = []
    for series in cpus:
        labels = series["metric"]

        # series without a "pod" label can't be attributed to a POD, drop them
        if "pod" not in labels:
            continue

        pod_name = labels["pod"]
        if not any(fragment in pod_name for fragment in EXCLUDED_POD_NAMES):
            kept.append(series)

    return kept
| 202 | |
| 203 | |
def get_line_color(container_name):
    """
    Pick the color used to draw a POD in the charts
    :param container_name: string The name of the POD
    :return: string The color of the exact name when known, otherwise the color of
             the first known component whose fragment is contained in the name,
             otherwise "black"
    """
    colors = {
        "bbsim0": "#884EA0",
        "bbsim1": "#9B59B6",
        "bbsim-sadis-server": "#D2B4DE",
        "onos-atomix-0": "#85C1E9",
        "onos-atomix-1": "#7FB3D5",
        "onos-atomix-2": "#3498DB",
        "onos-onos-classic-0": "#1A5276",
        "onos-onos-classic-1": "#1B4F72",
        "onos-onos-classic-2": "#154360",
        "etcd-0": "#7D6608",
        "etcd-1": "#9A7D0A",
        "etcd-2": "#B7950B",
        "open-olt-voltha-adapter-openolt": "#7E5109",
        "open-onu-voltha-adapter-openonu-0": "#6E2C00",
        "open-onu-voltha-adapter-openonu-1": "#873600",
        "open-onu-voltha-adapter-openonu-2": "#A04000",
        "open-onu-voltha-adapter-openonu-3": "#BA4A00",
        "open-onu-voltha-adapter-openonu-4": "#D35400",
        "open-onu-voltha-adapter-openonu-5": "#D35400",
        "open-onu-voltha-adapter-openonu-6": "#E59866",
        "open-onu-voltha-adapter-openonu-7": "#EDBB99",
        "kafka-0": "#4D5656",
        "kafka-1": "#5F6A6A",
        "kafka-2": "#717D7E",
        "kafka-zookeeper-0": "#839192",
        "kafka-zookeeper-1": "#95A5A6",
        "kafka-zookeeper-2": "#717D7E",
        "radius": "#82E0AA",
        "voltha-voltha-ofagent": "#641E16",
        "voltha-voltha-rw-core": "#7B241C",
    }

    # an exact match always wins
    if container_name in colors:
        return colors[container_name]

    # (fragment looked for in the name, entry of the table to use), checked in order
    fallbacks = (
        ("openolt", "open-olt-voltha-adapter-openolt"),
        ("ofagent", "voltha-voltha-ofagent"),
        ("rw-core", "voltha-voltha-rw-core"),
        ("bbsim0", "bbsim0"),
        ("bbsim1", "bbsim1"),
        ("bbsim-sadis-server", "bbsim-sadis-server"),
        ("radius", "radius"),
    )
    for fragment, known_name in fallbacks:
        if fragment in container_name:
            return colors[known_name]

    return "black"
| 256 | |
| 257 | |
def get_diff(data):
    """
    Get the delta between each point and the previous one
    :param data: list of numbers
    :return: list The deltas, one item shorter than the input (empty when the
             input has less than two points)
    """
    return [current - previous for previous, current in zip(data, data[1:])]
| 261 | |
| 262 | |
def bytesto(b, to, bsize=1024):
    """convert bytes to kilobytes, megabytes, etc.
    :param b: the amount of bytes (anything float() accepts)
    :param to: the target unit, one of 'k', 'm', 'g', 't', 'p', 'e'
    :param bsize: the size of a block, the ratio between a unit and the next one
    :return: float the converted amount
    sample code:
        print('mb= ' + str(bytesto(314575262000000, 'm')))
    sample output:
        mb= 300002347.946
    """
    # number of times the amount has to be divided by bsize to reach each unit
    steps_per_unit = {unit: steps for steps, unit in enumerate("kmgtpe", start=1)}

    amount = float(b)
    remaining = steps_per_unit[to]
    while remaining > 0:
        amount = amount / bsize
        remaining -= 1

    return amount
| 277 | |
| 278 | |
Matteo Scandolo | 88d01c1 | 2020-11-02 17:11:26 -0800 | [diff] [blame] | 279 | |
def get_etcd_stats(address, out_folder):
    """
    Download the latest value of the ETCD statistics and save them in etcd_stats.csv
    :param address: The prometheus address
    :param out_folder: The folder in which store the output files
    """

    etcd_stats = {
        "size": "etcd_debugging_mvcc_db_total_size_in_bytes",
        "keys": "etcd_debugging_mvcc_keys_total"
    }

    etcd = {}

    # we only care about the last value so a short interval is fine
    time_delta = 80
    for stat, query in etcd_stats.items():
        now = time.time()
        etcd_params = {
            "query": "%s{}" % query,
            "start": now - time_delta,
            "end": now,
            "step": "30",
        }
        r = requests.get("http://%s/api/v1/query_range" % address, etcd_params)
        # fail with the HTTP error itself rather than with a KeyError on the error payload
        r.raise_for_status()

        result = r.json()["data"]["result"]
        if not result:
            # the metric is not exported (or has no recent sample), report the others
            print("No data returned for %s, skipping" % query)
            continue

        # only the first series is considered, and only its value at the last timestamp
        etcd[stat] = result[0]["values"][-1][1]

    # newline="" is what the csv module expects from the files it writes to,
    # and the context manager guarantees the file is flushed and closed
    with open("%s/etcd_stats.csv" % out_folder, "w", newline="") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        for k, v in etcd.items():
            csv_writer.writerow([k, v])
| 312 | |
def get_kafka_stats(address, out_folder):
    """
    Download the current offset of the Kafka topics listed in KAFKA_TOPICS and
    save them in kafka_msg_per_topic.csv
    :param address: The prometheus address
    :param out_folder: The folder in which store the output files
    """
    # get the last information for all topics, we only care about the last value so a short interval is fine
    now = time.time()
    time_delta = 80
    kafka_params = {
        "query": "kafka_topic_partition_current_offset{}",
        "start": now - time_delta,
        "end": now,
        "step": "30",
    }

    r = requests.get("http://%s/api/v1/query_range" % address, kafka_params)
    # fail with the HTTP error itself rather than with a KeyError on the error payload
    r.raise_for_status()

    msg_per_topic = {}

    for t in r.json()["data"]["result"]:
        topic_name = t["metric"].get("topic")
        if not topic_name:
            # a series without a topic label can't be attributed to any topic
            continue

        # we only care about some topics
        if any(x in topic_name for x in KAFKA_TOPICS):
            # get only the value at the last timestamp
            # NOTE(review): if the same topic is returned more than once (eg: one series
            # per partition) the last series wins; confirm whether a sum is expected
            msg_per_topic[topic_name] = t["values"][-1][1]

    # newline="" is what the csv module expects from the files it writes to,
    # and the context manager guarantees the file is flushed and closed
    with open("%s/kafka_msg_per_topic.csv" % out_folder, "w", newline="") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        for k, v in msg_per_topic.items():
            csv_writer.writerow([k, v])
| 345 | |
if __name__ == "__main__":
    # command line entry point: parse the options and run the collection
    parser = argparse.ArgumentParser(prog="sizing")
    parser.add_argument("-a", "--address", help="The address of the Prometheus instance we're targeting",
                        default="127.0.0.1:31301")
    parser.add_argument("-o", "--output", help="Where to output the generated files",
                        default="plots")
    # type=int makes argparse reject a non numeric value with a usage error,
    # instead of failing later while building the queries
    parser.add_argument("-s", "--since", help="When to start sampling the data (in minutes before now)",
                        type=int, default=10)

    args = parser.parse_args()
    main(args.address, args.output, args.since)