blob: 30613fe9ced8c73d2c0e32876cc4eacaf7ca7ea6 [file] [log] [blame]
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -07001#!/usr/bin/env python
2
3# Copyright 2020-present Open Networking Foundation
4#
Hyunsun Moon200eba52021-04-05 21:31:54 -07005# SPDX-License-Identifier: LicenseRef-ONF-Member-Only-1.0
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -07006
Andy Bavier614af142020-08-07 14:49:56 -07007import os
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -07008import time
Andy Bavier614af142020-08-07 14:49:56 -07009import datetime
10import pytz
11import threading
12from icalevents.icalevents import events
Andy Bavier4021a2f2020-07-29 12:39:47 -070013from flask import Flask, jsonify, abort, request, Response
14import prometheus_client as prom
15
# URL of the maintenance calendar; None when the variable is not set
SECRET_ICAL_URL = os.getenv("SECRET_ICAL_URL")

# Aether environment that the server is monitoring (e.g., "production").
# To schedule downtime, postfix the cluster name with the env:
# "ace-tucson-production"
AETHER_ENV = os.getenv("AETHER_ENV", "production")

# Seconds without an agent report after which an edge moves to "no result"
NO_RESULT_THRESHOLD = 12 * 60
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -070025
# Flask application object that all route decorators below register on
app = Flask(import_name=__name__)
# In-memory list of edge records, seeded with a single placeholder entry.
# Each record holds the connectivity status, the latest speedtest figures,
# the signal quality and the time of the last report.
edges = [
    dict(
        name='ace-example',
        status=dict(
            control_plane='connected',
            user_plane='connected',
        ),
        speedtest=dict(
            ping=dict(
                dns=dict(min=0.0, avg=0.0, max=0.0, stddev=0.0),
            ),
            iperf=dict(
                cluster=dict(downlink=0.0, uplink=0.0),
            ),
        ),
        signal_quality=dict(rsrq=0, rsrp=0),
        last_update=time.time(),
    ),
]
57
# Numeric code exported for each status string, counting up from -2
# ("no result") to 2 ("connected")
status_codes = {
    status: code
    for code, status in enumerate(
        ("no result", "error", "disconnected", "connecting", "connected"),
        start=-2,
    )
}
65
# Alternate text that also identifies an edge in a calendar event, keyed by
# the environment-qualified edge name
room_mapping = dict([
    ("ace-menlo-pixel-production", "(Compute)-MP-1-Aether Production"),
    ("ace-menlo-staging", "(Compute)-MP-1-Aether Staging"),
])
70
# All gauges below carry a single "name" label holding the edge name.

# Legacy test status metrics, reporting a status code between -2 and 2
cp_status = prom.Gauge("aetheredge_status_control_plane", "Control plane status code", ["name"])
up_status = prom.Gauge("aetheredge_status_user_plane", "User plane status code", ["name"])

# Simplified binary test result metrics
e2e_tests_ok = prom.Gauge("aetheredge_e2e_tests_ok", "Last connect and ping test both passed", ["name"])
connect_test_ok = prom.Gauge("aetheredge_connect_test_ok", "Last connect test passed", ["name"])
ping_test_ok = prom.Gauge("aetheredge_ping_test_ok", "Last ping test passed", ["name"])
e2e_tests_down = prom.Gauge("aetheredge_e2e_tests_down", "E2E tests not reporting", ["name"])

# Speedtest ping metrics
ping_dns_min = prom.Gauge("aetheredge_ping_dns_test_min", "Last ping test minimum value", ["name"])
ping_dns_avg = prom.Gauge("aetheredge_ping_dns_test_avg", "Last ping test average", ["name"])
ping_dns_max = prom.Gauge("aetheredge_ping_dns_test_max", "Last ping test maximum value", ["name"])
ping_dns_stddev = prom.Gauge("aetheredge_ping_dns_test_stddev", "Last ping test standard deviation", ["name"])

# Speedtest iperf metrics
iperf_cluster_downlink = prom.Gauge("aetheredge_iperf_cluster_downlink_test", "Last iperf test downlink result", ["name"])
# The help text used to be a copy of the downlink one
iperf_cluster_uplink = prom.Gauge("aetheredge_iperf_cluster_uplink_test", "Last iperf test uplink result", ["name"])

# Signal quality metrics in CESQ format, not dB
# RSRQ: >=53 excellent, 43 ~ 53 good, 33 ~ 43 mid, <=33 bad, 0 no signal
# RSRP: >=20 excellent, 10 ~ 20 good, 0 ~ 10 mid, 0 no signal
signal_quality_rsrq = prom.Gauge("aetheredge_signal_quality_rsrq", "Quality of the received signal", ["name"])
signal_quality_rsrp = prom.Gauge("aetheredge_signal_quality_rsrp", "Power of the received signal", ["name"])

# Other metrics
last_update = prom.Gauge("aetheredge_last_update", "Last reported test result", ["name"])
maint_window = prom.Gauge("aetheredge_in_maintenance_window", "Currently in a maintenance window", ["name"])
100
def is_my_event(event, name):
    """Return True if a calendar event refers to the edge called *name*.

    Names starting with "ace-" are first qualified with the monitored
    environment ("<name>-<AETHER_ENV>"). The event matches when that
    qualified name, or the text room_mapping associates with it, occurs in
    the event's summary, location or description.

    Args:
        event: object that may carry "summary", "location" and
            "description" attributes.
        name: edge name as stored in the edge record.

    Returns:
        True on the first matching field, False when no field matches.
    """
    # The qualified name does not depend on the field, so build it once
    fullname = name
    if name.startswith("ace-"):
        fullname = "%s-%s" % (name, AETHER_ENV)
    room = room_mapping.get(fullname)

    for field in ("summary", "location", "description"):
        # A field can be missing or explicitly None; both count as empty
        # text so that the substring test cannot raise TypeError
        text = getattr(event, field, None) or ""
        if fullname in text:
            return True
        if room is not None and room in text:
            return True
    return False
111
def is_naive_datetime(d):
    """Return True when *d* has no usable timezone information.

    That is the case when it has no tzinfo at all, or when its tzinfo
    reports no UTC offset for it.
    """
    tz = d.tzinfo
    if tz is None:
        return True
    return tz.utcoffset(d) is None
114
def process_all_day_events(es):
    """Localize naive start/end times of all-day events to US/Pacific.

    Events whose all_day flag is false are left untouched. The events are
    modified in place and nothing is returned.
    """
    for event in es:
        if not event.all_day:
            continue
        # All day events have naive datetimes, which breaks comparisons
        pacific = pytz.timezone('US/Pacific')
        for attr in ("start", "end"):
            moment = getattr(event, attr)
            if is_naive_datetime(moment):
                setattr(event, attr, pacific.localize(moment))
124
def in_maintenance_window(events, name, now):
    """Return True if an event for edge *name* is in progress at *now*.

    An event is in progress when it started strictly before *now* and ends
    strictly after it; is_my_event decides whether it belongs to the edge.
    """
    return any(
        event.start < now and event.end > now and is_my_event(event, name)
        for event in events
    )
131
def pull_maintenance_events():
    """Refresh the maintenance state of every edge once a minute, forever.

    Each round fetches the calendar events starting from the current UTC
    time and normalizes all-day events. If that fails the exception is
    printed and the round is skipped; otherwise every edge record gets its
    'maintenance' entry ('in_window' and 'last_update') refreshed.
    """
    while True:
        now = datetime.datetime.now(pytz.utc)
        try:
            es = events(SECRET_ICAL_URL, start=now)
            process_all_day_events(es)
        except Exception as e:
            print(e)
        else:
            for edge in edges:
                # Create the maintenance entry on first use
                maintenance = edge.setdefault('maintenance', {})
                maintenance['in_window'] = in_maintenance_window(es, edge['name'], now)
                maintenance['last_update'] = time.time()
        time.sleep(60)
Andy Bavier4021a2f2020-07-29 12:39:47 -0700147
def time_out_stale_results():
    """Mark edges that have not reported for too long as having no result.

    For every edge whose last update is more than NO_RESULT_THRESHOLD
    seconds old, both plane statuses become "no result", the DNS ping
    figures are zeroed and the signal quality entry is dropped.
    """
    for edge in edges:
        if time.time() - edge["last_update"] <= NO_RESULT_THRESHOLD:
            continue
        status = edge['status']
        status['control_plane'] = "no result"
        status['user_plane'] = "no result"
        edge['speedtest']['ping']['dns'] = dict(min=0.0, avg=0.0, max=0.0, stddev=0.0)
        edge.pop('signal_quality', None)
Andy Bavier4021a2f2020-07-29 12:39:47 -0700159
def remove_edge_from_metrics(name):
    """Drop the series labelled *name* from every exported gauge.

    Removal is best effort and handled per gauge: a gauge that holds no
    series for the edge is skipped without affecting the others.

    Args:
        name: edge name used as the "name" label value.
    """
    gauges = (
        cp_status,
        up_status,
        last_update,
        e2e_tests_ok,
        connect_test_ok,
        ping_test_ok,
        e2e_tests_down,
        ping_dns_min,
        ping_dns_avg,
        ping_dns_max,
        ping_dns_stddev,
        iperf_cluster_downlink,
        iperf_cluster_uplink,
        signal_quality_rsrq,
        signal_quality_rsrp,
        maint_window,
    )
    for gauge in gauges:
        try:
            gauge.remove(name)
        except KeyError:
            # No series was ever exported for this edge on this gauge.
            # Only this case is ignored, so real errors stay visible.
            pass
Andy Bavier4021a2f2020-07-29 12:39:47 -0700196
@app.route('/edges/metrics', methods=['GET'])
def get_prometheus_metrics():
    """Publish the current edge state as Prometheus text metrics.

    Stale results are timed out first. Every edge except the "ace-example"
    placeholder then updates the gauges, and the rendered output of all
    gauges is returned as a text/plain response.
    """
    time_out_stale_results()
    not_reporting = ("error", "no result")

    for edge in edges:
        name = edge['name']
        if name == "ace-example":
            continue

        connect_status = edge['status']['control_plane']
        ping_status = edge['status']['user_plane']

        # Speedtest figures are exported only when the leading value
        # (ping average / iperf downlink) is non-zero
        dns = edge['speedtest']['ping']['dns']
        ping_values = None
        if dns['avg']:
            ping_values = (dns['min'], dns['avg'], dns['max'], dns['stddev'])

        cluster = edge['speedtest']['iperf']['cluster']
        iperf_values = None
        if cluster['downlink']:
            iperf_values = (cluster['downlink'], cluster['uplink'])

        cp_status.labels(name).set(status_codes[connect_status])
        up_status.labels(name).set(status_codes[ping_status])

        last_update.labels(name).set(edge['last_update'])
        if 'maintenance' in edge:
            maint_window.labels(name).set(int(edge['maintenance']['in_window']))

        # Binary results start from 0 and are raised below where they apply
        for gauge in (connect_test_ok, ping_test_ok, e2e_tests_ok, e2e_tests_down):
            gauge.labels(name).set(0)

        if connect_status in not_reporting or ping_status in not_reporting:
            e2e_tests_down.labels(name).set(1)
        else:
            connect_ok = connect_status == "connected"
            ping_ok = ping_status == "connected"
            if connect_ok:
                connect_test_ok.labels(name).set(1)
            if ping_ok:
                ping_test_ok.labels(name).set(1)
            if connect_ok and ping_ok:
                e2e_tests_ok.labels(name).set(1)

        if ping_values is not None:
            ping_gauges = (ping_dns_min, ping_dns_avg, ping_dns_max, ping_dns_stddev)
            for gauge, value in zip(ping_gauges, ping_values):
                gauge.labels(name).set(value)

        if iperf_values is not None:
            iperf_cluster_downlink.labels(name).set(iperf_values[0])
            iperf_cluster_uplink.labels(name).set(iperf_values[1])

        if 'signal_quality' in edge:
            signal_quality_rsrq.labels(name).set(edge['signal_quality']['rsrq'])
            signal_quality_rsrp.labels(name).set(edge['signal_quality']['rsrp'])

    # Order of the gauges in the response body
    exported = (
        cp_status,
        up_status,
        ping_dns_min,
        ping_dns_avg,
        ping_dns_max,
        ping_dns_stddev,
        iperf_cluster_downlink,
        iperf_cluster_uplink,
        last_update,
        maint_window,
        connect_test_ok,
        ping_test_ok,
        e2e_tests_ok,
        e2e_tests_down,
        signal_quality_rsrq,
        signal_quality_rsrp,
    )
    res = [prom.generate_latest(gauge) for gauge in exported]

    return Response(res, mimetype="text/plain")
278
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -0700279
@app.route('/edges/healthz', methods=['GET'])
def get_health():
    """Health endpoint: always answers with a fixed "healthy" message."""
    body = dict(message='healthy')
    return body
283
284
@app.route('/edges', methods=['GET'])
def get_edges():
    """Return all edge records as JSON after timing out stale results."""
    time_out_stale_results()
    payload = {'edges': edges}
    return jsonify(payload)
289
290
@app.route('/edges/<string:name>', methods=['GET'])
def get_edge(name):
    """Return the record of the edge called *name* as JSON, or 404.

    Stale results are timed out before the lookup. When several records
    share the name, the first one in the list is returned.
    """
    time_out_stale_results()
    match = next((item for item in edges if item['name'] == name), None)
    if match is None:
        abort(404)
    return jsonify({'edge': match})
298
299
@app.route('/edges', methods=['POST'])
@app.route('/testresults', methods=['POST'])
def create_or_update_edge():
    """Create an edge record from the posted test results, or update it.

    The JSON body must be an object with a string 'name' and a 'status'
    object whose 'control_plane' and 'user_plane' values are known status
    strings (keys of status_codes). 'speedtest' ('ping' / 'iperf') and
    'signal_quality' are optional and stored as received.

    Returns:
        The record built from the request, with HTTP status 201.

    Aborts with 400 when the body is missing or malformed.
    """
    payload = request.json
    if not payload or not isinstance(payload, dict):
        abort(400)
    if 'name' not in payload or 'status' not in payload:
        abort(400)
    if not isinstance(payload['name'], str):
        abort(400)

    status = payload['status']
    if not isinstance(status, dict):
        abort(400)
    for plane in ('control_plane', 'user_plane'):
        value = status.get(plane)
        # An unknown status stored here would later make the metrics
        # endpoint fail on its status_codes lookup, so reject it up front
        if not isinstance(value, str) or value not in status_codes:
            abort(400)

    req_edge = {
        'name': payload['name'],
        'status': {
            'control_plane': status['control_plane'],
            'user_plane': status['user_plane']
        },
        # Zeroed defaults, replaced below by whatever the request carries
        'speedtest': {
            'ping': {
                'dns': {
                    'min': 0.0,
                    'avg': 0.0,
                    'max': 0.0,
                    'stddev': 0.0
                }
            },
            'iperf': {
                'cluster': {
                    'downlink': 0.0,
                    'uplink': 0.0
                }
            }
        },
        'last_update': time.time()
    }

    speedtest = payload.get('speedtest')
    if isinstance(speedtest, dict):
        if 'ping' in speedtest:
            req_edge['speedtest']['ping'] = speedtest['ping']
        if 'iperf' in speedtest:
            req_edge['speedtest']['iperf'] = speedtest['iperf']

    if 'signal_quality' in payload:
        req_edge['signal_quality'] = payload['signal_quality']

    existing = next((item for item in edges if item['name'] == req_edge['name']), None)
    if existing is None:
        print("new edge request " + req_edge['name'])
        edges.append(req_edge)
    else:
        existing['status']['control_plane'] = req_edge['status']['control_plane']
        existing['status']['user_plane'] = req_edge['status']['user_plane']
        existing['speedtest']['ping'] = req_edge['speedtest']['ping']
        # iperf used to be skipped here, leaving the results of an existing
        # edge stuck at the values of its first report
        existing['speedtest']['iperf'] = req_edge['speedtest']['iperf']
        if 'signal_quality' in req_edge:
            existing['signal_quality'] = req_edge['signal_quality']
        existing['last_update'] = req_edge['last_update']

    return jsonify({'edge': req_edge}), 201
357
358
@app.route('/edges/<string:name>', methods=['DELETE'])
@app.route('/testresults/<string:name>', methods=['DELETE'])
def delete_edge(name):
    """Delete the first edge record called *name* together with its metrics.

    Answers with {'result': True} on success and aborts with 404 when no
    record has that name.
    """
    print("delete edge request " + name)
    for index, edge in enumerate(edges):
        if edge['name'] == name:
            del edges[index]
            remove_edge_from_metrics(name)
            return jsonify({'result': True})
    abort(404)
373
374
if __name__ == '__main__':
    if SECRET_ICAL_URL and AETHER_ENV:
        print(" * Starting maintenance calendar polling thread (Aether env: %s)" % AETHER_ENV)
        t = threading.Thread(target=pull_maintenance_events)
        # The polling loop never returns; as a daemon thread it cannot keep
        # the process alive once the web server has stopped
        t.daemon = True
        t.start()
    # The server listens on all interfaces, and the interactive debugger
    # lets a client run arbitrary code. Debug mode is therefore off unless
    # explicitly requested through FLASK_DEBUG.
    debug = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(debug=debug, host='0.0.0.0', port=80)