blob: f8f8c91432068a9ec99fedd690ac3a3138200956 [file] [log] [blame]
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -07001#!/usr/bin/env python
2
3# Copyright 2020-present Open Networking Foundation
4#
Hyunsun Moon200eba52021-04-05 21:31:54 -07005# SPDX-License-Identifier: LicenseRef-ONF-Member-Only-1.0
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -07006
Andy Bavier614af142020-08-07 14:49:56 -07007import os
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -07008import time
Andy Bavier614af142020-08-07 14:49:56 -07009import datetime
10import pytz
11import threading
12from icalevents.icalevents import events
Andy Bavier4021a2f2020-07-29 12:39:47 -070013from flask import Flask, jsonify, abort, request, Response
14import prometheus_client as prom
15
# iCal feed of the maintenance calendar (None when the variable is not set)
SECRET_ICAL_URL = os.getenv("SECRET_ICAL_URL")

# Aether environment that the server is monitoring (e.g., "production").
# To schedule downtime, postfix the cluster name with the env: "ace-tucson-production"
AETHER_ENV = os.getenv("AETHER_ENV", "production")

# Seconds of silence from an agent after which its edge moves to "no result"
NO_RESULT_THRESHOLD = 12 * 60
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -070025
app = Flask(__name__)

# In-memory list of edge records, seeded with one placeholder entry that
# shows the shape of a record.
edges = [
    dict(
        name='ace-example',
        status=dict(control_plane='connected', user_plane='connected'),
        speedtest=dict(
            ping=dict(dns=dict(min=0.0, avg=0.0, max=0.0, stddev=0.0)),
            iperf=dict(cluster=dict(downlink=0.0, uplink=0.0)),
        ),
        signal_quality=dict(rsrq=0, rsrp=0),
        last_update=time.time(),
    ),
]
57
# Numeric code exported for each status string, from -2 ("no result")
# up to 2 ("connected").
status_codes = {
    label: code
    for code, label in enumerate(
        ("no result", "error", "disconnected", "connecting", "connected"),
        start=-2,
    )
}

# Alternate text that also identifies an edge in calendar events, keyed by
# the environment-qualified edge name (presumably the calendar room name).
room_mapping = dict([
    ("ace-menlo-pixel-production", "(Compute)-MP-1-Aether Production"),
    ("ace-menlo-staging", "(Compute)-MP-1-Aether Staging"),
])
70
# All gauges carry a single "name" label holding the edge name.

# Legacy test status metrics, reporting a status code between -2 and 2
cp_status = prom.Gauge("aetheredge_status_control_plane", "Control plane status code", ["name"])
up_status = prom.Gauge("aetheredge_status_user_plane", "User plane status code", ["name"])

# Simplified binary test result metrics
e2e_tests_ok = prom.Gauge("aetheredge_e2e_tests_ok", "Last connect and ping test both passed", ["name"])
connect_test_ok = prom.Gauge("aetheredge_connect_test_ok", "Last connect test passed", ["name"])
ping_test_ok = prom.Gauge("aetheredge_ping_test_ok", "Last ping test passed", ["name"])
e2e_tests_down = prom.Gauge("aetheredge_e2e_tests_down", "E2E tests not reporting", ["name"])

# Speedtest ping metrics
ping_dns_min = prom.Gauge("aetheredge_ping_dns_test_min", "Last ping test minimum value", ["name"])
ping_dns_avg = prom.Gauge("aetheredge_ping_dns_test_avg", "Last ping test average", ["name"])
ping_dns_max = prom.Gauge("aetheredge_ping_dns_test_max", "Last ping test maximum value", ["name"])
ping_dns_stddev = prom.Gauge("aetheredge_ping_dns_test_stddev", "Last ping test standard deviation", ["name"])

# Speedtest iperf metrics
iperf_cluster_downlink = prom.Gauge("aetheredge_iperf_cluster_downlink_test", "Last iperf test downlink result", ["name"])
# The help text used to say "downlink" here (copy-paste slip); the metric
# name itself is unchanged.
iperf_cluster_uplink = prom.Gauge("aetheredge_iperf_cluster_uplink_test", "Last iperf test uplink result", ["name"])

# Signal quality metrics in CESQ format not dB
# RSRQ: >=53 excellent, 43 ~ 53 good, 33 ~ 43 mid, <=33 bad, 0 no signal
# RSRP: >=20 excellent, 10 ~ 20 good, 0 ~ 10 mid, 0 no signal
signal_quality_rsrq = prom.Gauge("aetheredge_signal_quality_rsrq", "Quality of the received signal", ["name"])
signal_quality_rsrp = prom.Gauge("aetheredge_signal_quality_rsrp", "Power of the received signal", ["name"])

# Other metrics
last_update = prom.Gauge("aetheredge_last_update", "Last reported test result", ["name"])
maint_window = prom.Gauge("aetheredge_in_maintenance_window", "Currently in a maintenance window", ["name"])
100
def is_my_event(event, name):
    """Return True if a calendar event refers to the edge called ``name``.

    The edge's calendar name is ``name`` itself, or ``"<name>-<AETHER_ENV>"``
    when ``name`` starts with ``"ace-"``. The event matches when its summary,
    location or description contains that calendar name, or contains the
    ``room_mapping`` entry registered for it.

    Fields that are absent or set to ``None`` are treated as empty text
    instead of raising ``TypeError`` on the containment test.
    """
    fullname = name
    if name.startswith("ace-"):
        fullname = "%s-%s" % (name, AETHER_ENV)

    # "or ''" covers attributes that exist but hold None.
    texts = [getattr(event, field, None) or ""
             for field in ("summary", "location", "description")]

    if any(fullname in text for text in texts):
        return True

    room = room_mapping.get(fullname)
    return room is not None and any(room in text for text in texts)
111
def is_naive_datetime(d):
    """Tell whether ``d`` lacks usable timezone information.

    ``d`` is naive when it has no ``tzinfo`` at all, or when its ``tzinfo``
    reports no UTC offset for it.
    """
    tz = d.tzinfo
    if tz is None:
        return True
    return tz.utcoffset(d) is None
114
def process_all_day_events(es):
    """Give naive all-day event boundaries the US/Pacific timezone.

    All day events have naive datetimes, which breaks comparisons. Events
    are modified in place: a naive ``start`` or ``end`` of an all-day event
    is localized to US/Pacific. Boundaries that already carry a timezone,
    and events that are not all-day, are left untouched.
    """
    for event in (candidate for candidate in es if candidate.all_day):
        pacific = pytz.timezone('US/Pacific')
        for boundary in ("start", "end"):
            moment = getattr(event, boundary)
            if is_naive_datetime(moment):
                setattr(event, boundary, pacific.localize(moment))
124
def in_maintenance_window(events, name, now):
    """Report whether edge ``name`` has a maintenance event in progress.

    An event counts when ``now`` lies strictly after its start and strictly
    before its end, and ``is_my_event`` says it refers to ``name``.
    """
    return any(
        ev.start < now and ev.end > now and is_my_event(ev, name)
        for ev in events
    )
131
def pull_maintenance_events():
    """Poll the maintenance calendar forever, once every 60 seconds.

    Each round fetches events from ``SECRET_ICAL_URL`` starting at the
    current UTC time, then records on every entry of ``edges`` whether that
    edge is inside a maintenance window (``edge['maintenance']['in_window']``)
    and when that was evaluated (``edge['maintenance']['last_update']``).

    A failure anywhere in a round (fetching, timezone fix-up or evaluating
    the events) is printed and the round is abandoned, but the loop keeps
    running, so a single bad calendar entry cannot stop polling for good.
    Meant to be used as a thread target; never returns.
    """
    while True:
        now = datetime.datetime.now(pytz.utc)
        try:
            es = events(SECRET_ICAL_URL, start=now)
            process_all_day_events(es)
            for edge in edges:
                # Evaluate before touching the record so a failure never
                # leaves a half-filled 'maintenance' entry behind.
                in_window = in_maintenance_window(es, edge['name'], now)
                maintenance = edge.setdefault('maintenance', {})
                maintenance['in_window'] = in_window
                maintenance['last_update'] = time.time()
        except Exception as e:
            # Deliberately broad: this is the top of a background thread and
            # it must survive to retry on the next round.
            print(e)
        time.sleep(60)
Andy Bavier4021a2f2020-07-29 12:39:47 -0700147
def time_out_stale_results():
    """Blank out the results of edges that have stopped reporting.

    For every edge whose last update is more than ``NO_RESULT_THRESHOLD``
    seconds old, both plane statuses become "no result", the DNS ping and
    iperf speedtest figures are zeroed, and any signal quality reading is
    discarded.
    """
    for edge in edges:
        age = time.time() - edge["last_update"]
        if not age > NO_RESULT_THRESHOLD:
            continue

        status = edge['status']
        status['control_plane'] = "no result"
        status['user_plane'] = "no result"

        speedtest = edge['speedtest']
        speedtest['ping']['dns'] = dict(min=0.0, avg=0.0, max=0.0, stddev=0.0)
        speedtest['iperf'] = {'cluster': dict(downlink=0.0, uplink=0.0)}

        edge.pop('signal_quality', None)
Andy Bavier4021a2f2020-07-29 12:39:47 -0700164
def remove_edge_from_metrics(name):
    """Drop the series labelled ``name`` from every exported gauge.

    Removal is best effort and each gauge is handled on its own. A gauge
    that holds no series for this edge (``KeyError`` from ``remove``) is
    skipped without stopping the clean-up of the remaining gauges.
    """
    gauges = (
        cp_status, up_status, last_update,
        e2e_tests_ok, connect_test_ok, ping_test_ok, e2e_tests_down,
        ping_dns_min, ping_dns_avg, ping_dns_max, ping_dns_stddev,
        iperf_cluster_downlink, iperf_cluster_uplink,
        signal_quality_rsrq, signal_quality_rsrp,
        maint_window,
    )
    for gauge in gauges:
        try:
            gauge.remove(name)
        except KeyError:
            # No series was ever created for this edge on this gauge.
            pass
Andy Bavier4021a2f2020-07-29 12:39:47 -0700201
@app.route('/edges/metrics', methods=['GET'])
def get_prometheus_metrics():
    """Serve all edge gauges in the Prometheus text exposition format.

    Stale results are expired first. Then, for every edge except the
    "ace-example" placeholder, the gauges are refreshed from the stored
    record: status codes, last update time, maintenance flag (when known),
    the binary test-result gauges, and the speedtest / signal quality
    figures. Ping and iperf gauges are only written when the stored average
    ping / downlink value is truthy, and signal quality only when the
    record has one, so those gauges otherwise keep their previous value.
    """
    time_out_stale_results()

    for edge in edges:
        name = edge['name']
        if name == "ace-example":
            continue

        connect_status = edge['status']['control_plane']
        ping_status = edge['status']['user_plane']

        # Snapshot speedtest figures up front; None means "nothing to export".
        dns = edge['speedtest']['ping']['dns']
        ping_results = None
        if dns['avg']:
            ping_results = (dns['min'], dns['avg'], dns['max'], dns['stddev'])

        cluster = edge['speedtest']['iperf']['cluster']
        iperf_results = None
        if cluster['downlink']:
            iperf_results = (cluster['downlink'], cluster['uplink'])

        cp_status.labels(name).set(status_codes[connect_status])
        up_status.labels(name).set(status_codes[ping_status])

        last_update.labels(name).set(edge['last_update'])
        if 'maintenance' in edge:
            maint_window.labels(name).set(int(edge['maintenance']['in_window']))

        # Start from "all zero" and raise only the flags that apply.
        for flag in (connect_test_ok, ping_test_ok, e2e_tests_ok, e2e_tests_down):
            flag.labels(name).set(0)

        not_reporting = ("error", "no result")
        if connect_status in not_reporting or ping_status in not_reporting:
            e2e_tests_down.labels(name).set(1)
        else:
            connect_ok = connect_status == "connected"
            ping_ok = ping_status == "connected"
            if connect_ok:
                connect_test_ok.labels(name).set(1)
            if ping_ok:
                ping_test_ok.labels(name).set(1)
            if connect_ok and ping_ok:
                e2e_tests_ok.labels(name).set(1)

        if ping_results is not None:
            lowest, average, highest, deviation = ping_results
            ping_dns_min.labels(name).set(lowest)
            ping_dns_avg.labels(name).set(average)
            ping_dns_max.labels(name).set(highest)
            ping_dns_stddev.labels(name).set(deviation)

        if iperf_results is not None:
            downlink, uplink = iperf_results
            iperf_cluster_downlink.labels(name).set(downlink)
            iperf_cluster_uplink.labels(name).set(uplink)

        if 'signal_quality' in edge:
            quality = edge['signal_quality']
            signal_quality_rsrq.labels(name).set(quality['rsrq'])
            signal_quality_rsrp.labels(name).set(quality['rsrp'])

    # One rendered chunk per gauge, in the order they appear in the output.
    exported = (
        cp_status, up_status,
        ping_dns_min, ping_dns_avg, ping_dns_max, ping_dns_stddev,
        iperf_cluster_downlink, iperf_cluster_uplink,
        last_update, maint_window,
        connect_test_ok, ping_test_ok, e2e_tests_ok, e2e_tests_down,
        signal_quality_rsrq, signal_quality_rsrp,
    )
    res = [prom.generate_latest(gauge) for gauge in exported]

    return Response(res, mimetype="text/plain")
283
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -0700284
@app.route('/edges/healthz', methods=['GET'])
def get_health():
    """Health probe: always answers with a fixed "healthy" message."""
    return dict(message='healthy')
288
289
@app.route('/edges', methods=['GET'])
def get_edges():
    """Return every known edge as JSON, after expiring stale results."""
    time_out_stale_results()
    return jsonify(edges=edges)
294
295
@app.route('/edges/<string:name>', methods=['GET'])
def get_edge(name):
    """Return the edge called ``name`` as JSON, or 404 if it is unknown.

    Stale results are expired first so the response reflects timeouts.
    When several records share the name, the first one is returned.
    """
    time_out_stale_results()
    for candidate in edges:
        if candidate['name'] == name:
            return jsonify({'edge': candidate})
    abort(404)
303
304
@app.route('/edges', methods=['POST'])
@app.route('/testresults', methods=['POST'])
def create_or_update_edge():
    """Record a test result posted for an edge.

    The JSON body must be an object with a ``name`` and a ``status`` object
    whose ``control_plane`` and ``user_plane`` values are keys of
    ``status_codes``. Anything else is rejected with 400: the metrics
    endpoint looks each stored status up in ``status_codes``, so an unknown
    value must never be stored.

    Optional ``speedtest.ping``, ``speedtest.iperf`` and ``signal_quality``
    sections are stored as sent; missing speedtest sections default to
    zeros. NOTE(review): the inner shape of those optional sections is not
    validated here.

    A new edge is appended to ``edges``; an existing one is updated in
    place. Responds with the normalized record and HTTP 201 in both cases.
    """
    payload = request.json
    if not payload or not isinstance(payload, dict):
        abort(400)
    if 'name' not in payload or 'status' not in payload:
        abort(400)

    status = payload['status']
    if not isinstance(status, dict):
        abort(400)

    # Tuple membership compares by equality, so unhashable JSON values
    # (lists, objects) are rejected cleanly instead of raising TypeError.
    known_statuses = tuple(status_codes)
    plane_status = {}
    for plane in ('control_plane', 'user_plane'):
        value = status.get(plane)
        if value not in known_statuses:
            abort(400)
        plane_status[plane] = value

    req_edge = {
        'name': payload['name'],
        'status': plane_status,
        'speedtest': {
            'ping': {
                'dns': {'min': 0.0, 'avg': 0.0, 'max': 0.0, 'stddev': 0.0}
            },
            'iperf': {
                'cluster': {'downlink': 0.0, 'uplink': 0.0}
            }
        },
        'last_update': time.time()
    }

    speedtest = payload.get('speedtest')
    if isinstance(speedtest, dict):
        for section in ('ping', 'iperf'):
            if section in speedtest:
                req_edge['speedtest'][section] = speedtest[section]

    if 'signal_quality' in payload:
        req_edge['signal_quality'] = payload['signal_quality']

    existing = next((e for e in edges if e['name'] == req_edge['name']), None)
    if existing is None:
        print("new edge request " + req_edge['name'])
        edges.append(req_edge)
    else:
        existing['status']['control_plane'] = req_edge['status']['control_plane']
        existing['status']['user_plane'] = req_edge['status']['user_plane']
        existing['speedtest']['ping'] = req_edge['speedtest']['ping']
        existing['speedtest']['iperf'] = req_edge['speedtest']['iperf']
        # A report without signal quality keeps the previously stored one.
        if 'signal_quality' in req_edge:
            existing['signal_quality'] = req_edge['signal_quality']
        existing['last_update'] = req_edge['last_update']

    return jsonify({'edge': req_edge}), 201
363
364
@app.route('/edges/<string:name>', methods=['DELETE'])
@app.route('/testresults/<string:name>', methods=['DELETE'])
def delete_edge(name):
    """Forget the edge called ``name`` and drop its metric series.

    Only the first record with that name is removed. Responds with 404
    when no edge has that name.
    """
    print("delete edge request " + name)
    for index, edge in enumerate(edges):
        if edge['name'] == name:
            del edges[index]
            remove_edge_from_metrics(name)
            break
    else:
        # Loop finished without a match.
        abort(404)
    return jsonify({'result': True})
379
380
if __name__ == '__main__':
    # Calendar polling is optional: it needs both the calendar URL and the
    # environment name.
    if SECRET_ICAL_URL and AETHER_ENV:
        print(" * Starting maintenance calendar polling thread (Aether env: %s)" % AETHER_ENV)
        t = threading.Thread(target=pull_maintenance_events)
        # The poller loops forever; as a daemon it cannot keep the process
        # alive after the web server has stopped.
        t.daemon = True
        t.start()
    # The Werkzeug debugger lets a browser run arbitrary code, so on a
    # server bound to all interfaces it is opt-in (FLASK_DEBUG=1 or true).
    debug = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true")
    app.run(debug=debug, host='0.0.0.0', port=80)
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -0700386 app.run(debug=True, host='0.0.0.0', port=80)