blob: 16bfe9e17c073fc4277e1d2010fe223c76f40f89 [file] [log] [blame]
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -07001#!/usr/bin/env python
2
3# Copyright 2020-present Open Networking Foundation
4#
Hyunsun Moon200eba52021-04-05 21:31:54 -07005# SPDX-License-Identifier: LicenseRef-ONF-Member-Only-1.0
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -07006
Andy Bavier614af142020-08-07 14:49:56 -07007import os
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -07008import time
Andy Bavier614af142020-08-07 14:49:56 -07009import datetime
10import pytz
11import threading
12from icalevents.icalevents import events
Andy Bavier4021a2f2020-07-29 12:39:47 -070013from flask import Flask, jsonify, abort, request, Response
14import prometheus_client as prom
15
# URL of the maintenance calendar (iCal feed); None when the variable is unset
SECRET_ICAL_URL = os.getenv("SECRET_ICAL_URL")

# Aether environment that the server is monitoring (e.g., "production").
# To schedule downtime, postfix the cluster name with the env:
# "ace-tucson-production"
AETHER_ENV = os.getenv("AETHER_ENV", "production")

# Move to "no result" status if we don't hear from the agent for this many
# seconds (12 minutes)
NO_RESULT_THRESHOLD = 12 * 60
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -070025
app = Flask(__name__)

# In-memory table of known edges, seeded with a single example entry that
# shows the shape of an edge record.
edges = [
    dict(
        name='ace-example',
        status=dict(
            control_plane='connected',
            user_plane='connected',
        ),
        speedtest=dict(
            ping=dict(
                dns=dict(min=0.0, avg=0.0, max=0.0, stddev=0.0),
            ),
        ),
        signal_quality=dict(rsrq=0, rsrp=0),
        last_update=time.time(),
    ),
]
51
# Numeric code exported for each textual test status
status_codes = {
    'no result': -2,
    'error': -1,
    'disconnected': 0,
    'connecting': 1,
    'connected': 2,
}

# Calendar room name associated with an edge's full (env-suffixed) name
room_mapping = {
    'ace-menlo-pixel-production': '(Compute)-MP-1-Aether Production',
    'ace-menlo-staging': '(Compute)-MP-1-Aether Staging',
}
64
# Legacy test status metrics, reporting a status code between -2 and 2
cp_status = prom.Gauge(
    "aetheredge_status_control_plane", "Control plane status code", ["name"]
)
up_status = prom.Gauge(
    "aetheredge_status_user_plane", "User plane status code", ["name"]
)

# Simplified binary test result metrics
e2e_tests_ok = prom.Gauge(
    "aetheredge_e2e_tests_ok", "Last connect and ping test both passed", ["name"]
)
connect_test_ok = prom.Gauge(
    "aetheredge_connect_test_ok", "Last connect test passed", ["name"]
)
ping_test_ok = prom.Gauge(
    "aetheredge_ping_test_ok", "Last ping test passed", ["name"]
)
e2e_tests_down = prom.Gauge(
    "aetheredge_e2e_tests_down", "E2E tests not reporting", ["name"]
)

# Speedtest metrics
ping_dns_min = prom.Gauge(
    "aetheredge_ping_dns_test_min", "Last ping test minimum value", ["name"]
)
ping_dns_avg = prom.Gauge(
    "aetheredge_ping_dns_test_avg", "Last ping test average", ["name"]
)
ping_dns_max = prom.Gauge(
    "aetheredge_ping_dns_test_max", "Last ping test maximum value", ["name"]
)
ping_dns_stddev = prom.Gauge(
    "aetheredge_ping_dns_test_stddev", "Last ping test standard deviation", ["name"]
)

# Signal quality metrics, reported in CESQ format rather than dB
# RSRQ: >=53 excellent, 43 ~ 53 good, 33 ~ 43 mid, <=33 bad, 0 no signal
# RSRP: >=20 excellent, 10 ~ 20 good, 0 ~ 10 mid, 0 no signal
signal_quality_rsrq = prom.Gauge(
    "aetheredge_signal_quality_rsrq", "Quality of the received signal", ["name"]
)
signal_quality_rsrp = prom.Gauge(
    "aetheredge_signal_quality_rsrp", "Power of the received signal", ["name"]
)

# Other metrics
last_update = prom.Gauge(
    "aetheredge_last_update", "Last reported test result", ["name"]
)
maint_window = prom.Gauge(
    "aetheredge_in_maintenance_window", "Currently in a maintenance window", ["name"]
)
90
def is_my_event(event, name):
    """Return True when a calendar event refers to the edge called ``name``.

    Names starting with "ace-" are suffixed with the monitored environment
    (``AETHER_ENV``) before matching, e.g. "ace-tucson" becomes
    "ace-tucson-production". The event's summary, location and description
    are searched for that full name, or for the room that ``room_mapping``
    associates with it.

    Args:
        event: calendar event object; the "summary", "location" and
            "description" attributes are read if present.
        name: edge name as stored in the edge table.

    Returns:
        True if any of the inspected fields mentions the edge, else False.
    """
    fullname = name
    if name.startswith("ace-"):
        fullname = "%s-%s" % (name, AETHER_ENV)
    # None when no room is mapped to this edge
    room = room_mapping.get(fullname)

    for field in ("summary", "location", "description"):
        # An attribute may exist but be None (getattr's default only covers
        # a missing attribute); treat that as empty text so the substring
        # test cannot raise TypeError.
        text = getattr(event, field, None) or ""
        if fullname in text:
            return True
        if room is not None and room in text:
            return True
    return False
101
def is_naive_datetime(d):
    """Tell whether datetime ``d`` lacks usable timezone information.

    A datetime counts as naive when it has no tzinfo at all, or when its
    tzinfo reports no UTC offset for it.
    """
    tz = d.tzinfo
    if tz is None:
        return True
    return tz.utcoffset(d) is None
104
def process_all_day_events(es):
    """Give naive start/end times of all-day events the US/Pacific timezone.

    Events are modified in place. Events that are not all-day, and
    boundaries that already carry timezone information, are left untouched.
    """
    for ev in es:
        if not ev.all_day:
            continue
        # All day events have naive datetimes, which breaks comparisons
        tz = pytz.timezone('US/Pacific')
        for boundary in ('start', 'end'):
            moment = getattr(ev, boundary)
            if is_naive_datetime(moment):
                setattr(ev, boundary, tz.localize(moment))
114
def in_maintenance_window(events, name, now):
    """Report whether ``now`` falls strictly inside an event for edge ``name``.

    Only events with ``start < now < end`` are considered, and among those
    only the ones that ``is_my_event`` attributes to ``name``.
    """
    return any(
        is_my_event(ev, name)
        for ev in events
        if ev.start < now and ev.end > now
    )
121
def pull_maintenance_events():
    """Poll the maintenance calendar forever and refresh each edge's state.

    Once a minute, fetches the events from ``SECRET_ICAL_URL`` starting at
    the current UTC time and records, for every edge in ``edges``, whether
    it is currently inside a maintenance window ('in_window') and when that
    was evaluated ('last_update'). Never returns; intended to be run in a
    background thread.
    """
    while True:
        now = datetime.datetime.now(pytz.utc)
        try:
            es = events(SECRET_ICAL_URL, start=now)
            process_all_day_events(es)
            # Evaluate the windows inside the try block too: an exception
            # raised while matching events would otherwise propagate out of
            # the loop and terminate the polling thread for good.
            for edge in edges:
                # Compute first, so a failure never leaves behind a
                # 'maintenance' dict that lacks 'in_window'.
                in_window = in_maintenance_window(es, edge['name'], now)
                maintenance = edge.setdefault('maintenance', {})
                maintenance['in_window'] = in_window
                maintenance['last_update'] = time.time()
        except Exception as e:
            # Best effort: report the problem and retry on the next cycle.
            print(e)
        time.sleep(60)
Andy Bavier4021a2f2020-07-29 12:39:47 -0700137
def time_out_stale_results():
    """Expire results of edges not heard from within NO_RESULT_THRESHOLD.

    An expired edge gets "no result" for both plane statuses, zeroed DNS
    ping statistics, and loses its 'signal_quality' entry.
    """
    for edge in edges:
        age = time.time() - edge["last_update"]
        if not age > NO_RESULT_THRESHOLD:
            continue
        status = edge['status']
        status['control_plane'] = "no result"
        status['user_plane'] = "no result"
        edge['speedtest']['ping']['dns'] = dict.fromkeys(
            ('min', 'avg', 'max', 'stddev'), 0.0
        )
        edge.pop('signal_quality', None)
Andy Bavier4021a2f2020-07-29 12:39:47 -0700149
def remove_edge_from_metrics(name):
    """Drop the series labelled ``name`` from every per-edge gauge.

    Some gauges are only populated when the matching results were reported
    (speedtest, signal quality, maintenance window), so a gauge that has no
    series for this edge is skipped. Each gauge is handled independently so
    one missing series never prevents the others from being removed.

    Args:
        name: edge name used as the "name" label value.
    """
    gauges = (
        cp_status,
        up_status,
        last_update,
        e2e_tests_ok,
        connect_test_ok,
        ping_test_ok,
        e2e_tests_down,
        ping_dns_min,
        ping_dns_avg,
        ping_dns_max,
        ping_dns_stddev,
        signal_quality_rsrq,
        signal_quality_rsrp,
        maint_window,
    )
    for gauge in gauges:
        try:
            gauge.remove(name)
        except KeyError:
            # No series was recorded for this edge on this gauge.
            pass
Andy Bavier4021a2f2020-07-29 12:39:47 -0700180
@app.route('/edges/metrics', methods=['GET'])
def get_prometheus_metrics():
    """Refresh the per-edge gauges and return them in Prometheus text format.

    Stale results are expired first. The "ace-example" entry is skipped.
    """
    res = []
    time_out_stale_results()
    for edge in edges:
        name = edge['name']
        if name == "ace-example":
            continue

        connect_status = edge['status']['control_plane']
        ping_status = edge['status']['user_plane']

        # DNS ping statistics are only exported when an average was reported
        dns = edge['speedtest']['ping']['dns']
        dns_stats = None
        if dns['avg']:
            dns_stats = (dns['min'], dns['avg'], dns['max'], dns['stddev'])

        cp_status.labels(name).set(status_codes[connect_status])
        up_status.labels(name).set(status_codes[ping_status])

        last_update.labels(name).set(edge['last_update'])
        if 'maintenance' in edge:
            maint_window.labels(name).set(int(edge['maintenance']['in_window']))

        # Reset the binary results, then raise the ones that apply
        for gauge in (connect_test_ok, ping_test_ok, e2e_tests_ok, e2e_tests_down):
            gauge.labels(name).set(0)

        not_reporting = ["error", "no result"]
        if connect_status in not_reporting or ping_status in not_reporting:
            e2e_tests_down.labels(name).set(1)
        else:
            connect_passed = connect_status == "connected"
            ping_passed = ping_status == "connected"
            if connect_passed:
                connect_test_ok.labels(name).set(1)
            if ping_passed:
                ping_test_ok.labels(name).set(1)
            if connect_passed and ping_passed:
                e2e_tests_ok.labels(name).set(1)

        if dns_stats is not None:
            dns_gauges = (ping_dns_min, ping_dns_avg, ping_dns_max, ping_dns_stddev)
            for gauge, value in zip(dns_gauges, dns_stats):
                gauge.labels(name).set(value)

        if 'signal_quality' in edge:
            quality = edge['signal_quality']
            signal_quality_rsrq.labels(name).set(quality['rsrq'])
            signal_quality_rsrp.labels(name).set(quality['rsrp'])

    # Output order of the metric families in the response body
    exported = (
        cp_status,
        up_status,
        ping_dns_min,
        ping_dns_avg,
        ping_dns_max,
        ping_dns_stddev,
        last_update,
        maint_window,
        connect_test_ok,
        ping_test_ok,
        e2e_tests_ok,
        e2e_tests_down,
        signal_quality_rsrq,
        signal_quality_rsrp,
    )
    res.extend(prom.generate_latest(gauge) for gauge in exported)

    return Response(res, mimetype="text/plain")
249
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -0700250
@app.route('/edges/healthz', methods=['GET'])
def get_health():
    """Health endpoint: always answers with a fixed 'healthy' message."""
    return dict(message='healthy')
254
255
@app.route('/edges', methods=['GET'])
def get_edges():
    """Return every known edge as JSON, after expiring stale results."""
    time_out_stale_results()
    return jsonify(edges=edges)
260
261
@app.route('/edges/<string:name>', methods=['GET'])
def get_edge(name):
    """Return the edge called ``name`` as JSON; respond 404 if it is unknown."""
    time_out_stale_results()
    for candidate in edges:
        if candidate['name'] == name:
            return jsonify({'edge': candidate})
    abort(404)
269
270
@app.route('/edges', methods=['POST'])
@app.route('/testresults', methods=['POST'])
def create_or_update_edge():
    """Store a reported test result, creating the edge on its first report.

    The request body must be a JSON object with 'name' and 'status', where
    'status' holds 'control_plane' and 'user_plane'. 'speedtest' (holding
    'ping') and 'signal_quality' are optional; when 'speedtest' is absent
    the DNS ping statistics are reset to zero.

    Returns:
        The stored values as JSON with status 201. Responds 400 when the
        body is not a JSON object or a required field is missing.
    """
    payload = request.json
    # Reject malformed bodies up front with 400 so that a missing or
    # mistyped field cannot surface later as an unhandled KeyError/TypeError.
    if not payload or not isinstance(payload, dict):
        abort(400)
    if 'name' not in payload or 'status' not in payload:
        abort(400)
    status = payload['status']
    if not isinstance(status, dict):
        abort(400)
    if 'control_plane' not in status or 'user_plane' not in status:
        abort(400)

    req_edge = {
        'name': payload['name'],
        'status': {
            'control_plane': status['control_plane'],
            'user_plane': status['user_plane']
        },
        'speedtest': {
            'ping': {
                'dns': {
                    'min': 0.0,
                    'avg': 0.0,
                    'max': 0.0,
                    'stddev': 0.0
                }
            }
        },
        'last_update': time.time()
    }

    if 'speedtest' in payload:
        speedtest = payload['speedtest']
        if not isinstance(speedtest, dict) or 'ping' not in speedtest:
            abort(400)
        req_edge['speedtest'] = {'ping': speedtest['ping']}

    if 'signal_quality' in payload:
        req_edge['signal_quality'] = payload['signal_quality']

    existing = next((e for e in edges if e['name'] == req_edge['name']), None)
    if existing is None:
        print("new edge request " + req_edge['name'])
        edges.append(req_edge)
    else:
        existing['status']['control_plane'] = req_edge['status']['control_plane']
        existing['status']['user_plane'] = req_edge['status']['user_plane']
        existing['speedtest']['ping'] = req_edge['speedtest']['ping']
        # Keep the previous signal quality when this report carries none
        if 'signal_quality' in req_edge:
            existing['signal_quality'] = req_edge['signal_quality']
        existing['last_update'] = req_edge['last_update']

    return jsonify({'edge': req_edge}), 201
321
322
@app.route('/edges/<string:name>', methods=['DELETE'])
@app.route('/testresults/<string:name>', methods=['DELETE'])
def delete_edge(name):
    """Forget the edge called ``name`` and drop its metrics; 404 if unknown."""
    print("delete edge request " + name)
    for index, edge in enumerate(edges):
        if edge['name'] != name:
            continue
        del edges[index]
        remove_edge_from_metrics(name)
        return jsonify({'result': True})
    abort(404)
337
338
if __name__ == '__main__':
    # Calendar polling is optional: it needs both the calendar URL and the
    # environment name.
    if SECRET_ICAL_URL and AETHER_ENV:
        print(" * Starting maintenance calendar polling thread (Aether env: %s)" % AETHER_ENV)
        t = threading.Thread(target=pull_maintenance_events)
        # The poller loops forever; mark it as a daemon so it cannot keep
        # the process alive after the web server has stopped.
        t.daemon = True
        t.start()
    # NOTE(review): debug=True turns on the interactive debugger and the
    # auto-reloader while listening on all interfaces -- confirm this entry
    # point is never used for an exposed deployment.
    app.run(debug=True, host='0.0.0.0', port=80)