blob: 20ddc947c12ab4e23bcdb95a3757b46fa6d06b4a [file] [log] [blame]
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -07001#!/usr/bin/env python
2
3# Copyright 2020-present Open Networking Foundation
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
Andy Bavier614af142020-08-07 14:49:56 -070017import os
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -070018import time
Andy Bavier614af142020-08-07 14:49:56 -070019import datetime
20import pytz
21import threading
22from icalevents.icalevents import events
Andy Bavier4021a2f2020-07-29 12:39:47 -070023from flask import Flask, jsonify, abort, request, Response
24import prometheus_client as prom
25
# Where to fetch the maintenance calendar from
SECRET_ICAL_URL = os.getenv("SECRET_ICAL_URL")

# Name of the Aether environment this server monitors (e.g., "production").
# Downtime is scheduled by appending the env to the cluster name,
# e.g. "ace-tucson-production".
AETHER_ENV = os.getenv("AETHER_ENV", "production")

# Seconds without hearing from an agent before its edge moves to "no result"
NO_RESULT_THRESHOLD = 12 * 60
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -070035
app = Flask(__name__)

# In-memory list of edge records. It starts with a placeholder entry that
# shows the shape of a record.
edges = [
    dict(
        name='ace-example',
        status=dict(
            control_plane='connected',
            user_plane='connected',
        ),
        speedtest=dict(
            ping=dict(
                dns=dict(min=0.0, avg=0.0, max=0.0, stddev=0.0),
            ),
        ),
        last_update=time.time(),
    ),
]
57
# Numeric code for each test status. The labels are listed in ascending
# code order, starting at -2.
status_codes = {
    label: code
    for code, label in enumerate(
        ["no result", "error", "disconnected", "connecting", "connected"],
        start=-2,
    )
}

# Alternate strings (presumably calendar room names) that also identify a
# given "<cluster>-<env>" name in a maintenance event.
room_mapping = dict([
    ("ace-menlo-pixel-production", "(Compute)-MP-1-Aether Production"),
    ("ace-menlo-staging", "(Compute)-MP-1-Aether Staging"),
])
70
def _edge_gauge(metric_name, description):
    """Create a Prometheus gauge that carries a single "name" label."""
    return prom.Gauge(metric_name, description, ["name"])


# Legacy test status metrics, reporting a status code between -2 and 2
cp_status = _edge_gauge("aetheredge_status_control_plane", "Control plane status code")
up_status = _edge_gauge("aetheredge_status_user_plane", "User plane status code")

# Simplified binary test result metrics
e2e_tests_ok = _edge_gauge("aetheredge_e2e_tests_ok", "Last connect and ping test both passed")
connect_test_ok = _edge_gauge("aetheredge_connect_test_ok", "Last connect test passed")
ping_test_ok = _edge_gauge("aetheredge_ping_test_ok", "Last ping test passed")
e2e_tests_down = _edge_gauge("aetheredge_e2e_tests_down", "E2E tests not reporting")

# Speedtest metrics
ping_dns_min = _edge_gauge("aetheredge_ping_dns_test_min", "Last ping test minimum value")
ping_dns_avg = _edge_gauge("aetheredge_ping_dns_test_avg", "Last ping test average")
ping_dns_max = _edge_gauge("aetheredge_ping_dns_test_max", "Last ping test maximum value")
ping_dns_stddev = _edge_gauge("aetheredge_ping_dns_test_stddev", "Last ping test standard deviation")

# Other metrics
last_update = _edge_gauge("aetheredge_last_update", "Last reported test result")
maint_window = _edge_gauge("aetheredge_in_maintenance_window", "Currently in a maintenance window")
90
def is_my_event(event, name):
    """Tell whether a calendar event refers to the edge called *name*.

    Names starting with "ace-" are expanded to "<name>-<AETHER_ENV>" before
    matching. The event matches when that full name, or the string mapped to
    it in ``room_mapping``, occurs in the event's summary, location or
    description.

    Args:
        event: object that may expose ``summary``, ``location`` and
            ``description`` attributes; missing or None attributes are
            treated as empty text.
        name: edge name as reported by the agent.

    Returns:
        True if any of the three fields mentions the edge, else False.
    """
    # The name to look for does not depend on the field, so compute it once.
    fullname = name
    if name.startswith("ace-"):
        fullname = "%s-%s" % (name, AETHER_ENV)

    needles = [fullname]
    if fullname in room_mapping:
        needles.append(room_mapping[fullname])

    for field in ("summary", "location", "description"):
        # The attribute can exist but be None; `x in None` would raise
        # TypeError, so fall back to an empty string.
        text = getattr(event, field, None) or ""
        if any(needle in text for needle in needles):
            return True
    return False
101
def is_naive_datetime(d):
    """Return True when *d* carries no usable timezone offset.

    A datetime counts as naive if it has no ``tzinfo`` at all, or if its
    ``tzinfo`` reports no UTC offset for it.
    """
    zone = d.tzinfo
    if zone is None:
        return True
    return zone.utcoffset(d) is None
104
def process_all_day_events(es):
    """Attach the US/Pacific timezone to naive all-day event boundaries.

    All day events have naive datetimes, which breaks comparisons against
    timezone-aware datetimes. The events in *es* are modified in place;
    events that are not all-day are left untouched.
    """
    for ev in es:
        if not ev.all_day:
            continue
        zone = pytz.timezone('US/Pacific')
        for boundary in ("start", "end"):
            moment = getattr(ev, boundary)
            if is_naive_datetime(moment):
                setattr(ev, boundary, zone.localize(moment))
114
def in_maintenance_window(events, name, now):
    """Report whether edge *name* is inside a maintenance window at *now*.

    An event counts when *now* lies strictly between its start and end and
    ``is_my_event`` says it refers to the edge.
    """
    return any(
        ev.start < now and ev.end > now and is_my_event(ev, name)
        for ev in events
    )
121
def pull_maintenance_events():
    """Poll the maintenance calendar forever and update every edge.

    Once a minute the calendar at SECRET_ICAL_URL is fetched and each edge
    in ``edges`` gets a ``maintenance`` entry holding ``in_window`` (whether
    it is currently in a maintenance window) and ``last_update`` (time of
    this evaluation). Intended to run in its own thread; never returns.
    """
    while True:
        now = datetime.datetime.now(pytz.utc)
        try:
            es = events(SECRET_ICAL_URL, start=now)
            process_all_day_events(es)
            for edge in edges:
                in_window = in_maintenance_window(es, edge['name'], now)
                # Publish a fully populated dict in one assignment so a
                # concurrent reader never sees 'maintenance' without
                # 'in_window'.
                edge['maintenance'] = {
                    'in_window': in_window,
                    'last_update': time.time(),
                }
        except Exception as e:
            # A failure while fetching or evaluating the calendar must not
            # end the polling thread; report it and retry next cycle.
            print(e)
        time.sleep(60)
Andy Bavier4021a2f2020-07-29 12:39:47 -0700137
def time_out_stale_results():
    """Mark edges that have not reported recently as having no result.

    For every edge whose ``last_update`` is more than NO_RESULT_THRESHOLD
    seconds old, both plane statuses become "no result" and the DNS ping
    statistics are reset to zero.
    """
    stale = [
        record for record in edges
        if time.time() - record["last_update"] > NO_RESULT_THRESHOLD
    ]
    for record in stale:
        record['status'].update(control_plane="no result",
                                user_plane="no result")
        record['speedtest']['ping']['dns'] = dict(
            min=0.0, avg=0.0, max=0.0, stddev=0.0)
Andy Bavier4021a2f2020-07-29 12:39:47 -0700148
def remove_edge_from_metrics(name):
    """Drop the series labelled *name* from every per-edge gauge.

    Removal is best effort and handled gauge by gauge. An edge that never
    populated some gauge (the speedtest gauges are only set when results
    exist) must not stop the remaining gauges from being cleaned up.
    """
    gauges = (
        cp_status,
        up_status,
        ping_dns_min,
        ping_dns_avg,
        ping_dns_max,
        ping_dns_stddev,
        last_update,
        e2e_tests_ok,
        connect_test_ok,
        ping_test_ok,
        e2e_tests_down,
        maint_window,
    )
    for gauge in gauges:
        try:
            gauge.remove(name)
        except KeyError:
            # No series for this edge on this gauge; nothing to remove.
            # NOTE(review): assumes remove() signals an unknown label set
            # with KeyError - confirm against the installed prometheus_client.
            pass
Andy Bavier4021a2f2020-07-29 12:39:47 -0700169
def _known_status(value):
    """Map a status that is not a key of status_codes to "error"."""
    if isinstance(value, str) and value in status_codes:
        return value
    return "error"


def _dns_ping_results(edge):
    """Return the edge's DNS ping stats as floats, or None if unusable.

    None is returned when the speedtest/ping/dns structure is missing or
    malformed, a value is not numeric, or the average is zero (which this
    file uses to mean "no speedtest results").
    """
    node = edge
    for key in ('speedtest', 'ping', 'dns'):
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    if not isinstance(node, dict):
        return None
    try:
        stats = {key: float(node[key]) for key in ('min', 'avg', 'max', 'stddev')}
    except (KeyError, TypeError, ValueError):
        return None
    return stats if stats['avg'] else None


@app.route('/edges/metrics', methods=['GET'])
def get_prometheus_metrics():
    """Serve all per-edge gauges in Prometheus text format.

    Stale results are timed out first, then each edge except the
    "ace-example" placeholder is written into the gauges. A malformed record
    on one edge is degraded (unknown status reported as "error", unusable
    speedtest data skipped) so it does not fail the endpoint for every edge.
    """
    time_out_stale_results()
    for edge in edges:
        name = edge['name']
        if name == "ace-example":
            continue

        connect_status = _known_status(edge['status']['control_plane'])
        ping_status = _known_status(edge['status']['user_plane'])

        cp_status.labels(name).set(status_codes[connect_status])
        up_status.labels(name).set(status_codes[ping_status])

        last_update.labels(name).set(edge['last_update'])
        maintenance = edge.get('maintenance')
        if maintenance and 'in_window' in maintenance:
            maint_window.labels(name).set(int(maintenance['in_window']))

        # Compute the final values first and set each gauge once, so a
        # concurrent scrape never sees a transient 0.
        not_reporting = ("error", "no result")
        tests_down = connect_status in not_reporting or ping_status in not_reporting
        connect_ok = not tests_down and connect_status == "connected"
        ping_ok = not tests_down and ping_status == "connected"

        connect_test_ok.labels(name).set(int(connect_ok))
        ping_test_ok.labels(name).set(int(ping_ok))
        e2e_tests_ok.labels(name).set(int(connect_ok and ping_ok))
        e2e_tests_down.labels(name).set(int(tests_down))

        # Speedtest gauges are only updated when results exist.
        dns = _dns_ping_results(edge)
        if dns is not None:
            ping_dns_min.labels(name).set(dns['min'])
            ping_dns_avg.labels(name).set(dns['avg'])
            ping_dns_max.labels(name).set(dns['max'])
            ping_dns_stddev.labels(name).set(dns['stddev'])

    gauges = (
        cp_status,
        up_status,
        ping_dns_min,
        ping_dns_avg,
        ping_dns_max,
        ping_dns_stddev,
        last_update,
        maint_window,
        connect_test_ok,
        ping_test_ok,
        e2e_tests_ok,
        e2e_tests_down,
    )
    res = [prom.generate_latest(gauge) for gauge in gauges]
    return Response(res, mimetype="text/plain")
233
Hyunsun Moonf32ae9a2020-05-28 13:17:45 -0700234
@app.route('/edges/healthz', methods=['GET'])
def get_health():
    """Health endpoint: always answers with a fixed "healthy" message."""
    body = dict(message='healthy')
    return body
238
239
@app.route('/edges', methods=['GET'])
def get_edges():
    """Return every known edge as JSON, after timing out stale results."""
    time_out_stale_results()
    payload = dict(edges=edges)
    return jsonify(payload)
244
245
@app.route('/edges/<string:name>', methods=['GET'])
def get_edge(name):
    """Return the edge called *name* as JSON, or 404 if there is none.

    Stale results are timed out before the lookup.
    """
    time_out_stale_results()
    matches = list(filter(lambda candidate: candidate['name'] == name, edges))
    if not matches:
        abort(404)
    first = matches[0]
    return jsonify({'edge': first})
253
254
_DNS_PING_KEYS = ('min', 'avg', 'max', 'stddev')


def _validated_ping(speedtest):
    """Return the 'ping' section of a posted speedtest, or None if malformed.

    The DNS statistics are completed with 0.0 for any missing key, and every
    statistic must be convertible to float. Other keys of the ping section
    are kept as posted.
    """
    if not isinstance(speedtest, dict):
        return None
    ping = speedtest.get('ping')
    if not isinstance(ping, dict):
        return None
    posted_dns = ping.get('dns', {})
    if not isinstance(posted_dns, dict):
        return None

    dns = dict.fromkeys(_DNS_PING_KEYS, 0.0)
    dns.update(posted_dns)
    try:
        for key in _DNS_PING_KEYS:
            float(dns[key])
    except (TypeError, ValueError):
        return None
    return dict(ping, dns=dns)


@app.route('/edges', methods=['POST'])
@app.route('/testresults', methods=['POST'])
def create_or_update_edge():
    """Record a test result posted by an agent.

    The JSON body must contain a non-empty string ``name`` and a ``status``
    object whose ``control_plane`` and ``user_plane`` are keys of
    ``status_codes``. An optional ``speedtest`` object supplies ``ping``
    results. The edge is created if unknown, otherwise updated in place.

    Returns:
        The stored values as JSON with HTTP status 201.

    Aborts with 400 when the body is missing or malformed, so bad data is
    rejected here rather than breaking the metrics endpoint later.
    """
    payload = request.json
    if not payload or not isinstance(payload, dict):
        abort(400)

    name = payload.get('name')
    if not isinstance(name, str) or not name:
        abort(400)

    status = payload.get('status')
    if not isinstance(status, dict):
        abort(400)
    control_plane = status.get('control_plane')
    user_plane = status.get('user_plane')
    for value in (control_plane, user_plane):
        # The metrics endpoint looks statuses up in status_codes.
        if not isinstance(value, str) or value not in status_codes:
            abort(400)

    # Without posted speedtest data the DNS ping stats default to zero.
    ping = {'dns': dict.fromkeys(_DNS_PING_KEYS, 0.0)}
    if 'speedtest' in payload:
        ping = _validated_ping(payload['speedtest'])
        if ping is None:
            abort(400)

    req_edge = {
        'name': name,
        'status': {
            'control_plane': control_plane,
            'user_plane': user_plane,
        },
        'speedtest': {
            'ping': ping,
        },
        'last_update': time.time(),
    }

    existing = next((edge for edge in edges if edge['name'] == name), None)
    if existing is None:
        print("new edge request " + name)
        edges.append(req_edge)
    else:
        existing['status']['control_plane'] = control_plane
        existing['status']['user_plane'] = user_plane
        existing['speedtest']['ping'] = ping
        existing['last_update'] = req_edge['last_update']

    return jsonify({'edge': req_edge}), 201
301
302
@app.route('/edges/<string:name>', methods=['DELETE'])
@app.route('/testresults/<string:name>', methods=['DELETE'])
def delete_edge(name):
    """Delete the first edge called *name* together with its metrics.

    Answers 404 when no edge has that name.
    """
    print("delete edge request " + name)
    position = next(
        (idx for idx, record in enumerate(edges) if record['name'] == name),
        None,
    )
    if position is None:
        abort(404)
    del edges[position]
    remove_edge_from_metrics(name)
    return jsonify({'result': True})
317
318
if __name__ == '__main__':
    if SECRET_ICAL_URL and AETHER_ENV:
        print(" * Starting maintenance calendar polling thread (Aether env: %s)" % AETHER_ENV)
        # Daemon thread: the polling loop never returns, so a regular thread
        # would keep the process alive after the web server stops.
        t = threading.Thread(target=pull_maintenance_events, daemon=True)
        t.start()
    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default; enable it explicitly with FLASK_DEBUG=1.
    debug = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(debug=debug, host='0.0.0.0', port=80)