Hyunsun Moon | f32ae9a | 2020-05-28 13:17:45 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | |
| 3 | # Copyright 2020-present Open Networking Foundation |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | # you may not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
| 16 | |
Andy Bavier | 614af14 | 2020-08-07 14:49:56 -0700 | [diff] [blame] | 17 | import os |
Hyunsun Moon | f32ae9a | 2020-05-28 13:17:45 -0700 | [diff] [blame] | 18 | import time |
Andy Bavier | 614af14 | 2020-08-07 14:49:56 -0700 | [diff] [blame] | 19 | import datetime |
| 20 | import pytz |
| 21 | import threading |
| 22 | from icalevents.icalevents import events |
Andy Bavier | 4021a2f | 2020-07-29 12:39:47 -0700 | [diff] [blame] | 23 | from flask import Flask, jsonify, abort, request, Response |
| 24 | import prometheus_client as prom |
| 25 | |
# URL of the maintenance calendar, taken from the environment (None if unset)
SECRET_ICAL_URL = os.getenv("SECRET_ICAL_URL")

# Aether environment that the server is monitoring (e.g., "production").
# To schedule downtime, postfix the cluster name with the env:
# "ace-tucson-production"
AETHER_ENV = os.getenv("AETHER_ENV", "production")

# Seconds without a report from the agent before an edge is moved to
# "no result" status
NO_RESULT_THRESHOLD = 12 * 60
Hyunsun Moon | f32ae9a | 2020-05-28 13:17:45 -0700 | [diff] [blame] | 35 | |
# Flask application object; the @app.route handlers in this file register on it
app = Flask(__name__)
# In-memory list of edge records. Each record holds the edge name, the
# reported control/user plane status strings and the time of the last report.
# The list starts with a single placeholder record named 'ace-example'.
edges = [
    dict(
        name='ace-example',
        status=dict(
            control_plane='connected',
            user_plane='connected',
        ),
        last_update=time.time(),
    ),
]
| 47 | |
# Numeric code used for each status string when it is exported as a gauge value
status_codes = {
    "no result": -2,
    "error": -1,
    "disconnected": 0,
    "connecting": 1,
    "connected": 2,
}

# Alternative text that also identifies an edge in a calendar event,
# keyed by the environment-qualified edge name
room_mapping = {
    "ace-menlo-pixel-production": "(Compute)-MP-1-Aether Production",
    "ace-menlo-staging": "(Compute)-MP-1-Aether Staging",
}
| 60 | |
# Prometheus gauges, each labelled with the edge name
cp_status = prom.Gauge(
    "aetheredge_status_control_plane",
    "Control plane status code",
    labelnames=["name"],
)
up_status = prom.Gauge(
    "aetheredge_status_user_plane",
    "User plane status code",
    labelnames=["name"],
)
last_update = prom.Gauge(
    "aetheredge_last_update",
    "Last reported test result",
    labelnames=["name"],
)
maint_window = prom.Gauge(
    "aetheredge_in_maintenance_window",
    "Currently in a maintenance window",
    labelnames=["name"],
)
| 65 | |
def is_my_event(event, name):
    """Return True if a calendar event refers to the edge called ``name``.

    The event's ``summary``, ``location`` and ``description`` attributes are
    searched, in that order, for either the edge's full name or the room
    string that ``room_mapping`` associates with that full name.

    Names starting with "ace-" are qualified with the monitored environment
    ("<name>-<AETHER_ENV>") before matching; other names are used as-is.

    Args:
        event: object exposing (some of) the attributes listed above.
        name: edge name as reported by the agent.

    Returns:
        True on the first matching field, False if no field matches.
    """
    # The qualified name does not depend on the field, so compute it once.
    fullname = name
    if name.startswith("ace-"):
        fullname = "%s-%s" % (name, AETHER_ENV)

    for field in ("summary", "location", "description"):
        text = getattr(event, field, "")
        # An attribute can exist but hold None (presumably when the calendar
        # entry leaves the field blank); `x in None` would raise TypeError.
        if text is None:
            continue
        if fullname in text:
            return True
        if fullname in room_mapping and room_mapping[fullname] in text:
            return True
    return False
| 76 | |
def is_naive_datetime(d):
    """Tell whether datetime ``d`` carries no usable UTC offset.

    A datetime is naive when it has no tzinfo at all, or when its tzinfo
    reports no UTC offset for it.
    """
    tz = d.tzinfo
    if tz is None:
        return True
    return tz.utcoffset(d) is None
| 79 | |
def process_all_day_events(es):
    """Give all-day events timezone-aware start and end times, in place.

    For every event flagged ``all_day``, a naive ``start`` and/or ``end`` is
    localized to US/Pacific. Other events are left untouched. Returns None.
    """
    for event in es:
        if not event.all_day:
            continue
        # All day events have naive datetimes, which breaks comparisons
        pacific = pytz.timezone('US/Pacific')
        for attr in ('start', 'end'):
            value = getattr(event, attr)
            if is_naive_datetime(value):
                setattr(event, attr, pacific.localize(value))
| 89 | |
def in_maintenance_window(events, name, now):
    """Return True if an event for edge ``name`` is in progress at ``now``.

    An event is in progress when it started strictly before ``now`` and ends
    strictly after it; only such events are checked against the edge name.
    """
    return any(
        ev.start < now and ev.end > now and is_my_event(ev, name)
        for ev in events
    )
| 96 | |
def pull_maintenance_events():
    """Poll the maintenance calendar forever and flag edges in a window.

    Once a minute the events are fetched from SECRET_ICAL_URL, all-day events
    are made timezone-aware, and every edge gets its
    ``edge['maintenance']['in_window']`` flag and
    ``edge['maintenance']['last_update']`` timestamp refreshed.

    This function never returns; it is meant to run in its own thread.
    Any failure in one polling round is reported and the round is retried on
    the next tick, leaving the previously stored flags in place.
    """
    while True:
        now = datetime.datetime.now(pytz.utc)
        try:
            es = events(SECRET_ICAL_URL, start=now)
            process_all_day_events(es)
            # Iterate over a snapshot: request handlers may add or delete
            # edges while this thread is walking the list.
            for edge in list(edges):
                maintenance = edge.setdefault('maintenance', {})
                maintenance['in_window'] = in_maintenance_window(es, edge['name'], now)
                maintenance['last_update'] = time.time()
        except Exception as e:
            # The whole round is guarded, not just the fetch: an error while
            # evaluating events must not terminate the polling thread.
            print("maintenance calendar update failed: %s" % e)
        time.sleep(60)
Andy Bavier | 4021a2f | 2020-07-29 12:39:47 -0700 | [diff] [blame] | 112 | |
def time_out_stale_results():
    """Mark edges that have not reported recently as "no result".

    An edge whose last update is more than NO_RESULT_THRESHOLD seconds old
    gets both its control plane and user plane status overwritten.
    """
    for edge in edges:
        if time.time() - edge["last_update"] <= NO_RESULT_THRESHOLD:
            continue
        for plane in ('control_plane', 'user_plane'):
            edge['status'][plane] = "no result"
| 119 | |
def remove_edge_from_metrics(name):
    """Drop the label set for edge ``name`` from every exported gauge.

    Removal is best-effort: a gauge that never recorded a value for this edge
    is simply skipped, and a miss on one gauge does not prevent cleanup of
    the others.
    """
    for gauge in (cp_status, up_status, last_update, maint_window):
        try:
            gauge.remove(name)
        except KeyError:
            # No sample was ever recorded for this edge on this gauge.
            pass
Andy Bavier | 4021a2f | 2020-07-29 12:39:47 -0700 | [diff] [blame] | 132 | |
@app.route('/edges/metrics', methods=['GET'])
def get_prometheus_metrics():
    """Serve the edge gauges in Prometheus text exposition format.

    Stale edges are timed out first, then each gauge is refreshed from the
    current edge records (the "ace-example" placeholder is skipped) and the
    four gauges are rendered into the plain-text response.
    """
    time_out_stale_results()
    for edge in edges:
        edge_name = edge['name']
        if edge_name == "ace-example":
            continue

        status = edge['status']
        cp_status.labels(edge_name).set(status_codes[status['control_plane']])
        up_status.labels(edge_name).set(status_codes[status['user_plane']])
        last_update.labels(edge_name).set(edge['last_update'])
        # The maintenance record only exists once the calendar poller has
        # processed this edge.
        if 'maintenance' in edge:
            maint_window.labels(edge_name).set(int(edge['maintenance']['in_window']))

    gauges = (cp_status, up_status, last_update, maint_window)
    body = [prom.generate_latest(gauge) for gauge in gauges]
    return Response(body, mimetype="text/plain")
| 153 | |
Hyunsun Moon | f32ae9a | 2020-05-28 13:17:45 -0700 | [diff] [blame] | 154 | |
@app.route('/edges/healthz', methods=['GET'])
def get_health():
    """Health check endpoint; always reports the server as healthy."""
    return dict(message='healthy')
| 158 | |
| 159 | |
@app.route('/edges', methods=['GET'])
def get_edges():
    """Return every known edge as JSON, after timing out stale results."""
    time_out_stale_results()
    return jsonify(edges=edges)
| 164 | |
| 165 | |
@app.route('/edges/<string:name>', methods=['GET'])
def get_edge(name):
    """Return the edge called ``name`` as JSON, or 404 if it is unknown."""
    time_out_stale_results()
    match = next((e for e in edges if e['name'] == name), None)
    if match is None:
        abort(404)
    return jsonify({'edge': match})
| 173 | |
| 174 | |
@app.route('/edges', methods=['POST'])
def create_or_update_edge():
    """Register a new edge or refresh the status of an existing one.

    Expects a JSON object of the form::

        {"name": "<edge>",
         "status": {"control_plane": "<status>", "user_plane": "<status>"}}

    where each status is one of the keys of ``status_codes``.

    Responds with 400 when the body is not a JSON object, when "name" is not
    a string, when "status" is not an object, or when either plane status is
    missing or unknown. On success the edge's ``last_update`` is set to the
    current time and the accepted record is echoed back with status 201.
    """
    # silent=True turns an unparsable or non-JSON body into None so that it
    # is rejected with the same 400 as any other malformed request.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        abort(400)

    name = payload.get('name')
    status = payload.get('status')
    if not isinstance(name, str) or not isinstance(status, dict):
        abort(400)

    planes = {}
    for plane in ('control_plane', 'user_plane'):
        value = status.get(plane)
        # Unknown status strings are rejected here because the metrics
        # endpoint looks every stored status up in status_codes.
        if not isinstance(value, str) or value not in status_codes:
            abort(400)
        planes[plane] = value

    req_edge = {
        'name': name,
        'status': planes,
        'last_update': time.time()
    }

    existing = next((e for e in edges if e['name'] == name), None)
    if existing is None:
        print("new edge request " + name)
        edges.append(req_edge)
    else:
        # Update in place so any other fields on the stored record
        # (e.g. 'maintenance') are preserved.
        existing['status']['control_plane'] = planes['control_plane']
        existing['status']['user_plane'] = planes['user_plane']
        existing['last_update'] = req_edge['last_update']

    return jsonify({'edge': req_edge}), 201
| 203 | |
| 204 | |
@app.route('/edges/<string:name>', methods=['DELETE'])
def delete_edge(name):
    """Delete the edge called ``name`` and its metrics; 404 if it is unknown."""
    print("delete edge request " + name)
    for index, candidate in enumerate(edges):
        if candidate['name'] == name:
            del edges[index]
            remove_edge_from_metrics(name)
            break
    else:
        # Loop finished without finding (and deleting) a matching edge.
        abort(404)
    return jsonify({'result': True})
| 218 | |
| 219 | |
if __name__ == '__main__':
    # Calendar polling is optional: it only starts when both the calendar URL
    # and the environment name are configured.
    if SECRET_ICAL_URL and AETHER_ENV:
        print(" * Starting maintenance calendar polling thread (Aether env: %s)" % AETHER_ENV)
        # Daemon thread: the polling loop never returns, so a non-daemon
        # thread would keep the process alive after the web server stops.
        t = threading.Thread(target=pull_maintenance_events, daemon=True)
        t.start()
    # Debug mode is no longer forced on: the server listens on all
    # interfaces, and the interactive debugger allows arbitrary code
    # execution. Set FLASK_DEBUG=1 in the environment to enable it for
    # local development.
    app.run(host='0.0.0.0', port=80)