Andy Bavier | cd98690 | 2021-04-23 16:50:25 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | |
| 3 | # Copyright 2021-present Open Networking Foundation |
| 4 | # |
Andy Bavier | 200bd27 | 2022-06-09 11:15:51 -0700 | [diff] [blame] | 5 | # SPDX-License-Identifier: Apache-2.0 |
Andy Bavier | cd98690 | 2021-04-23 16:50:25 -0700 | [diff] [blame] | 6 | |
| 7 | # This implements a simple agent that polls the central Edge Monitoring Server |
| 8 | # to find out whether the local cluster is in a maintenance window, and exports |
| 9 | # the result as a Prometheus metric. The metric can be used to inhibit alerts |
| 10 | # from the local cluster. |
| 11 | |
| 12 | import os |
| 13 | import json |
| 14 | import time |
Andy Bavier | 34e13ba | 2022-06-09 15:33:56 -0700 | [diff] [blame^] | 15 | import base64 |
| 16 | import logging |
Andy Bavier | cd98690 | 2021-04-23 16:50:25 -0700 | [diff] [blame] | 17 | import threading |
| 18 | import urllib.request |
| 19 | from flask import Flask, Response |
| 20 | import prometheus_client as prom |
| 21 | |
# Where this edge's status is fetched from on the Edge Monitoring Server
AETHER_EDGE_STATUS_URL = os.getenv("AETHER_EDGE_STATUS_URL")

# Credentials used for HTTP basic auth when fetching the status
AETHER_USERNAME = os.getenv("AETHER_USERNAME")
AETHER_PASSWORD = os.getenv("AETHER_PASSWORD")

# Pause between two consecutive polls, in seconds
SLEEP_INTERVAL = 60

app = Flask(__name__)

# Gauge exported on /metrics; set by the polling loop
maint_window = prom.Gauge(
    "aetheredge_in_maintenance_window",
    "Currently in a maintenance window",
)
| 34 | |
# Upper bound, in seconds, on a single status request so that a stalled
# connection cannot block the polling thread indefinitely.
_REQUEST_TIMEOUT = 30


def pull_maintenance_events():
    """Poll the Edge Monitoring Server forever and export the result.

    On every iteration the JSON document at AETHER_EDGE_STATUS_URL is
    fetched, ``data['edge']['maintenance']['in_window']`` is read from it and
    stored (converted to int) in the ``maint_window`` gauge.  Any failure is
    logged as a warning and the gauge is left untouched; the loop then sleeps
    SLEEP_INTERVAL seconds and tries again.

    This function never returns; it is meant to run in its own thread.
    """
    while True:
        # Pull latest status
        app.logger.info("Pulling edge status from %s", AETHER_EDGE_STATUS_URL)
        try:
            request = urllib.request.Request(AETHER_EDGE_STATUS_URL)

            # Only send basic-auth credentials when they are configured;
            # otherwise the literal string "None:None" would be transmitted.
            if AETHER_USERNAME is not None and AETHER_PASSWORD is not None:
                credentials = "%s:%s" % (AETHER_USERNAME, AETHER_PASSWORD)
                # UTF-8 is identical to ASCII for ASCII credentials and does
                # not raise on non-ASCII ones.
                token = base64.b64encode(credentials.encode("utf-8"))
                request.add_header(
                    "Authorization", "Basic %s" % token.decode("ascii")
                )

            # The context manager closes the connection even if parsing fails.
            with urllib.request.urlopen(
                request, timeout=_REQUEST_TIMEOUT
            ) as response:
                data = json.load(response)
            app.logger.debug(" Response: %s", data)

            # Export metric to Prometheus
            in_window = data['edge']['maintenance']['in_window']
            app.logger.info("In maintenance window: %s", in_window)
            maint_window.set(int(in_window))
        except Exception as e:
            # Deliberately broad: this is the top of a best-effort polling
            # loop and a single bad poll must not kill the thread.
            app.logger.warning(
                "Could not retrieve edge status, will keep trying (%s)", e
            )

        time.sleep(SLEEP_INTERVAL)
| 56 | |
@app.route('/metrics', methods=['GET'])
def get_prometheus_metrics():
    """Return the maintenance-window gauge as rendered by prom.generate_latest."""
    payload = [prom.generate_latest(maint_window)]
    return Response(payload, mimetype="text/plain")
| 62 | |
@app.route('/healthz', methods=['GET'])
def get_health():
    """Health-check endpoint: always answers with a fixed 'healthy' message."""
    status = {'message': 'healthy'}
    return status
| 66 | |
# Script entry point: validate configuration, start the background polling
# thread, then serve /metrics and /healthz over HTTP on port 8080.
if __name__ == '__main__':
    app.logger.setLevel(logging.INFO)
    if not AETHER_EDGE_STATUS_URL:
        app.logger.error("AETHER_EDGE_STATUS_URL must be present in the local environment")
        # SystemExit instead of exit(): the latter is a helper injected by the
        # `site` module and is not guaranteed to be available.
        raise SystemExit(1)
    app.logger.info("Starting maintenance window polling thread")
    app.logger.info("AETHER_EDGE_STATUS_URL: %s", AETHER_EDGE_STATUS_URL)
    # Daemon thread: the polling loop never returns, so a non-daemon thread
    # would keep the process alive after the web server has stopped.
    t = threading.Thread(target=pull_maintenance_events, daemon=True)
    t.start()
    # Debug mode is off because the server listens on all interfaces and the
    # interactive debugger must not be reachable from the network.
    app.run(debug=False, host='0.0.0.0', port=8080, use_reloader=False)