Andy Bavier | cd98690 | 2021-04-23 16:50:25 -0700 | [diff] [blame^] | 1 | #!/usr/bin/env python |
| 2 | |
| 3 | # Copyright 2021-present Open Networking Foundation |
| 4 | # |
| 5 | # SPDX-License-Identifier: LicenseRef-ONF-Member-Only-1.0 |
| 6 | |
| 7 | # This implements a simple agent that polls the central Edge Monitoring Server |
| 8 | # to find out whether the local cluster is in a maintenance window, and exports |
| 9 | # the result as a Prometheus metric. The metric can be used to inhibit alerts |
| 10 | # from the local cluster. |
| 11 | |
| 12 | import os |
| 13 | import json |
| 14 | import time |
| 15 | import threading |
| 16 | import urllib.request |
| 17 | from flask import Flask, Response |
| 18 | import prometheus_client as prom |
| 19 | |
# Where to fetch this edge's status on the Edge Monitoring Server.
# The URL is expected to end in /<edge-name>.
AETHER_EDGE_STATUS_URL = os.getenv("AETHER_EDGE_STATUS_URL")

# Pause, in seconds, between two consecutive polls
SLEEP_INTERVAL = 60

# Web application that serves the /metrics and /healthz endpoints
app = Flask(__name__)

# Gauge holding the most recently fetched maintenance-window flag
maint_window = prom.Gauge(
    "aetheredge_in_maintenance_window",
    "Currently in a maintenance window",
)
| 29 | |
def pull_maintenance_events():
    """Poll the Edge Monitoring Server forever and export the result as a gauge.

    On every iteration this fetches AETHER_EDGE_STATUS_URL, parses the body
    as JSON, reads data['edge']['maintenance']['in_window'] and stores it,
    converted to int, in the maint_window gauge. It then sleeps for
    SLEEP_INTERVAL seconds.

    Any failure while fetching, parsing or converting is logged and the loop
    carries on; the gauge keeps whatever value it had before.

    This function never returns, so run it in its own thread.
    """
    while True:
        # Pull latest status
        print("[INFO] Pulling edge status from %s" % AETHER_EDGE_STATUS_URL)
        try:
            # The timeout keeps a hung server from stalling the poller for
            # good; the context manager closes the connection every time.
            with urllib.request.urlopen(
                AETHER_EDGE_STATUS_URL, timeout=SLEEP_INTERVAL
            ) as response:
                data = json.load(response)

            # Export metric to Prometheus
            in_window = data['edge']['maintenance']['in_window']
            print("[INFO] In maintenance window: %s" % in_window)
            maint_window.set(int(in_window))
        except Exception as err:
            # Best effort by design: report the reason and retry next round.
            # Exception (not a bare except) lets SystemExit and
            # KeyboardInterrupt propagate.
            print(
                "[WARN] Could not retrieve edge status, will keep trying: %s"
                % err
            )

        time.sleep(SLEEP_INTERVAL)
| 48 | |
@app.route('/metrics', methods=['GET'])
def get_prometheus_metrics():
    """Serve the maintenance-window gauge to a scraper.

    Returns:
        A text/plain Response whose body is the output of
        prom.generate_latest() for the maint_window gauge.
    """
    payload = [prom.generate_latest(maint_window)]
    return Response(payload, mimetype="text/plain")
| 54 | |
@app.route('/healthz', methods=['GET'])
def get_health():
    """Answer health probes with a constant 'healthy' message."""
    status = dict(message='healthy')
    return status
| 58 | |
if __name__ == '__main__':
    # Script entry point: check configuration, start the background poller,
    # then serve /metrics and /healthz on port 8080 on all interfaces.
    if not AETHER_EDGE_STATUS_URL:
        print("[ERROR] AETHER_EDGE_STATUS_URL must be present in the local environment")
        # exit() is a helper injected by the site module and is not always
        # available; raising SystemExit has the same effect everywhere.
        raise SystemExit(1)

    print(" * Starting maintenance window polling thread")
    print(" * AETHER_EDGE_STATUS_URL: %s" % AETHER_EDGE_STATUS_URL)

    # Daemon thread: the poller loops forever, so a non-daemon thread would
    # keep the process alive after the web server has stopped.
    t = threading.Thread(target=pull_maintenance_events, daemon=True)
    t.start()

    # Debug mode is off on purpose. With debug=True the Werkzeug interactive
    # debugger is reachable on every interface this server binds to, and the
    # reloader re-executes this script, which starts a second polling thread.
    app.run(debug=False, host='0.0.0.0', port=8080)