blob: 082f75f64679ed07e9e4fe7abf876e73f95de6b6 [file] [log] [blame]
Andy Baviercd986902021-04-23 16:50:25 -07001#!/usr/bin/env python
2
3# Copyright 2021-present Open Networking Foundation
4#
Andy Bavier200bd272022-06-09 11:15:51 -07005# SPDX-License-Identifier: Apache-2.0
Andy Baviercd986902021-04-23 16:50:25 -07006
7# This implements a simple agent that polls the central Edge Monitoring Server
8# to find out whether the local cluster is in a maintenance window, and exports
9# the result as a Prometheus metric. The metric can be used to inhibit alerts
10# from the local cluster.
11
12import os
13import json
14import time
Andy Bavier34e13ba2022-06-09 15:33:56 -070015import base64
16import logging
Andy Baviercd986902021-04-23 16:50:25 -070017import threading
18import urllib.request
19from flask import Flask, Response
20import prometheus_client as prom
21
# Endpoint on the Edge Monitoring Server that reports this edge's status.
AETHER_EDGE_STATUS_URL = os.getenv("AETHER_EDGE_STATUS_URL")

# Credentials used to build the HTTP basic-auth header.
AETHER_USERNAME = os.getenv("AETHER_USERNAME")
AETHER_PASSWORD = os.getenv("AETHER_PASSWORD")

# Number of seconds the polling loop sleeps between iterations.
SLEEP_INTERVAL = 60

# Flask application serving the HTTP endpoints, and the gauge they export.
app = Flask(__name__)
maint_window = prom.Gauge(
    "aetheredge_in_maintenance_window",
    "Currently in a maintenance window",
)
34
# Upper bound, in seconds, on a single status request so that a stalled
# connection cannot block the polling thread indefinitely.
_REQUEST_TIMEOUT = 10


def pull_maintenance_events():
    """Poll the Edge Monitoring Server forever and export the result.

    Each iteration requests AETHER_EDGE_STATUS_URL, reads
    ``data['edge']['maintenance']['in_window']`` from the JSON reply and
    stores it, converted to int, in the ``maint_window`` gauge. Any failure
    is logged as a warning and the gauge keeps its previous value. The loop
    then sleeps SLEEP_INTERVAL seconds and tries again. Never returns.
    """
    while True:
        # Pull latest status
        app.logger.info("Pulling edge status from %s", AETHER_EDGE_STATUS_URL)
        try:
            request = urllib.request.Request(AETHER_EDGE_STATUS_URL)

            # Only send basic-auth credentials when at least one of them is
            # configured; otherwise the header would carry the literal
            # string "None:None".
            if AETHER_USERNAME is not None or AETHER_PASSWORD is not None:
                credentials = "%s:%s" % (AETHER_USERNAME or "", AETHER_PASSWORD or "")
                # UTF-8 yields the same bytes as ASCII for ASCII input and
                # does not raise on non-ASCII credentials.
                token = base64.b64encode(credentials.encode("utf-8")).decode("ascii")
                request.add_header("Authorization", "Basic %s" % token)

            # The context manager closes the connection even if JSON
            # decoding fails.
            with urllib.request.urlopen(request, timeout=_REQUEST_TIMEOUT) as response:
                data = json.load(response)
            app.logger.debug(" Response: %s", data)

            # Export metric to Prometheus
            in_window = data['edge']['maintenance']['in_window']
            app.logger.info("In maintenance window: %s", in_window)
            maint_window.set(int(in_window))
        except Exception as e:
            # Deliberately broad: this is the top of the polling thread and
            # one bad poll must not stop future ones.
            app.logger.warning("Could not retrieve edge status, will keep trying (%s)", e)

        time.sleep(SLEEP_INTERVAL)
56
@app.route('/metrics', methods=['GET'])
def get_prometheus_metrics():
    """Serve the current value of the maintenance-window gauge.

    Returns a plain-text Response whose body is the output of
    ``prom.generate_latest`` for ``maint_window``.
    """
    payload = [prom.generate_latest(maint_window)]
    return Response(payload, mimetype="text/plain")
62
@app.route('/healthz', methods=['GET'])
def get_health():
    """Health-check endpoint; always answers with a fixed 'healthy' message."""
    status = {'message': 'healthy'}
    return status
66
if __name__ == '__main__':
    # Script entry point: validate configuration, start the background
    # poller, then serve the HTTP endpoints on port 8080.
    app.logger.setLevel(logging.INFO)
    if not AETHER_EDGE_STATUS_URL:
        app.logger.error("AETHER_EDGE_STATUS_URL must be present in the local environment")
        # exit() is a helper injected by the site module and is not
        # guaranteed to exist; SystemExit always is.
        raise SystemExit(1)
    app.logger.info("Starting maintenance window polling thread")
    app.logger.info("AETHER_EDGE_STATUS_URL: %s", AETHER_EDGE_STATUS_URL)
    # Daemon thread: the poller loops forever, so a non-daemon thread would
    # keep the process alive after the web server stops.
    t = threading.Thread(target=pull_maintenance_events, daemon=True)
    t.start()
    # The server listens on all interfaces, so debug mode stays off: the
    # Werkzeug interactive debugger allows arbitrary code execution.
    app.run(debug=False, host='0.0.0.0', port=8080, use_reloader=False)