blob: 082f75f64679ed07e9e4fe7abf876e73f95de6b6 [file] [log] [blame]
#!/usr/bin/env python
# Copyright 2021-present Open Networking Foundation
#
# SPDX-License-Identifier: Apache-2.0
# This implements a simple agent that polls the central Edge Monitoring Server
# to find out whether the local cluster is in a maintenance window, and exports
# the result as a Prometheus metric. The metric can be used to inhibit alerts
# from the local cluster.
import os
import json
import time
import base64
import logging
import threading
import urllib.request
from flask import Flask, Response
import prometheus_client as prom
# --- Configuration, read once from the process environment ---

# Endpoint on the Edge Monitoring Server that reports this edge's status
AETHER_EDGE_STATUS_URL = os.getenv("AETHER_EDGE_STATUS_URL")

# Credentials sent as HTTP basic auth with every status request
AETHER_USERNAME = os.getenv("AETHER_USERNAME")
AETHER_PASSWORD = os.getenv("AETHER_PASSWORD")

# Pause between two consecutive polls, in seconds
SLEEP_INTERVAL = 60

# Web application that serves the /metrics and /healthz endpoints
app = Flask(import_name=__name__)

# Gauge exported to Prometheus; set from the 'in_window' field of the status
maint_window = prom.Gauge(
    name="aetheredge_in_maintenance_window",
    documentation="Currently in a maintenance window",
)
# Seconds to wait on the Edge Monitoring Server before abandoning one poll.
# Without a timeout a stalled connection would block the polling thread
# forever and the exported metric would silently go stale.
_REQUEST_TIMEOUT = 30


def _fetch_edge_status():
    """Fetch and decode this edge's status document (one request, no retry).

    Sends a GET to AETHER_EDGE_STATUS_URL with an HTTP basic auth header built
    from AETHER_USERNAME / AETHER_PASSWORD and parses the response body as JSON.

    Returns:
        The decoded JSON document.

    Raises:
        Exception: any failure while building the request, talking to the
            server (including a timeout) or decoding the body is propagated
            to the caller.
    """
    request = urllib.request.Request(AETHER_EDGE_STATUS_URL)
    # Encode as UTF-8 rather than ASCII so credentials containing non-ASCII
    # characters do not make every poll fail; ASCII input encodes identically.
    credentials = "%s:%s" % (AETHER_USERNAME, AETHER_PASSWORD)
    token = base64.b64encode(credentials.encode("utf-8")).decode("ascii")
    request.add_header("Authorization", "Basic %s" % token)
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(request, timeout=_REQUEST_TIMEOUT) as response:
        return json.load(response)


def pull_maintenance_events():
    """Poll the Edge Monitoring Server forever and export the result.

    Every SLEEP_INTERVAL seconds the edge status is fetched and the
    maint_window gauge is set to int(data['edge']['maintenance']['in_window']).
    Any failure during a poll is logged as a warning and the gauge keeps its
    previous value; the loop then carries on with the next poll.

    This function never returns; it is meant to run in its own thread.
    """
    while True:
        # Pull latest status
        app.logger.info("Pulling edge status from %s", AETHER_EDGE_STATUS_URL)
        try:
            data = _fetch_edge_status()
            app.logger.debug(" Response: %s", data)

            # Export metric to Prometheus
            in_window = data['edge']['maintenance']['in_window']
            app.logger.info("In maintenance window: %s", in_window)
            maint_window.set(int(in_window))
        except Exception as e:
            # Deliberately broad: this is the top of the polling thread and a
            # single bad poll (network error, bad JSON, missing key) must not
            # kill it.
            app.logger.warning("Could not retrieve edge status, will keep trying (%s)", e)

        time.sleep(SLEEP_INTERVAL)
@app.route('/metrics', methods=['GET'])
def get_prometheus_metrics():
    """Serve the current value of the maintenance-window gauge.

    Returns:
        A text/plain Response whose body is the output of
        prom.generate_latest() for the maint_window gauge.
    """
    payload = [prom.generate_latest(maint_window)]
    return Response(payload, mimetype="text/plain")
@app.route('/healthz', methods=['GET'])
def get_health():
    """Health-check endpoint; unconditionally reports the agent as healthy."""
    status = {'message': 'healthy'}
    return status
if __name__ == '__main__':
    # Script entry point: validate configuration, start the background
    # poller, then serve /metrics and /healthz on port 8080.
    app.logger.setLevel(logging.INFO)

    # Nothing can be polled without the status URL, so fail fast.
    if not AETHER_EDGE_STATUS_URL:
        app.logger.error("AETHER_EDGE_STATUS_URL must be present in the local environment")
        # SystemExit instead of exit(): exit() is a helper injected by the
        # site module and is not guaranteed to exist in every interpreter.
        raise SystemExit(1)

    app.logger.info("Starting maintenance window polling thread")
    app.logger.info("[AETHER_EDGE_STATUS_URL: %s]", AETHER_EDGE_STATUS_URL)

    # The poller loops forever; as a daemon thread it cannot keep the process
    # alive once the web server (main thread) has stopped.
    t = threading.Thread(target=pull_maintenance_events, daemon=True)
    t.start()

    # debug is off on purpose: this server listens on all interfaces, and
    # Flask's debug mode would expose the interactive Werkzeug debugger
    # (arbitrary code execution) to anything that can reach the port.
    app.run(debug=False, host='0.0.0.0', port=8080, use_reloader=False)