blob: 68f06c4e94342f931c146d647f04f97f8b0458c9 [file] [log] [blame]
#!/usr/bin/env python
# Copyright 2021-present Open Networking Foundation
#
# SPDX-License-Identifier: LicenseRef-ONF-Member-Only-1.0
# This implements a simple agent that polls the central Edge Monitoring Server
# to find out whether the local cluster is in a maintenance window, and exports
# the result as a Prometheus metric. The metric can be used to inhibit alerts
# from the local cluster.
import os
import json
import time
import threading
import urllib.request
from flask import Flask, Response
import prometheus_client as prom
# URL of the Edge Monitoring Server from which this edge's status is fetched,
# i.e. the server URL with /<edge-name> appended at the end.
# Read from the environment; the value is None when the variable is unset.
AETHER_EDGE_STATUS_URL = os.environ.get("AETHER_EDGE_STATUS_URL")
# Seconds to sleep at the end of each polling loop iteration
SLEEP_INTERVAL = 60
# Flask application that serves the HTTP routes registered in this file
app = Flask(__name__)
# Gauge exported to Prometheus; the polling loop sets it to int(in_window)
# taken from the status document returned by the Edge Monitoring Server.
maint_window = prom.Gauge("aetheredge_in_maintenance_window", "Currently in a maintenance window")
def pull_maintenance_events():
    """Poll the Edge Monitoring Server forever and export the result.

    Each iteration fetches the JSON document at AETHER_EDGE_STATUS_URL,
    reads data['edge']['maintenance']['in_window'] and stores int() of it
    in the maint_window gauge, then sleeps SLEEP_INTERVAL seconds.

    Any failure (network error, timeout, malformed JSON, missing key,
    value not convertible with int()) is logged and the loop continues;
    in that case the gauge keeps whatever value it had before.

    This function never returns; it is meant to run in its own thread.
    """
    while True:
        # Pull latest status
        print("[INFO] Pulling edge status from %s" % AETHER_EDGE_STATUS_URL)
        try:
            # Bound the request so a hung server cannot stall polling
            # indefinitely; one polling period is the upper limit. The
            # context manager closes the connection even on parse errors.
            with urllib.request.urlopen(
                AETHER_EDGE_STATUS_URL, timeout=SLEEP_INTERVAL
            ) as response:
                data = json.load(response)

            # Export metric to Prometheus
            in_window = data['edge']['maintenance']['in_window']
            print("[INFO] In maintenance window: %s" % in_window)
            maint_window.set(int(in_window))
        except Exception as err:
            # Best-effort polling: report the reason and retry next cycle.
            # Exception (not a bare except) lets SystemExit and
            # KeyboardInterrupt propagate.
            print(
                "[WARN] Could not retrieve edge status, will keep trying: %r"
                % (err,)
            )
        time.sleep(SLEEP_INTERVAL)
@app.route('/metrics', methods=['GET'])
def get_prometheus_metrics():
    """Handle GET /metrics.

    Returns a text/plain response whose body is the output of
    prom.generate_latest() for the maint_window gauge.
    """
    payload = [prom.generate_latest(maint_window)]
    return Response(payload, mimetype="text/plain")
@app.route('/healthz', methods=['GET'])
def get_health():
    """Handle GET /healthz by returning a fixed 'healthy' message."""
    status = {'message': 'healthy'}
    return status
# Script entry point: validate configuration, start the background polling
# thread, then serve /metrics and /healthz on port 8080.
if __name__ == '__main__':
    if not AETHER_EDGE_STATUS_URL:
        print("[ERROR] AETHER_EDGE_STATUS_URL must be present in the local environment")
        # raise SystemExit instead of exit(): exit() is a helper installed
        # by the site module and is not guaranteed to be available.
        raise SystemExit(1)

    print(" * Starting maintenance window polling thread")
    print(" * AETHER_EDGE_STATUS_URL: %s" % AETHER_EDGE_STATUS_URL)

    # Daemon thread: the polling loop never returns, so a non-daemon thread
    # would keep the process alive after the web server has stopped.
    t = threading.Thread(target=pull_maintenance_events, daemon=True)
    t.start()

    # debug must stay off here: the server listens on all interfaces, and
    # debug mode exposes the Werkzeug interactive debugger (remote code
    # execution) and enables the reloader, which re-executes this script so
    # the polling thread would run in two processes.
    app.run(debug=False, host='0.0.0.0', port=8080)