AETHER-1585 Add edge maintenance agent
Change-Id: If4445db3581e1585416c1df7a035b31cd0b435e1
diff --git a/edge-monitoring/Dockerfile.server b/edge-monitoring/Dockerfile.server
index 7b44045..c64c39b 100644
--- a/edge-monitoring/Dockerfile.server
+++ b/edge-monitoring/Dockerfile.server
@@ -18,5 +18,6 @@
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
COPY edge_monitoring_server.py ./
+COPY edge_maintenance_agent.py ./
CMD ["python", "edge_monitoring_server.py"]
diff --git a/edge-monitoring/VERSION b/edge-monitoring/VERSION
index b49b253..a918a2a 100644
--- a/edge-monitoring/VERSION
+++ b/edge-monitoring/VERSION
@@ -1 +1 @@
-0.5.6
+0.6.0
diff --git a/edge-monitoring/edge_maintenance_agent.py b/edge-monitoring/edge_maintenance_agent.py
new file mode 100755
index 0000000..68f06c4
--- /dev/null
+++ b/edge-monitoring/edge_maintenance_agent.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+
+# Copyright 2021-present Open Networking Foundation
+#
+# SPDX-License-Identifier: LicenseRef-ONF-Member-Only-1.0
+
+# This implements a simple agent that polls the central Edge Monitoring Server
+# to find out whether the local cluster is in a maintenance window, and exports
+# the result as a Prometheus metric. The metric can be used to inhibit alerts
+# from the local cluster.
+
+import os
+import json
+import time
+import threading
+import urllib.request
+from flask import Flask, Response
+import prometheus_client as prom
+
+# URL of the Edge Monitoring Server where this edge's status can be fetched,
+# i.e., the server URL with /<edge-name> at the end. Read from the environment.
+AETHER_EDGE_STATUS_URL = os.environ.get("AETHER_EDGE_STATUS_URL")
+
+# Seconds to sleep at the end of each polling loop iteration
+SLEEP_INTERVAL = 60
+
+app = Flask(__name__)  # serves the /metrics and /healthz routes below
+maint_window = prom.Gauge("aetheredge_in_maintenance_window", "Currently in a maintenance window")
+
+def pull_maintenance_events():
+    """Poll AETHER_EDGE_STATUS_URL forever and export the maintenance flag as a gauge."""
+    while True:
+        # Pull latest status
+        print ("[INFO] Pulling edge status from %s" % AETHER_EDGE_STATUS_URL)
+        try:
+            # Bounded timeout so a hung server cannot stall the poller; `with` closes the response
+            with urllib.request.urlopen(AETHER_EDGE_STATUS_URL, timeout=SLEEP_INTERVAL) as response:
+                data = json.load(response)
+
+            # Export metric to Prometheus
+            in_window = data['edge']['maintenance']['in_window']
+            print ("[INFO] In maintenance window: %s" % in_window)
+            maint_window.set(int(in_window))
+        except Exception as err:  # best-effort poll: report the cause and retry next cycle
+            print("[WARN] Could not retrieve edge status, will keep trying: %s" % err)
+
+        time.sleep(SLEEP_INTERVAL)
+
+@app.route('/metrics', methods=['GET'])
+def get_prometheus_metrics():
+    """Serve the maintenance-window gauge in the Prometheus text exposition format."""
+    # Use the client library's own content type (includes the format version) for scrapers
+    return Response(prom.generate_latest(maint_window), content_type=prom.CONTENT_TYPE_LATEST)
+
+@app.route('/healthz', methods=['GET'])
+def get_health():  # health endpoint: always returns a fixed "healthy" message
+    return {'message': 'healthy'}
+
+if __name__ == '__main__':
+    if not AETHER_EDGE_STATUS_URL:
+        print("[ERROR] AETHER_EDGE_STATUS_URL must be present in the local environment")
+        raise SystemExit(1)  # same exit status as before, without relying on site's exit()
+    print(" * Starting maintenance window polling thread")
+    print(" * AETHER_EDGE_STATUS_URL: %s" % AETHER_EDGE_STATUS_URL)
+    # Daemon thread: the endless poller must not keep the process alive once Flask stops
+    threading.Thread(target=pull_maintenance_events, daemon=True).start()
+    app.run(debug=False, host='0.0.0.0', port=8080)  # no debugger/reloader on a public bind