AETHER-752 Fix bug where scheduled downtime was applied to the wrong pod
Change-Id: I7f2771f6a8da852a0f944ae633435d2a5cf587ed
diff --git a/edge-monitoring/edge_monitoring_server.py b/edge-monitoring/edge_monitoring_server.py
index 1bea10c..c5bb2f4 100755
--- a/edge-monitoring/edge_monitoring_server.py
+++ b/edge-monitoring/edge_monitoring_server.py
@@ -23,13 +23,20 @@
from flask import Flask, jsonify, abort, request, Response
import prometheus_client as prom
+# URL of maintenance calendar
SECRET_ICAL_URL = os.environ.get("SECRET_ICAL_URL")
+
+# Aether environment that the server is monitoring (e.g., "production")
+# To schedule downtime, append the env to the cluster name in the calendar event's summary, location, or description, e.g. "ace-tucson-production"
+AETHER_ENV = os.environ.get("AETHER_ENV", "production")
+
+# Move to "no result" status if we don't hear from agent for this many seconds
NO_RESULT_THRESHOLD = 720
app = Flask(__name__)
edges = [
{
- 'name': 'production-edge-example',
+ 'name': 'ace-example',
'status': {
'control_plane': 'connected',
'user_plane': 'connected'
@@ -47,8 +54,7 @@
}
room_mapping = {
- "production-edge-onf-menlo": "(Compute)-MP-1-Aether Production",
- "production-edge-example": "(Compute)-MP-1-Aether Production" # for testing
+ "ace-menlo-pixel": "(Compute)-MP-1-Aether Production"
}
cp_status = prom.Gauge("aetheredge_status_control_plane", "Control plane status code", ["name"])
@@ -58,8 +64,14 @@
def is_my_event(event, name):
for field in ["summary", "location", "description"]:
- if name in getattr(event, field, ""):
+ fullname = name
+ if name.startswith("ace-"):
+ fullname = "%s-%s" % (name, AETHER_ENV)
+ if fullname in getattr(event, field, ""):
return True
+ if name in room_mapping:
+ if room_mapping[name] in getattr(event, field, ""):
+ return True
return False
def is_naive_datetime(d):
@@ -80,8 +92,6 @@
if event.start < now and event.end > now:
if is_my_event(event, name):
return True
- if name in room_mapping and is_my_event(event, room_mapping[name]):
- return True
return False
def pull_maintenance_events():
@@ -113,7 +123,7 @@
res = []
time_out_stale_results()
for edge in edges:
- if edge['name'] == "production-edge-example":
+ if edge['name'] == "ace-example":
continue
cp_status.labels(edge['name']).set(status_codes[edge['status']['control_plane']])
@@ -195,8 +205,8 @@
if __name__ == '__main__':
- if SECRET_ICAL_URL:
- print(" * Starting maintenance calendar polling thread")
+ if SECRET_ICAL_URL and AETHER_ENV:
+ print(" * Starting maintenance calendar polling thread (Aether env: %s)" % AETHER_ENV)
t = threading.Thread(target=pull_maintenance_events)
t.start()
app.run(debug=True, host='0.0.0.0', port=80)