AETHER-510 Export maintenance window metric
Change-Id: I54aa39ec9cbc95162b2d6068f62348daa7da4d72
diff --git a/edge-monitoring/edge_monitoring_server.py b/edge-monitoring/edge_monitoring_server.py
index 41ed675..29d66f5 100755
--- a/edge-monitoring/edge_monitoring_server.py
+++ b/edge-monitoring/edge_monitoring_server.py
@@ -14,10 +14,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import os
import time
+import datetime
+import pytz
+import threading
+from icalevents.icalevents import events
from flask import Flask, jsonify, abort, request, Response
import prometheus_client as prom
+SECRET_ICAL_URL = os.environ.get("SECRET_ICAL_URL")  # calendar URL handed to icalevents' events(); None when the variable is unset
NO_RESULT_THRESHOLD = 720
app = Flask(__name__)
@@ -28,7 +34,7 @@
'control_plane': 'connected',
'user_plane': 'connected'
},
- 'last_update': time.time()
+ 'last_update': time.time(),
}
]
@@ -40,9 +46,45 @@
"connected": 2
}
+room_mapping = {  # edge name -> extra string in_maintenance_window() also searches event text for (presumably the calendar room name; confirm)
+ "production-edge-onf-menlo": "(Compute)-MP-1-Aether Production",
+ "production-edge-example": "(Compute)-MP-1-Aether Production" # for testing
+}
+
cp_status = prom.Gauge("aetheredge_status_control_plane", "Control plane status code", ["name"])
up_status = prom.Gauge("aetheredge_status_user_plane", "User plane status code", ["name"])
last_update = prom.Gauge("aetheredge_last_update", "Last reported test result", ["name"])
+maint_window = prom.Gauge("aetheredge_in_maintenance_window", "Currently in a maintenance window", ["name"])  # set per edge name to int(in_window): 1 inside a window, 0 otherwise
+
+def is_my_event(event, name):
+    """Return True if name occurs in the event's summary, location or description."""
+    # A field that is missing or None counts as empty text; `name in None` would raise TypeError.
+    fields = ("summary", "location", "description")
+    return any(name in (getattr(event, field, None) or "") for field in fields)
+
+def in_maintenance_window(events, name, now):
+    """Return True if an event spanning now mentions name or its room_mapping entry."""
+    # Try the edge name first, then the string room_mapping associates with it, if any.
+    labels = [name] + ([room_mapping[name]] if name in room_mapping else [])
+    return any(
+        entry.start < now < entry.end and any(is_my_event(entry, label) for label in labels)
+        for entry in events
+    )
+
+def pull_maintenance_events():
+    """Poll the calendar every 60 seconds and record each edge's maintenance state; never returns."""
+    while True:
+        now = datetime.datetime.now(pytz.utc)
+        try:
+            es = events(SECRET_ICAL_URL, start=now)
+            for edge in edges:
+                # Publish a complete dict at once so readers never see 'maintenance' without 'in_window'.
+                in_window = in_maintenance_window(es, edge['name'], now)
+                edge['maintenance'] = {'in_window': in_window, 'last_update': time.time()}
+        except Exception as e:
+            # Broad on purpose: a failed fetch or a malformed event must not end the polling thread.
+            print(e)
+        time.sleep(60)
def time_out_stale_results():
for edge in edges:
@@ -63,10 +105,14 @@
cp_status.labels(edge['name']).set(status_codes[edge['status']['control_plane']])
up_status.labels(edge['name']).set(status_codes[edge['status']['user_plane']])
last_update.labels(edge['name']).set(edge['last_update'])
+ if 'maintenance' in edge:
+ maint_window.labels(edge['name']).set(int(edge['maintenance']['in_window']))
res.append(prom.generate_latest(cp_status))
res.append(prom.generate_latest(up_status))
res.append(prom.generate_latest(last_update))
+ res.append(prom.generate_latest(maint_window))
+
return Response(res, mimetype="text/plain")
@@ -121,4 +167,8 @@
if __name__ == '__main__':
+    if SECRET_ICAL_URL:
+        print(" * Starting maintenance calendar polling thread")
+        # Daemon thread: the endless polling loop must not keep the process alive at exit.
+        threading.Thread(target=pull_maintenance_events, daemon=True).start()
app.run(debug=True, host='0.0.0.0', port=80)