Add binary test result metrics to Prometheus exporter
Change-Id: I3dfc620ce397ef8eb919c0dc3bfc224b27687608
diff --git a/edge-monitoring/edge_monitoring_server.py b/edge-monitoring/edge_monitoring_server.py
index aa9de66..47ae6f3 100755
--- a/edge-monitoring/edge_monitoring_server.py
+++ b/edge-monitoring/edge_monitoring_server.py
@@ -53,13 +53,29 @@
"connected": 2
}
+# Collapse the 5 status levels into 3 result labels for simpler Prometheus metrics
+result_mapping = {
+ "no result": "no result",
+ "error": "no result",
+ "disconnected": "fail",
+ "connecting": "fail",
+ "connected": "ok"
+}
+
room_mapping = {
"ace-menlo-pixel-production": "(Compute)-MP-1-Aether Production",
"ace-menlo-staging": "(Compute)-MP-1-Aether Staging"
}
+# Legacy test status metrics, reporting a status code between -2 and 2
cp_status = prom.Gauge("aetheredge_status_control_plane", "Control plane status code", ["name"])
up_status = prom.Gauge("aetheredge_status_user_plane", "User plane status code", ["name"])
+
+# Simplified test result metrics: a 0/1 gauge per (name, result) label pair
+connect_test = prom.Gauge("aetheredge_connect_test", "Result of last connect test", ["name", "result"])
+ping_test = prom.Gauge("aetheredge_ping_test", "Result of last ping test", ["name", "result"])
+
+# Other metrics
last_update = prom.Gauge("aetheredge_last_update", "Last reported test result", ["name"])
maint_window = prom.Gauge("aetheredge_in_maintenance_window", "Currently in a maintenance window", ["name"])
@@ -126,6 +142,13 @@
pass
try:
+ for result in list(set(result_mapping.values())):
+ connect_test.remove(name, result)
+ ping_test.remove(name, result)
+ except:
+ pass
+
+ try:
maint_window.remove(name)
except:
pass
@@ -133,6 +156,7 @@
@app.route('/edges/metrics', methods=['GET'])
def get_prometheus_metrics():
res = []
+ result_list = list(set(result_mapping.values()))
time_out_stale_results()
for edge in edges:
if edge['name'] == "ace-example":
@@ -144,10 +168,20 @@
if 'maintenance' in edge:
maint_window.labels(edge['name']).set(int(edge['maintenance']['in_window']))
+ for result in result_list:
+ connect_test.labels(edge['name'], result).set(0)
+ ping_test.labels(edge['name'], result).set(0)
+ result = result_mapping[edge['status']['control_plane']]
+ connect_test.labels(edge['name'], result).set(1)
+ result = result_mapping[edge['status']['user_plane']]
+ ping_test.labels(edge['name'], result).set(1)
+
res.append(prom.generate_latest(cp_status))
res.append(prom.generate_latest(up_status))
res.append(prom.generate_latest(last_update))
res.append(prom.generate_latest(maint_window))
+ res.append(prom.generate_latest(connect_test))
+ res.append(prom.generate_latest(ping_test))
return Response(res, mimetype="text/plain")