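Tweak the metrics to support computing uptime using avg_over_time

Add a combined aetheredge_e2e_tests_ok gauge that is set to 1 only when
both the connect and ping tests report "connected". Replace the per-test
aetheredge_connect_test_down and aetheredge_ping_test_down gauges with a
single aetheredge_e2e_tests_down gauge that is set to 1 when either test
reports "error" or "no result". While the tests are down, the *_ok gauges
are left at 0.

Change-Id: I086784a15d621ff8cf401c1d4621bd618fdfe733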
diff --git a/edge-monitoring/edge_monitoring_server.py b/edge-monitoring/edge_monitoring_server.py
index 8ebe8a9..2d0a9ed 100755
--- a/edge-monitoring/edge_monitoring_server.py
+++ b/edge-monitoring/edge_monitoring_server.py
@@ -63,10 +63,10 @@
up_status = prom.Gauge("aetheredge_status_user_plane", "User plane status code", ["name"])
# Simplified binary test result metrics
+e2e_tests_ok = prom.Gauge("aetheredge_e2e_tests_ok", "Last connect and ping test both passed", ["name"])
connect_test_ok = prom.Gauge("aetheredge_connect_test_ok", "Last connect test passed", ["name"])
-connect_test_down = prom.Gauge("aetheredge_connect_test_down", "Connect test not reporting", ["name"])
ping_test_ok = prom.Gauge("aetheredge_ping_test_ok", "Last ping test passed", ["name"])
-ping_test_down = prom.Gauge("aetheredge_ping_test_down", "Ping test not reporting", ["name"])
+e2e_tests_down = prom.Gauge("aetheredge_e2e_tests_down", "E2E tests not reporting", ["name"])
# Other metrics
last_update = prom.Gauge("aetheredge_last_update", "Last reported test result", ["name"])
@@ -131,10 +131,10 @@
cp_status.remove(name)
up_status.remove(name)
last_update.remove(name)
+ e2e_tests_ok.remove(name)
connect_test_ok.remove(name)
- connect_test_down.remove(name)
ping_test_ok.remove(name)
- ping_test_down.remove(name)
+ e2e_tests_down.remove(name)
except:
pass
@@ -151,34 +151,38 @@
if edge['name'] == "ace-example":
continue
- cp_status.labels(edge['name']).set(status_codes[edge['status']['control_plane']])
- up_status.labels(edge['name']).set(status_codes[edge['status']['user_plane']])
+ connect_status = edge['status']['control_plane']
+ ping_status = edge['status']['user_plane']
+
+ cp_status.labels(edge['name']).set(status_codes[connect_status])
+ up_status.labels(edge['name']).set(status_codes[ping_status])
last_update.labels(edge['name']).set(edge['last_update'])
if 'maintenance' in edge:
maint_window.labels(edge['name']).set(int(edge['maintenance']['in_window']))
connect_test_ok.labels(edge['name']).set(0)
- connect_test_down.labels(edge['name']).set(0)
- if edge['status']['control_plane'] == "connected":
- connect_test_ok.labels(edge['name']).set(1)
- if edge['status']['control_plane'] in ["error", "no result"]:
- connect_test_down.labels(edge['name']).set(1)
-
ping_test_ok.labels(edge['name']).set(0)
- ping_test_down.labels(edge['name']).set(0)
- if edge['status']['user_plane'] == "connected":
- ping_test_ok.labels(edge['name']).set(1)
- if edge['status']['user_plane'] in ["error", "no result"]:
- ping_test_down.labels(edge['name']).set(1)
+ e2e_tests_ok.labels(edge['name']).set(0)
+ e2e_tests_down.labels(edge['name']).set(0)
+
+ if connect_status in ["error", "no result"] or ping_status in ["error", "no result"]:
+ e2e_tests_down.labels(edge['name']).set(1)
+ else:
+ if connect_status == "connected":
+ connect_test_ok.labels(edge['name']).set(1)
+ if ping_status == "connected":
+ ping_test_ok.labels(edge['name']).set(1)
+ if connect_status == "connected" and ping_status == "connected":
+ e2e_tests_ok.labels(edge['name']).set(1)
res.append(prom.generate_latest(cp_status))
res.append(prom.generate_latest(up_status))
res.append(prom.generate_latest(last_update))
res.append(prom.generate_latest(maint_window))
res.append(prom.generate_latest(connect_test_ok))
- res.append(prom.generate_latest(connect_test_down))
res.append(prom.generate_latest(ping_test_ok))
- res.append(prom.generate_latest(ping_test_down))
+ res.append(prom.generate_latest(e2e_tests_ok))
+ res.append(prom.generate_latest(e2e_tests_down))
return Response(res, mimetype="text/plain")