Refactor binary test result metrics into separate _ok/_down gauges
Change-Id: Iaf5a078eb1b06ca2063e8d54778317335e7b5fd7
diff --git a/edge-monitoring/VERSION b/edge-monitoring/VERSION
index 8f0916f..4b9fcbe 100644
--- a/edge-monitoring/VERSION
+++ b/edge-monitoring/VERSION
@@ -1 +1 @@
-0.5.0
+0.5.1
diff --git a/edge-monitoring/edge_monitoring_server.py b/edge-monitoring/edge_monitoring_server.py
index 47ae6f3..8ebe8a9 100755
--- a/edge-monitoring/edge_monitoring_server.py
+++ b/edge-monitoring/edge_monitoring_server.py
@@ -53,15 +53,6 @@
"connected": 2
}
-# Collapse 5 levels into 3 for simpler Prometheus metrics
-result_mapping = {
- "no result": "no result",
- "error": "no result",
- "disconnected": "fail",
- "connecting": "fail",
- "connected": "ok"
-}
-
room_mapping = {
"ace-menlo-pixel-production": "(Compute)-MP-1-Aether Production",
"ace-menlo-staging": "(Compute)-MP-1-Aether Staging"
@@ -72,8 +63,10 @@
up_status = prom.Gauge("aetheredge_status_user_plane", "User plane status code", ["name"])
# Simplified binary test result metrics
-connect_test = prom.Gauge("aetheredge_connect_test", "Result of last connect test", ["name", "result"])
-ping_test = prom.Gauge("aetheredge_ping_test", "Result of last ping test", ["name", "result"])
+connect_test_ok = prom.Gauge("aetheredge_connect_test_ok", "Last connect test passed", ["name"])
+connect_test_down = prom.Gauge("aetheredge_connect_test_down", "Connect test not reporting", ["name"])
+ping_test_ok = prom.Gauge("aetheredge_ping_test_ok", "Last ping test passed", ["name"])
+ping_test_down = prom.Gauge("aetheredge_ping_test_down", "Ping test not reporting", ["name"])
# Other metrics
last_update = prom.Gauge("aetheredge_last_update", "Last reported test result", ["name"])
@@ -138,13 +131,10 @@
cp_status.remove(name)
up_status.remove(name)
last_update.remove(name)
- except:
- pass
-
- try:
- for result in list(set(result_mapping.values())):
- connect_test.remove(name, result)
- ping_test.remove(name, result)
+ connect_test_ok.remove(name)
+ connect_test_down.remove(name)
+ ping_test_ok.remove(name)
+ ping_test_down.remove(name)
except:
pass
@@ -156,7 +146,6 @@
@app.route('/edges/metrics', methods=['GET'])
def get_prometheus_metrics():
res = []
- result_list = list(set(result_mapping.values()))
time_out_stale_results()
for edge in edges:
if edge['name'] == "ace-example":
@@ -168,20 +157,28 @@
if 'maintenance' in edge:
maint_window.labels(edge['name']).set(int(edge['maintenance']['in_window']))
- for result in result_list:
- connect_test.labels(edge['name'], result).set(0)
- ping_test.labels(edge['name'], result).set(0)
- result = result_mapping[edge['status']['control_plane']]
- connect_test.labels(edge['name'], result).set(1)
- result = result_mapping[edge['status']['user_plane']]
- ping_test.labels(edge['name'], result).set(1)
+ connect_test_ok.labels(edge['name']).set(0)
+ connect_test_down.labels(edge['name']).set(0)
+ if edge['status']['control_plane'] == "connected":
+ connect_test_ok.labels(edge['name']).set(1)
+ if edge['status']['control_plane'] in ["error", "no result"]:
+ connect_test_down.labels(edge['name']).set(1)
+
+ ping_test_ok.labels(edge['name']).set(0)
+ ping_test_down.labels(edge['name']).set(0)
+ if edge['status']['user_plane'] == "connected":
+ ping_test_ok.labels(edge['name']).set(1)
+ if edge['status']['user_plane'] in ["error", "no result"]:
+ ping_test_down.labels(edge['name']).set(1)
res.append(prom.generate_latest(cp_status))
res.append(prom.generate_latest(up_status))
res.append(prom.generate_latest(last_update))
res.append(prom.generate_latest(maint_window))
- res.append(prom.generate_latest(connect_test))
- res.append(prom.generate_latest(ping_test))
+ res.append(prom.generate_latest(connect_test_ok))
+ res.append(prom.generate_latest(connect_test_down))
+ res.append(prom.generate_latest(ping_test_ok))
+ res.append(prom.generate_latest(ping_test_down))
return Response(res, mimetype="text/plain")
diff --git a/edge-monitoring/test_edge_monitoring_server.py b/edge-monitoring/test_edge_monitoring_server.py
index 9979c38..fa2a417 100755
--- a/edge-monitoring/test_edge_monitoring_server.py
+++ b/edge-monitoring/test_edge_monitoring_server.py
@@ -137,12 +137,10 @@
self.assertTrue('aetheredge_status_control_plane{name="ace-menlo-pixel"} 2.0' in data)
self.assertTrue('aetheredge_status_user_plane{name="ace-menlo-pixel"} 2.0' in data)
self.assertTrue('aetheredge_last_update{name="ace-menlo-pixel"}' in data)
- self.assertTrue('aetheredge_connect_test{name="ace-menlo-pixel",result="ok"} 1.0' in data)
- self.assertTrue('aetheredge_ping_test{name="ace-menlo-pixel",result="ok"} 1.0' in data)
- self.assertTrue('aetheredge_connect_test{name="ace-menlo-pixel",result="fail"} 0.0' in data)
- self.assertTrue('aetheredge_ping_test{name="ace-menlo-pixel",result="fail"} 0.0' in data)
- self.assertTrue('aetheredge_connect_test{name="ace-menlo-pixel",result="no result"} 0.0' in data)
- self.assertTrue('aetheredge_ping_test{name="ace-menlo-pixel",result="no result"} 0.0' in data)
+ self.assertTrue('aetheredge_connect_test_ok{name="ace-menlo-pixel"} 1.0' in data)
+ self.assertTrue('aetheredge_ping_test_ok{name="ace-menlo-pixel"} 1.0' in data)
+ self.assertTrue('aetheredge_connect_test_down{name="ace-menlo-pixel"} 0.0' in data)
+ self.assertTrue('aetheredge_ping_test_down{name="ace-menlo-pixel"} 0.0' in data)
response = self.app.delete('/edges/ace-menlo-pixel')
data = json.loads(response.get_data(as_text=True))