Make ManyEdgeConnectTestsFailing alert optional (off by default) and use a static message
Change-Id: I86262e2eccac1a59d5489ef6c7584d2f58c3dda3
diff --git a/monitoring/edge-monitoring-server/Chart.yaml b/monitoring/edge-monitoring-server/Chart.yaml
index 776de58..388d8ae 100644
--- a/monitoring/edge-monitoring-server/Chart.yaml
+++ b/monitoring/edge-monitoring-server/Chart.yaml
@@ -8,7 +8,7 @@
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.4.2
+version: 0.4.3
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
diff --git a/monitoring/edge-monitoring-server/templates/prometheusrule.yaml b/monitoring/edge-monitoring-server/templates/prometheusrule.yaml
index ecc6acf..d554431 100644
--- a/monitoring/edge-monitoring-server/templates/prometheusrule.yaml
+++ b/monitoring/edge-monitoring-server/templates/prometheusrule.yaml
@@ -43,12 +43,14 @@
for: 10m
labels:
severity: critical
+{{- if .Values.alerts.manyEdgeConnectTestsFailing }}
- alert: ManyEdgeConnectTestsFailing
annotations:
message: |
- {{`{{ $value | humanizePercentage }}`}} of the clusters are reporting UE connect failures.
+ Over half of the clusters are reporting UE connect failures.
expr: avg(clamp_max(aetheredge_connect_test_ok{endpoint="metrics80"} + aetheredge_in_maintenance_window{endpoint="metrics80"}, 1)) < 0.5
for: 10m
labels:
severity: critical
{{- end }}
+{{- end }}
diff --git a/monitoring/edge-monitoring-server/values.yaml b/monitoring/edge-monitoring-server/values.yaml
index e013840..a03e81f 100644
--- a/monitoring/edge-monitoring-server/values.yaml
+++ b/monitoring/edge-monitoring-server/values.yaml
@@ -21,6 +21,7 @@
alerts:
enabled: false
+ manyEdgeConnectTestsFailing: false
imagePullSecrets: []
nameOverride: ""