EDGEPOD-157 Upgrade Aether dashboards for better support of multi-sites
Change-Id: I525b8a75739a0e8271dd0a532ba52faf2627534c
diff --git a/nem-monitoring/grafana-dashboards/multi-site-system-summary.json b/nem-monitoring/grafana-dashboards/multi-site-system-summary.json
new file mode 100644
index 0000000..64b6bb5
--- /dev/null
+++ b/nem-monitoring/grafana-dashboards/multi-site-system-summary.json
@@ -0,0 +1,508 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "Support Node Exporter v0.16 and above.",
+ "editable": true,
+ "gnetId": 11074,
+ "graphTooltip": 0,
+ "id": 9,
+ "iteration": 1580631219620,
+ "links": [],
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 179,
+ "panels": [],
+ "repeat": "site",
+ "title": "$site",
+ "type": "row"
+ },
+ {
+ "datasource": "Prometheus",
+ "gridPos": {
+ "h": 10,
+ "w": 8,
+ "x": 0,
+ "y": 1
+ },
+ "id": 177,
+ "options": {
+ "displayMode": "lcd",
+ "fieldOptions": {
+ "calcs": [
+ "last"
+ ],
+ "defaults": {
+ "mappings": [],
+ "max": 100,
+ "min": 0,
+ "thresholds": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "#EAB839",
+ "value": 60
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ],
+ "title": "",
+ "unit": "percent"
+ },
+ "override": {},
+ "values": false
+ },
+ "orientation": "horizontal"
+ },
+ "pluginVersion": "6.4.2",
+ "repeat": null,
+ "repeatDirection": "v",
+ "targets": [
+ {
+ "expr": "100 - (avg(irate(node_cpu_seconds_total{datacenter=\"$site\",mode=\"idle\"}[30m])) * 100)",
+ "instant": true,
+ "legendFormat": "CPU Busy",
+ "refId": "A"
+ },
+ {
+ "expr": "avg(irate(node_cpu_seconds_total{datacenter=\"$site\",mode=\"iowait\"}[30m])) * 100",
+ "hide": true,
+ "instant": true,
+ "legendFormat": "Busy Iowait",
+ "refId": "C"
+ },
+ {
+ "expr": "(1 - (node_memory_MemAvailable_bytes{datacenter=\"$site\"} / (node_memory_MemTotal_bytes{datacenter=\"$site\"})))* 100",
+ "instant": true,
+ "legendFormat": "Memory Usage ({{kubernetes_node}})",
+ "refId": "B"
+ },
+ {
+ "expr": "100 - ((node_filesystem_avail_bytes{datacenter=\"$site\",mountpoint=\"$maxmount\",fstype=~\"ext4|xfs\"} * 100) / node_filesystem_size_bytes{datacenter=\"$site\",mountpoint=\"$maxmount\",fstype=~\"ext4|xfs\"})",
+ "hide": false,
+ "instant": true,
+ "legendFormat": "Disk Usage ({{kubernetes_node}})",
+ "refId": "D"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "",
+ "type": "bargauge"
+ },
+ {
+ "aliasColors": {
+ "15分钟": "#6ED0E0",
+ "1分钟": "#BF1B00",
+ "5分钟": "#CCA300"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "fillGradient": 1,
+ "grid": {},
+ "gridPos": {
+ "h": 10,
+ "w": 8,
+ "x": 8,
+ "y": 1
+ },
+ "height": "300",
+ "id": 13,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null as zero",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_load1{datacenter=\"$site\"}",
+ "format": "time_series",
+ "hide": true,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{kubernetes_node}}_1m",
+ "metric": "",
+ "refId": "A",
+ "step": 20,
+ "target": ""
+ },
+ {
+ "expr": "node_load5{datacenter=\"$site\"}",
+ "format": "time_series",
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{kubernetes_node}}",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "expr": "node_load15{datacenter=\"$site\"}",
+ "format": "time_series",
+ "hide": true,
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{kubernetes_node}}_15m",
+ "refId": "C",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "System Load (5m average)",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 2,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "192.168.10.227:9100_em1_in下载": "super-light-green",
+ "192.168.10.227:9100_em1_out上传": "dark-blue"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "fill": 1,
+ "fillGradient": 3,
+ "gridPos": {
+ "h": 10,
+ "w": 8,
+ "x": 16,
+ "y": 1
+ },
+ "height": "300",
+ "id": 157,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": true,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 0.5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*_transmit$/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_bytes_total{datacenter=\"$site\",device!~'tap.*|veth.*|br.*|docker.*|virbr*|lo*|tunl0|cali.*'}[30m])*8",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{kubernetes_node}}_{{device}}_receive",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_network_transmit_bytes_total{datacenter=\"$site\",device!~'tap.*|veth.*|br.*|docker.*|virbr*|lo*|tunl0|cali.*'}[30m])*8",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{kubernetes_node}}_{{device}}_transmit",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bps",
+ "label": "transmit(-)/receive(+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": false,
+ "schemaVersion": 20,
+ "style": "dark",
+ "tags": [
+ "aether"
+ ],
+ "templating": {
+ "list": [
+ {
+ "allValue": null,
+ "current": {
+ "tags": [],
+ "text": "All",
+ "value": [
+ "$__all"
+ ]
+ },
+ "datasource": "Prometheus",
+ "definition": "label_values(node_uname_info, datacenter)",
+ "hide": 0,
+ "includeAll": true,
+ "label": "Site",
+ "multi": true,
+ "name": "site",
+ "query": "label_values(node_uname_info, datacenter)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "/",
+ "value": "/"
+ },
+ "datasource": "Prometheus",
+ "definition": "query_result(topk(1,sort_desc (max(node_filesystem_size_bytes{fstype=~\"ext4|xfs\"}) by (mountpoint))))",
+ "hide": 2,
+ "includeAll": false,
+ "label": "",
+ "multi": false,
+ "name": "maxmount",
+ "options": [],
+ "query": "query_result(topk(1,sort_desc (max(node_filesystem_size_bytes{fstype=~\"ext4|xfs\"}) by (mountpoint))))",
+ "refresh": 2,
+ "regex": "/.*\\\"(.*)\\\".*/",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allFormat": "glob",
+ "allValue": null,
+ "current": {
+ "isNone": true,
+ "text": "None",
+ "value": ""
+ },
+ "datasource": "Prometheus",
+ "definition": "",
+ "hide": 2,
+ "includeAll": false,
+ "label": "环境",
+ "multi": false,
+ "multiFormat": "regex values",
+ "name": "env",
+ "options": [],
+ "query": "label_values(node_exporter_build_info,env)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allFormat": "glob",
+ "allValue": "",
+ "current": {
+ "isNone": true,
+ "text": "None",
+ "value": ""
+ },
+ "datasource": "Prometheus",
+ "definition": "label_values(node_exporter_build_info{env=~'$env'},name)",
+ "hide": 2,
+ "includeAll": false,
+ "label": "名称",
+ "multi": true,
+ "multiFormat": "regex values",
+ "name": "name",
+ "options": [],
+ "query": "label_values(node_exporter_build_info{env=~'$env'},name)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "/.*/",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-24h",
+ "to": "now"
+ },
+ "timepicker": {
+ "now": true,
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Multi-Site System Monitoring Summary",
+ "uid": "hb7fSE0Za",
+ "version": 7
+}