[SEBA-72] Chart to deploy prometheus and grafana
Change-Id: I408ee45b56f0b171ccf7ad1e8443ec3a4f21df12
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..df04e59
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+# local chart repo
+chart_repo
+
+# lockfiles for requirements.yaml for dependent charts
+requirements.lock
+
+# ignore dependent chart dirs
+helm-charts/*/charts/
\ No newline at end of file
diff --git a/helm-charts/nem-monitoring/.helmignore b/helm-charts/nem-monitoring/.helmignore
new file mode 100644
index 0000000..f0c1319
--- /dev/null
+++ b/helm-charts/nem-monitoring/.helmignore
@@ -0,0 +1,21 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
diff --git a/helm-charts/nem-monitoring/Chart.yaml b/helm-charts/nem-monitoring/Chart.yaml
new file mode 100644
index 0000000..ff22295
--- /dev/null
+++ b/helm-charts/nem-monitoring/Chart.yaml
@@ -0,0 +1,19 @@
+---
+# Copyright 2017-present Open Networking Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: nem-monitoring
+description: Time Series Storage and Dashboard for SEBA
+version: 1.0.0-dev
+
diff --git a/helm-charts/nem-monitoring/README.md b/helm-charts/nem-monitoring/README.md
new file mode 100644
index 0000000..53c271c
--- /dev/null
+++ b/helm-charts/nem-monitoring/README.md
@@ -0,0 +1,20 @@
+# Nem Monitoring
+
+To deploy this chart please use:
+
+```shell
+helm install -n nem-monitoring nem-monitoring/
+```
+
+It will expose:
+
+- grafana on port `31000`
+- prometheus on port `31001`
+
+## Running on minikube
+
+On minikube you don't need all the permission schema, so install this chart with:
+
+```shell
+helm install -n nem-monitoring nem-monitoring/ -f nem-monitoring/examples/nem-monitoring-minikube.yaml
+```
\ No newline at end of file
diff --git a/helm-charts/nem-monitoring/examples/nem-monitoring-minikube.yaml b/helm-charts/nem-monitoring/examples/nem-monitoring-minikube.yaml
new file mode 100644
index 0000000..a8aa763
--- /dev/null
+++ b/helm-charts/nem-monitoring/examples/nem-monitoring-minikube.yaml
@@ -0,0 +1,23 @@
+---
+# Copyright 2018-present Open Networking Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+prometheus:
+ rbac:
+ create: false
+grafana:
+ rbac:
+ create: false
+ serviceAccount:
+ create: false
\ No newline at end of file
diff --git a/helm-charts/nem-monitoring/grafana-dashboards/kb8s-app-metrics.json b/helm-charts/nem-monitoring/grafana-dashboards/kb8s-app-metrics.json
new file mode 100644
index 0000000..9b5d28f
--- /dev/null
+++ b/helm-charts/nem-monitoring/grafana-dashboards/kb8s-app-metrics.json
@@ -0,0 +1,1400 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "4.1.1"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ }
+ ],
+ "annotations": {
+ "list": []
+ },
+ "editable": true,
+ "gnetId": 1471,
+ "graphTooltip": 1,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "refresh": "30s",
+ "rows": [
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 3,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(http_requests_total{app=\"$container\", handler!=\"prometheus\", kubernetes_namespace=\"$namespace\"}[30s])) by (kubernetes_namespace,app,code)",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "native | {{code}}",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum(irate(nginx_http_requests_total{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (kubernetes_namespace,app,status)",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "nginx | {{status}}",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "sum(irate(haproxy_backend_http_responses_total{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (app,kubernetes_namespace,code)",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "haproxy | {{code}}",
+ "refId": "C",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Request rate",
+ "tooltip": {
+ "msResolution": true,
+ "shared": false,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "datasource": "Prometheus",
+ "fill": 1,
+ "id": 15,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(haproxy_backend_http_responses_total{app=\"$container\", kubernetes_namespace=\"$namespace\",code=\"5xx\"}[30s])) by (app,kubernetes_namespace) / sum(irate(haproxy_backend_http_responses_total{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (app,kubernetes_namespace)",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "haproxy",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "expr": "sum(irate(http_requests_total{app=\"$container\", handler!=\"prometheus\", kubernetes_namespace=\"$namespace\", code=~\"5[0-9]+\"}[30s])) by (kubernetes_namespace,app) / sum(irate(http_requests_total{app=\"$container\", handler!=\"prometheus\", kubernetes_namespace=\"$namespace\"}[30s])) by (kubernetes_namespace,app)",
+ "intervalFactor": 2,
+ "legendFormat": "native",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "expr": "sum(irate(nginx_http_requests_total{app=\"$container\", kubernetes_namespace=\"$namespace\", status=~\"5[0-9]+\"}[30s])) by (kubernetes_namespace,app) / sum(irate(nginx_http_requests_total{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (kubernetes_namespace,app)",
+ "intervalFactor": 2,
+ "legendFormat": "nginx",
+ "refId": "C",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Error rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Request rate",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 224,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 5,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "max",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (app,kubernetes_namespace,le))",
+ "intervalFactor": 1,
+ "legendFormat": "native | 0.99",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "histogram_quantile(0.90, sum(rate(http_request_duration_seconds_bucket{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (app,kubernetes_namespace,le))",
+ "intervalFactor": 1,
+ "legendFormat": "native | 0.90",
+ "refId": "B",
+ "step": 1
+ },
+ {
+ "expr": "histogram_quantile(0.5, sum(rate(http_request_duration_seconds_bucket{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (app,kubernetes_namespace,le))",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "native | 0.50",
+ "refId": "C",
+ "step": 1
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(nginx_http_request_duration_seconds_bucket{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (app,kubernetes_namespace,le))",
+ "intervalFactor": 1,
+ "legendFormat": "nginx | 0.99",
+ "refId": "D",
+ "step": 1
+ },
+ {
+ "expr": "histogram_quantile(0.9, sum(rate(nginx_http_request_duration_seconds_bucket{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (app,kubernetes_namespace,le))",
+ "intervalFactor": 1,
+ "legendFormat": "nginx | 0.90",
+ "refId": "E",
+ "step": 1
+ },
+ {
+ "expr": "histogram_quantile(0.5, sum(rate(nginx_http_request_duration_seconds_bucket{app=\"$container\", kubernetes_namespace=\"$namespace\"}[30s])) by (app,kubernetes_namespace,le))",
+ "intervalFactor": 1,
+ "legendFormat": "nginx | 0.50",
+ "refId": "F",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Response time percentiles",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Response time",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "id": 7,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "count(count(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (pod_name))",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "pods",
+ "refId": "A",
+ "step": 5
+ },
+ {
+ "expr": "count(count(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (kubernetes_io_hostname))",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "hosts",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Number of pods",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Pod count",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "id": 12,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "elasticsearch-logging-data-20170207a (logging) - system",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "elasticsearch-logging-data-20170207a (logging) - user",
+ "color": "#508642"
+ }
+ ],
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(container_cpu_system_seconds_total{container_name=\"$container\", namespace=\"$namespace\"}[30s])) by (namespace,container_name) / sum(container_spec_cpu_shares{container_name=\"$container\", namespace=\"$namespace\"} / 1024) by (namespace,container_name)",
+ "intervalFactor": 2,
+ "legendFormat": "system",
+ "refId": "C",
+ "step": 10
+ },
+ {
+ "expr": "sum(irate(container_cpu_user_seconds_total{container_name=\"$container\", namespace=\"$namespace\"}[30s])) by (namespace,container_name) / sum(container_spec_cpu_shares{container_name=\"$container\", namespace=\"$namespace\"} / 1024) by (namespace,container_name)",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "user",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cpu usage (relative to request)",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": "",
+ "logBase": 1,
+ "max": "1",
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Usage relative to request",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "id": 10,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(container_cpu_usage_seconds_total{container_name=\"$container\", namespace=\"$namespace\"}[30s])) by (namespace,container_name) / sum(container_spec_cpu_quota{container_name=\"$container\", namespace=\"$namespace\"} / container_spec_cpu_period{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name)",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "actual",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cpu usage (relative to limit)",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": "",
+ "logBase": 1,
+ "max": "1",
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "id": 11,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) / sum(container_spec_memory_limit_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name)",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "actual",
+ "refId": "A",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory usage (relative to limit)",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": "1",
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Usage relative to limit",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "datasource": "Prometheus",
+ "fill": 1,
+ "id": 13,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(container_cpu_usage_seconds_total{container_name=\"$container\", namespace=\"$namespace\"}[30s])) by (id,pod_name)",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{pod_name}}",
+ "refId": "A",
+ "step": 2
+ },
+ {
+ "expr": "sum(container_spec_cpu_quota{container_name=\"$container\", namespace=\"$namespace\"} / container_spec_cpu_period{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) / count(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) ",
+ "intervalFactor": 2,
+ "legendFormat": "limit",
+ "refId": "B",
+ "step": 2
+ },
+ {
+ "expr": "sum(container_spec_cpu_shares{container_name=\"$container\", namespace=\"$namespace\"} / 1024) by (namespace,container_name) / count(container_spec_cpu_shares{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) ",
+ "intervalFactor": 2,
+ "legendFormat": "request",
+ "refId": "C",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cpu usage (per pod)",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "cores",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "datasource": "Prometheus",
+ "fill": 1,
+ "id": 14,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (id,pod_name)",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{pod_name}}",
+ "refId": "A",
+ "step": 2
+ },
+ {
+ "expr": "sum(container_spec_memory_limit_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) / count(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (container_name,namespace)",
+ "intervalFactor": 2,
+ "legendFormat": "limit",
+ "refId": "B",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory usage (per pod)",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Usage per pod",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "id": 8,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(container_cpu_usage_seconds_total{container_name=\"$container\", namespace=\"$namespace\"}[30s])) by (namespace,container_name) / count(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) ",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "actual",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "sum(container_spec_cpu_quota{container_name=\"$container\", namespace=\"$namespace\"} / container_spec_cpu_period{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) / count(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) ",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "limit",
+ "refId": "B",
+ "step": 1
+ },
+ {
+ "expr": "sum(container_spec_cpu_shares{container_name=\"$container\", namespace=\"$namespace\"} / 1024) by (namespace,container_name) / count(container_spec_cpu_shares{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) ",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "request",
+ "refId": "C",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cpu usage (avg per pod)",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": "cores",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "id": 9,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) / count(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) ",
+ "intervalFactor": 1,
+ "legendFormat": "actual",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "sum(container_spec_memory_limit_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) / count(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name) ",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "limit",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory usage (avg per pod)",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Usage per pod (average)",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 259.4375,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 1,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(container_cpu_usage_seconds_total{container_name=\"$container\", namespace=\"$namespace\"}[30s])) by (namespace,container_name)",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "actual",
+ "metric": "",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "sum(container_spec_cpu_quota{container_name=\"$container\", namespace=\"$namespace\"} / container_spec_cpu_period{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name)",
+ "intervalFactor": 1,
+ "legendFormat": "limit",
+ "refId": "B",
+ "step": 1
+ },
+ {
+ "expr": "sum(container_spec_cpu_shares{container_name=\"$container\", namespace=\"$namespace\"} / 1024) by (namespace,container_name) ",
+ "intervalFactor": 1,
+ "legendFormat": "request",
+ "refId": "C",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cpu usage (total)",
+ "tooltip": {
+ "msResolution": true,
+ "shared": false,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": "cores",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 2,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_usage_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name)",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "actual",
+ "refId": "A",
+ "step": 1
+ },
+ {
+ "expr": "sum(container_spec_memory_limit_bytes{container_name=\"$container\", namespace=\"$namespace\"}) by (namespace,container_name)",
+ "intervalFactor": 1,
+ "legendFormat": "limit",
+ "refId": "B",
+ "step": 1
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory usage (total)",
+ "tooltip": {
+ "msResolution": true,
+ "shared": false,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Usage total",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "allValue": ".+",
+ "current": {},
+ "datasource": "Prometheus",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "namespace",
+ "options": [],
+ "query": "label_values(container_memory_usage_bytes{namespace=~\".+\",container_name!=\"POD\"},namespace)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": null,
+ "tags": [],
+ "tagsQuery": null,
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": {},
+ "datasource": "Prometheus",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "container",
+ "options": [],
+ "query": "label_values(container_memory_usage_bytes{namespace=~\"$namespace\",container_name!=\"POD\"},container_name)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": null,
+ "tags": [],
+ "tagsQuery": null,
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-3h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Kubernetes App Metrics",
+ "version": 37,
+ "description": "After selecting your namespace and container you get a wealth of metrics like request rate, error rate, response times, pod count, cpu and memory usage. You can view cpu and memory usage in a variety of ways, compared to the limit, compared to the request, per pod, average per pod, etc."
+ }
\ No newline at end of file
diff --git a/helm-charts/nem-monitoring/grafana-dashboards/voltha-kpi.json b/helm-charts/nem-monitoring/grafana-dashboards/voltha-kpi.json
new file mode 100644
index 0000000..68c3b82
--- /dev/null
+++ b/helm-charts/nem-monitoring/grafana-dashboards/voltha-kpi.json
@@ -0,0 +1,564 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "links": [],
+ "panels": [
+ {
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 7,
+ "title": "Bytes",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "description": "Count the tx bytes on the NNI ports by device",
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 1
+ },
+ "id": 9,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(tx_bytes_total{port_type=\"nni\"})by(device_id) / 1024 / 1024",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Transmitted MB by device",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "description": "Count the rx bytes on the NNI ports by device",
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 1
+ },
+ "id": 10,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rx_bytes_total{port_type=\"nni\"})by(device_id) / 1024 / 1024",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Received MB by device",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 10
+ },
+ "id": 2,
+ "panels": [],
+ "title": "Packets",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "description": "Count the tx packets on the NNI ports by device",
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 11
+ },
+ "id": 4,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(tx_packets_total{port_type=\"nni\"})by(device_id)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Transmitted packets by device",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "description": "Count the rx packets on the NNI ports by device",
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 11
+ },
+ "id": 5,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rx_packets_total{port_type=\"nni\"})by(device_id)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Received packets by device",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 20
+ },
+ "id": 14,
+ "panels": [
+ {
+ "alert": {
+ "conditions": [
+ {
+ "evaluator": {
+ "params": [
+ 5
+ ],
+ "type": "gt"
+ },
+ "operator": {
+ "type": "and"
+ },
+ "query": {
+ "params": [
+ "A",
+ "1m",
+ "now"
+ ]
+ },
+ "reducer": {
+ "params": [],
+ "type": "sum"
+ },
+ "type": "query"
+ }
+ ],
+ "executionErrorState": "alerting",
+ "frequency": "60s",
+ "handler": 1,
+ "name": "Realtime Incoming MB by NNI port alert",
+ "noDataState": "no_data",
+ "notifications": []
+ },
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 10,
+ "w": 24,
+ "x": 0,
+ "y": 21
+ },
+ "id": 12,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "$$hashKey": "object:362",
+ "expr": "delta(rx_bytes_total{port_type=\"nni\"}[30s]) / 1024 / 1024",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+ {
+ "colorMode": "critical",
+ "fill": true,
+ "line": true,
+ "op": "gt",
+ "value": 5
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Realtime Incoming MB by NNI port",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:421",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:422",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "title": "Realtime Traffic Data",
+ "type": "row"
+ }
+ ],
+ "refresh": "15s",
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-5m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "Voltha",
+ "uid": "i9V9JtOmz",
+ "version": 1
+}
\ No newline at end of file
diff --git a/helm-charts/nem-monitoring/requirements.yaml b/helm-charts/nem-monitoring/requirements.yaml
new file mode 100644
index 0000000..a55e7ea
--- /dev/null
+++ b/helm-charts/nem-monitoring/requirements.yaml
@@ -0,0 +1,23 @@
+---
+# Copyright 2017-present Open Networking Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+dependencies:
+- name: prometheus
+ version: 6.8.1
+ repository: https://kubernetes-charts.storage.googleapis.com/
+- name: grafana
+ version: 1.12.0
+ repository: https://kubernetes-charts.storage.googleapis.com/
+
diff --git a/helm-charts/nem-monitoring/templates/NOTES.txt b/helm-charts/nem-monitoring/templates/NOTES.txt
new file mode 100644
index 0000000..780bf45
--- /dev/null
+++ b/helm-charts/nem-monitoring/templates/NOTES.txt
@@ -0,0 +1,12 @@
+Thank you for installing {{ .Chart.Name }}.
+
+Your release is named {{ .Release.Name }}.
+
+You can now access Grafana at: <cluster-ip>:31300
+using:
+
+username: {{ .Values.grafana.adminUser}}
+password: {{ .Values.grafana.adminPassword}}
+
+and Prometheus at: <cluster-ip>:31301
+
diff --git a/helm-charts/nem-monitoring/templates/grafana-dashboard-kb8s-configmap.yaml b/helm-charts/nem-monitoring/templates/grafana-dashboard-kb8s-configmap.yaml
new file mode 100644
index 0000000..3334c51
--- /dev/null
+++ b/helm-charts/nem-monitoring/templates/grafana-dashboard-kb8s-configmap.yaml
@@ -0,0 +1,24 @@
+---
+# Copyright 2018-present Open Networking Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: grafana-dashboard-kb8s
+ labels:
+ grafana_dashboard: "1"
+data:
+ kb8s.json: |
+{{ .Files.Get "grafana-dashboards/kb8s-app-metrics.json" | indent 4 }}
\ No newline at end of file
diff --git a/helm-charts/nem-monitoring/templates/grafana-dashboard-voltha-configmap.yaml b/helm-charts/nem-monitoring/templates/grafana-dashboard-voltha-configmap.yaml
new file mode 100644
index 0000000..efbb0dd
--- /dev/null
+++ b/helm-charts/nem-monitoring/templates/grafana-dashboard-voltha-configmap.yaml
@@ -0,0 +1,24 @@
+---
+# Copyright 2018-present Open Networking Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: grafana-dashboard-voltha
+ labels:
+ grafana_dashboard: "1"
+data:
+ voltha.json: |
+{{ .Files.Get "grafana-dashboards/voltha-kpi.json" | indent 4 }}
\ No newline at end of file
diff --git a/helm-charts/nem-monitoring/templates/voltha-exporter-deployment.yaml b/helm-charts/nem-monitoring/templates/voltha-exporter-deployment.yaml
new file mode 100644
index 0000000..f69f163
--- /dev/null
+++ b/helm-charts/nem-monitoring/templates/voltha-exporter-deployment.yaml
@@ -0,0 +1,38 @@
+---
+# Copyright 2017-present Open Networking Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+ name: voltha-kpi-exporter
+ labels:
+ release: {{ .Release.Name }}
+spec:
+ replicas: 2
+ template:
+ metadata:
+ labels:
+ app: voltha-kpi-exporter
+ release: {{ .Release.Name }}
+ spec:
+ containers:
+ - name: voltha-kpi-exporter
+ image: {{ tpl .Values.nem_voltha_kpi_exporter_image . | quote }}
+ imagePullPolicy: {{ .Values.imagePullPolicy }}
+ args: ["-broker={{ .Values.args.voltha_kpi_broker }}", "-topic={{ .Values.args.voltha_kpi_topic }}"]
+ ports:
+ - containerPort: 8080
+ port: 8080
+ protocol: TCP
diff --git a/helm-charts/nem-monitoring/templates/voltha-exporter-service.yaml b/helm-charts/nem-monitoring/templates/voltha-exporter-service.yaml
new file mode 100644
index 0000000..60a67f4
--- /dev/null
+++ b/helm-charts/nem-monitoring/templates/voltha-exporter-service.yaml
@@ -0,0 +1,30 @@
+---
+# Copyright 2017-present Open Networking Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+ name: voltha-kpi-exporter
+ labels:
+ release: {{ .Release.Name }}
+spec:
+ type: NodePort
+ ports:
+ - port: 8080
+ targetPort: 8080
+ nodePort: {{ .Values.nem_voltha_kpi_exporter_nodePort }}
+ protocol: TCP
+ selector:
+ app: "voltha-kpi-exporter"
diff --git a/helm-charts/nem-monitoring/values.yaml b/helm-charts/nem-monitoring/values.yaml
new file mode 100644
index 0000000..cc496ae
--- /dev/null
+++ b/helm-charts/nem-monitoring/values.yaml
@@ -0,0 +1,205 @@
+---
+# Copyright 2018-present Open Networking Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+nem_voltha_kpi_exporter_image: "matteoscandolo/prometheus-go-exporter:latest"
+nem_voltha_kpi_exporter_nodePort: 31080
+imagePullPolicy: "Always"
+args:
+ voltha_kpi_broker: voltha-kafka.default.svc.cluster.local:9092
+ voltha_kpi_topic: voltha.kpis
+
+prometheus:
+ alertmanager:
+ persistentVolume:
+ enabled: false
+ server:
+ persistentVolume:
+ enabled: false
+ service:
+ type: NodePort
+ nodePort: 31301
+ serverFiles:
+ alerts: {}
+ rules: {}
+
+ prometheus.yml:
+ rule_files:
+ - /etc/config/rules
+ - /etc/config/alerts
+
+ scrape_configs:
+ # voltha KPI
+ - job_name: 'voltha-kpi'
+ metrics_path: /metrics
+ scrape_interval: 15s
+ static_configs:
+ - targets:
+ - voltha-kpi-exporter:8080
+
+ # KB8s monitoring jobs
+ - job_name: prometheus
+ static_configs:
+ - targets:
+ - localhost:9090
+ - job_name: 'kubernetes-apiservers'
+ kubernetes_sd_configs:
+ - role: endpoints
+ scheme: https
+ tls_config:
+ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+ insecure_skip_verify: true
+ bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+ relabel_configs:
+ - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
+ action: keep
+ regex: default;kubernetes;https
+ - job_name: 'kubernetes-nodes'
+ # Default to scraping over https. If required, just disable this or change to
+ # `http`.
+ scheme: https
+ tls_config:
+ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+ insecure_skip_verify: true
+ bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+ kubernetes_sd_configs:
+ - role: node
+ relabel_configs:
+ - action: labelmap
+ regex: __meta_kubernetes_node_label_(.+)
+ - target_label: __address__
+ replacement: kubernetes.default.svc:443
+ - source_labels: [__meta_kubernetes_node_name]
+ regex: (.+)
+ target_label: __metrics_path__
+ replacement: /api/v1/nodes/${1}/proxy/metrics
+ - job_name: 'kubernetes-nodes-cadvisor'
+ scheme: https
+ tls_config:
+ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+ insecure_skip_verify: true
+ bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+ kubernetes_sd_configs:
+ - role: node
+ relabel_configs:
+ - action: labelmap
+ regex: __meta_kubernetes_node_label_(.+)
+ - target_label: __address__
+ replacement: kubernetes.default.svc:443
+ - source_labels: [__meta_kubernetes_node_name]
+ regex: (.+)
+ target_label: __metrics_path__
+ replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
+ - job_name: 'kubernetes-service-endpoints'
+ kubernetes_sd_configs:
+ - role: endpoints
+ relabel_configs:
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
+ action: keep
+ regex: true
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
+ action: replace
+ target_label: __scheme__
+ regex: (https?)
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
+ action: replace
+ target_label: __metrics_path__
+ regex: (.+)
+ - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
+ action: replace
+ target_label: __address__
+ regex: ([^:]+)(?::\d+)?;(\d+)
+ replacement: $1:$2
+ - action: labelmap
+ regex: __meta_kubernetes_service_label_(.+)
+ - source_labels: [__meta_kubernetes_namespace]
+ action: replace
+ target_label: kubernetes_namespace
+ - source_labels: [__meta_kubernetes_service_name]
+ action: replace
+ target_label: kubernetes_name
+ - job_name: 'prometheus-pushgateway'
+ honor_labels: true
+ kubernetes_sd_configs:
+ - role: service
+ relabel_configs:
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
+ action: keep
+ regex: pushgateway
+ - job_name: 'kubernetes-services'
+ metrics_path: /probe
+ params:
+ module: [http_2xx]
+ kubernetes_sd_configs:
+ - role: service
+ relabel_configs:
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
+ action: keep
+ regex: true
+ - source_labels: [__address__]
+ target_label: __param_target
+ - target_label: __address__
+ replacement: blackbox
+ - source_labels: [__param_target]
+ target_label: instance
+ - action: labelmap
+ regex: __meta_kubernetes_service_label_(.+)
+ - source_labels: [__meta_kubernetes_namespace]
+ target_label: kubernetes_namespace
+ - source_labels: [__meta_kubernetes_service_name]
+ target_label: kubernetes_name
+ - job_name: 'kubernetes-pods'
+ kubernetes_sd_configs:
+ - role: pod
+ relabel_configs:
+ - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+ action: keep
+ regex: true
+ - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+ action: replace
+ target_label: __metrics_path__
+ regex: (.+)
+ - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+ action: replace
+ regex: ([^:]+)(?::\d+)?;(\d+)
+ replacement: $1:$2
+ target_label: __address__
+ - action: labelmap
+ regex: __meta_kubernetes_pod_label_(.+)
+ - source_labels: [__meta_kubernetes_namespace]
+ action: replace
+ target_label: kubernetes_namespace
+ - source_labels: [__meta_kubernetes_pod_name]
+ action: replace
+ target_label: kubernetes_pod_name
+
+grafana:
+ adminUser: admin
+ adminPassword: strongpassword
+ service:
+ type: NodePort
+ nodePort: 31300
+ sidecar:
+ dashboards:
+ enabled: true
+ label: grafana_dashboard
+ datasources:
+ datasources.yaml:
+ apiVersion: 1
+ datasources:
+ - name: Prometheus
+ type: prometheus
+ url: http://nem-monitoring-prometheus-server.default.svc.cluster.local
+ access: proxy
+ isDefault: true
\ No newline at end of file