formula-prometheus/prometheus/files/grafana_dashboards/alertmanager_prometheus.json
Mateusz Matuszkowiak 53234ab0e5 Use max instead of min
Since these services are replicated (two instances per service),
we should check the max, not the min: one of the replicas
could be down while the API is still up.

Change-Id: I6dad9b74d65415f5f4f91e00c732792010487619
2018-03-20 14:12:30 +01:00

737 lines
19 KiB
JSON

{% raw %}
{
"annotations": {
"list": []
},
"description": "Monitors Alertmanager using Prometheus.",
"editable": true,
"gnetId": 315,
"graphTooltip": 1,
"hideControls": false,
"id": null,
"links": [],
"refresh": "1m",
"rows": [
{
"collapse": false,
"height": 250,
"panels": [
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": true,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"datasource": "prometheus",
"format": "none",
"gauge": {
"maxValue": 1,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 1,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 2,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": true
},
"tableColumn": "",
"targets": [
{
"expr": "max(up{job=\"alertmanager\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
"refId": "A",
"step": 60
}
],
"thresholds": "1,1",
"title": "Status",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "UNKNOWN",
"value": "null"
},
{
"op": "=",
"text": "OK",
"value": "1"
},
{
"op": "=",
"text": "DOWN",
"value": "0"
}
],
"valueName": "current"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"decimals": 0,
"fill": 1,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(alertmanager_alerts_received_total{job=\"alertmanager\",instance=~\"$Alertmanager:[1-9][0-9]*\"}[10m])",
"format": "time_series",
"hide": false,
"intervalFactor": 2,
"legendFormat": "10m rate {{ status }}",
"refId": "A",
"step": 4
},
{
"expr": "rate(alertmanager_alerts_received_total{job=\"alertmanager\",instance=~\"$Alertmanager:[1-9][0-9]*\"}[1h])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "1h rate {{ status }}",
"refId": "B",
"step": 4
},
{
"expr": "rate(alertmanager_alerts_received_total{job=\"alertmanager\",instance=~\"$Alertmanager:[1-9][0-9]*\"}[6h])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "6h rate {{ status }}",
"refId": "C",
"step": 4
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Received alerts",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Service Status",
"titleSize": "h6"
},
{
"collapse": false,
"height": 250,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"fill": 1,
"id": 11,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(alertmanager_notifications_total{job=\"alertmanager\",instance=~\"$Alertmanager:[1-9][0-9]*\"}[5m])\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ integration }}",
"refId": "A",
"step": 10
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Notifications rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"fill": 1,
"id": 10,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(alertmanager_notifications_failed_total{job=\"alertmanager\",instance=~\"$Alertmanager:[1-9][0-9]*\"}[5m])\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ integration }}",
"refId": "A",
"step": 10
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Failed notifications rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Alerts",
"titleSize": "h6"
},
{
"collapse": false,
"height": 250,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"decimals": null,
"editable": true,
"error": false,
"fill": 1,
"id": 6,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "/-/",
"fill": 0
}
],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "process_resident_memory_bytes{job=\"alertmanager\",instance=~\"$Alertmanager:[1-9][0-9]*\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "RSS",
"metric": "process_resident_memory_bytes",
"refId": "B",
"step": 20
},
{
"expr": "max_over_time(go_memstats_alloc_bytes{job=\"alertmanager\",instance=~\"$Alertmanager:[1-9][0-9]*\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Allocated bytes - 5m max",
"metric": "go_memstats_alloc_bytes",
"refId": "D",
"step": 20
},
{
"expr": "go_memstats_alloc_bytes{job=\"alertmanager\",instance=~\"$Alertmanager:[1-9][0-9]*\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Allocated bytes",
"metric": "go_memstats_alloc_bytes",
"refId": "A",
"step": 20
},
{
"expr": "min_over_time(go_memstats_alloc_bytes{job=\"alertmanager\",instance=~\"$Alertmanager:[1-9][0-9]*\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Allocated bytes - 5m min",
"metric": "go_memstats_alloc_bytes",
"refId": "C",
"step": 20
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Memory",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"editable": true,
"error": false,
"fill": 1,
"id": 7,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(go_memstats_alloc_bytes_total{job=\"alertmanager\",instance=~\"$Alertmanager:[1-9][0-9]*\"}[1m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Allocated Bytes/s",
"metric": "go_memstats_alloc_bytes_total",
"refId": "A",
"step": 20
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Allocations",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"decimals": 2,
"editable": true,
"error": false,
"fill": 1,
"id": 9,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"hideEmpty": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "irate(process_cpu_seconds_total{job=\"alertmanager\",instance=~\"$Alertmanager:[1-9][0-9]*\"}[1m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Irate",
"metric": "",
"refId": "A",
"step": 20
},
{
"expr": "rate(process_cpu_seconds_total{job=\"alertmanager\",instance=~\"$Alertmanager:[1-9][0-9]*\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5m rate",
"metric": "",
"refId": "B",
"step": 20
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "CPU",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"avg"
]
},
"yaxes": [
{
"format": "none",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Memory & CPU",
"titleSize": "h6"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
"prometheus"
],
"templating": {
"list": [
{
"allValue": null,
"current": {},
"datasource": "prometheus",
"hide": 0,
"includeAll": false,
"label": "Alertmanager",
"multi": false,
"name": "Alertmanager",
"options": [],
"query": "query_result(up{job=\"alertmanager\"} == 1)",
"refresh": 1,
"refresh_on_load": true,
"regex": ".*instance=\"([^:]+):[1-9][0-9]*\".*",
"sort": 1,
"tagValuesQuery": null,
"tags": [],
"tagsQuery": null,
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Alertmanager",
"version": 1
}
{%- endraw %}