Trigger the target down alert after 2 minutes
Otherwise the alert fires as soon as Prometheus can't scrape a target, which is too aggressive in the case of transient connectivity issues or endpoint restarts.
Change-Id: Ib3de5b141db7a7f2397bf332844a9c44d38f2d3c
This commit is contained in:
parent
af5d9a9503
commit
cd90c9f842
1 changed file with 3 additions and 2 deletions
|
@@ -6,12 +6,13 @@ server:
|
||||||
{% raw %}
|
{% raw %}
|
||||||
PrometheusTargetDown:
|
PrometheusTargetDown:
|
||||||
if: 'up != 1'
|
if: 'up != 1'
|
||||||
|
for: 2m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
service: prometheus
|
service: prometheus
|
||||||
annotations:
|
annotations:
|
||||||
summary: 'Prometheus endpoint {{ $labels.instance }} is down'
|
summary: 'Prometheus endpoint {{ $labels.instance }} down'
|
||||||
description: 'Prometheus endpoint {{ $labels.instance }} is down for job {{ $labels.job }}'
|
description: 'The Prometheus target {{ $labels.instance }} is down for the job {{ $labels.job }}.'
|
||||||
{% endraw %}
|
{% endraw %}
|
||||||
{%- endif %}
|
{%- endif %}
|
||||||
{%- if remote_storage_adapter.get('enabled', False) %}
|
{%- if remote_storage_adapter.get('enabled', False) %}
|
||||||
|
|
Loading…
Reference in a new issue