From cd90c9f842ef2e98319d77c684f6c16f001c733c Mon Sep 17 00:00:00 2001 From: Simon Pasquier Date: Tue, 12 Sep 2017 15:14:21 +0200 Subject: [PATCH] Trigger the target down alert after 2 minutes Otherwise the alert fires as soon as Prometheus can't scrape a target. This is too aggressive in the case of transient connectivity issues or an endpoint restart. Change-Id: Ib3de5b141db7a7f2397bf332844a9c44d38f2d3c --- prometheus/meta/prometheus.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/prometheus/meta/prometheus.yml b/prometheus/meta/prometheus.yml index 1ef4d26..07d76bd 100644 --- a/prometheus/meta/prometheus.yml +++ b/prometheus/meta/prometheus.yml @@ -6,12 +6,13 @@ server: {% raw %} PrometheusTargetDown: if: 'up != 1' + for: 2m labels: severity: critical service: prometheus annotations: - summary: 'Prometheus endpoint {{ $labels.instance }} is down' - description: 'Prometheus endpoint {{ $labels.instance }} is down for job {{ $labels.job }}' + summary: 'Prometheus endpoint {{ $labels.instance }} down' + description: 'The Prometheus target {{ $labels.instance }} is down for the job {{ $labels.job }}.' {% endraw %} {%- endif %} {%- if remote_storage_adapter.get('enabled', False) %}