{%- if pillar.prometheus is defined %}
{%- from "prometheus/map.jinja" import server, remote_storage_adapter, monitoring with context %}
{#- Prometheus alert rules published under server.alert. Each group of rules
    is rendered only when the matching component reports enabled in the data
    imported from map.jinja. Nothing is rendered at all when the prometheus
    pillar is absent. #}
server:
  alert:
{%- if server.get('enabled', False) %}
{#- Fires once a series has had "up != 1" for two minutes. The raw section
    keeps the Prometheus placeholders in the annotations from being
    evaluated by Jinja. #}
{%- raw %}
    PrometheusTargetDown:
      if: 'up != 1'
      for: 2m
      labels:
        severity: critical
        service: prometheus
      annotations:
        summary: 'Prometheus endpoint {{ $labels.instance }} down'
        description: 'The Prometheus target {{ $labels.instance }} is down for the job {{ $labels.job }}.'
{%- endraw %}
{%- endif %}
{%- if remote_storage_adapter.get('enabled', False) %}
{#- Both limits are read from the monitoring data and coerced to float. #}
{%- set slow_limit = monitoring.remote_storage_adapter.sent_vs_received_ratio|float %}
{%- set ignored_limit = monitoring.remote_storage_adapter.ignored_vs_sent_ratio|float %}
{#- Percentage of received samples that were not sent, compared with the
    configured limit. The limit is interpolated by Jinja, so raw mode is
    left just before it in the description and entered again afterwards.
    NOTE(review): the expression divides the raw series values rather than
    using rate() or increase(); if these series are ever-growing counters the
    result reflects totals since start - confirm this is intended. #}
    RemoteStorageAdapterSendingTooSlow:
      if: >-
        100.0 - (100.0 * sent_samples_total{job="remote_storage_adapter"} / on (job, instance) received_samples_total) > {{ slow_limit }}
{%- raw %}
      labels:
        severity: warning
        service: remote_storage_adapter
      annotations:
        summary: 'Remote storage adapter too slow on {{ $labels.instance }}'
        description: 'Remote storage adapter can not ingest samples fast enough on {{ $labels.instance }} (current value={{ $value }}%, threshold={%- endraw %}{{ slow_limit }}%).'
{#- Percentage of sent samples that were ignored, compared with the
    configured limit. Same raw-mode handling as the rule above. #}
    RemoteStorageAdapterIgnoredTooHigh:
      if: >-
        100.0 * prometheus_influxdb_ignored_samples_total{job="remote_storage_adapter"} / on (job, instance) sent_samples_total > {{ ignored_limit }}
{%- raw %}
      labels:
        severity: warning
        service: remote_storage_adapter
      annotations:
        summary: 'Remote storage adapter receiving too many invalid metrics on {{ $labels.instance }}'
        description: 'Remote storage adapter is receiving too many invalid metrics on {{ $labels.instance }} (current value={{ $value }}%, threshold={%- endraw %}{{ ignored_limit }}%).'
{%- endif %}
{%- endif %}