diff --git a/metadata/service/support.yml b/metadata/service/support.yml
index 76476fd..df2b7c3 100644
--- a/metadata/service/support.yml
+++ b/metadata/service/support.yml
@@ -1,5 +1,7 @@
 parameters:
   prometheus:
     _support:
+      prometheus:
+        enabled: true
       grafana:
         enabled: true
diff --git a/prometheus/collector.sls b/prometheus/collector.sls
new file mode 100644
index 0000000..9b7946b
--- /dev/null
+++ b/prometheus/collector.sls
@@ -0,0 +1,40 @@
+{#-
+  Gather Prometheus rule fragments for every top-level pillar key that sets
+  _support:prometheus:enabled (read from <key>/meta/prometheus.yml) and
+  publish the merged result as a grain file under /etc/salt/grains.d.
+#}
+{%- set service_grains = {'prometheus': {'server': {'alert': {}, 'recording': []}}} %}
+{%- for service_name, service in pillar.items() %}
+  {%- if service.get('_support', {}).get('prometheus', {}).get('enabled', False) %}
+    {%- set grains_fragment_file = service_name+'/meta/prometheus.yml' %}
+    {#- Wrap the include in a macro so its rendered text can be fed to load_yaml. #}
+    {%- macro load_grains_file() %}{% include grains_fragment_file ignore missing %}{% endmacro %}
+    {%- set grains_yaml = load_grains_file()|load_yaml %}
+    {%- if grains_yaml is mapping %}
+      {#- NOTE(review): a set inside a for body is normally loop-scoped in Jinja;
+          the merged value outliving the loop presumably relies on
+          grains.filter_by updating the dict passed as 'default' in place - confirm. #}
+      {%- set service_grains = salt['grains.filter_by']({'default': service_grains}, merge={'prometheus': grains_yaml}) %}
+    {%- endif %}
+  {%- endif %}
+{%- endfor %}
+
+# Directory for the generated grain file; owned by root, mode 700.
+prometheus_grains_dir:
+  file.directory:
+  - name: /etc/salt/grains.d
+  - mode: 700
+  - makedirs: true
+  - user: root
+
+# Render the merged fragments through the prometheus.grain template.
+prometheus_grain:
+  file.managed:
+  - name: /etc/salt/grains.d/prometheus
+  - source: salt://prometheus/files/prometheus.grain
+  - template: jinja
+  - mode: 600
+  - defaults:
+      service_grains: {{ service_grains|yaml }}
+  - require:
+    - file: prometheus_grains_dir
diff --git a/prometheus/files/alerts.yml b/prometheus/files/alerts.yml
index 68c1c88..d9ebc7e 100644
--- a/prometheus/files/alerts.yml
+++ b/prometheus/files/alerts.yml
@@ -1,13 +1,51 @@
 {%- from "prometheus/map.jinja" import server with context %}
 
-{%- if server.recording is defined %}
-{%- for recording_rule in server.recording %}
-{{ recording_rule.name }} = {{ recording_rule.query }}
-{%- endfor %}
-{%- endif %}
+{#-
+  Rules come from two places: `server` (imported from map.jinja) and the
+  prometheus:server grain of every minion returned by the Salt mine.
+  The first definition seen for a given name wins, so `server` entries
+  take precedence over mined ones.
+#}
+{%- set alerts = {} %}
+{%- set recordings = {} %}
+{#- Query the mine once and reuse the result for both rule types. #}
+{%- set mine_grains = salt['mine.get']('*', 'grains.items') %}
 
-{%- if server.alert is defined %}
-{%- for alertname, alert in server.alert.iteritems() %}
+
+{%- for recording_rule in server.get('recording', []) %}
+  {%- if recording_rule.name not in recordings %}
+    {%- do recordings.update({recording_rule.name: recording_rule.query}) %}
+  {%- endif %}
+{%- endfor %}
+
+{%- for node_name, node_grains in mine_grains.items() %}
+  {%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %}
+  {%- for recording_rule in server_grain.get('recording', []) %}
+    {%- if recording_rule.name not in recordings %}
+      {%- do recordings.update({recording_rule.name: recording_rule.query}) %}
+    {%- endif %}
+  {%- endfor %}
+{%- endfor %}
+
+{%- for recording_name, query in recordings.items() %}
+{{ recording_name }} = {{ query }}
+{%- endfor %}
+
+{%- for alertname, alert in server.get('alert', {}).items() %}
+  {%- if alertname not in alerts %}
+    {%- do alerts.update({alertname: alert}) %}
+  {%- endif %}
+{%- endfor %}
+
+{%- for node_name, node_grains in mine_grains.items() %}
+  {%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %}
+  {%- for alertname, alert in server_grain.get('alert', {}).items() %}
+    {%- if alertname not in alerts %}
+      {%- do alerts.update({alertname: alert}) %}
+    {%- endif %}
+  {%- endfor %}
+{%- endfor %}
+
+{%- for alertname, alert in alerts.items() %}
 ALERT {{ alertname }}
   IF {{ alert.if }}
   {%- if alert.for is defined %}FOR {{ alert.for }}{%- endif %}
@@ -26,4 +64,3 @@ ALERT {{ alertname }}
   }
   {%- endif %}
 {%- endfor %}
-{%- endif %}
diff --git a/prometheus/files/prometheus.grain b/prometheus/files/prometheus.grain
new file mode 100644
index 0000000..3e3b373
--- /dev/null
+++ b/prometheus/files/prometheus.grain
@@ -0,0 +1 @@
+{{ service_grains|yaml(False) }}
diff --git a/prometheus/meta/prometheus.yml b/prometheus/meta/prometheus.yml
new file mode 100644
index 0000000..b395c17
--- /dev/null
+++ b/prometheus/meta/prometheus.yml
@@ -0,0 +1,14 @@
+{#- Raw block: Jinja must leave the $labels placeholders below untouched. #}
+{% raw %}
+server:
+  alert:
+    PrometheusUP:
+      # `up` is 0 when the last scrape of a target failed.
+      if: 'up == 0'
+      labels:
+        severity: critical
+        service: prometheus
+      annotations:
+        summary: 'Prometheus endpoint {{ $labels.instance }} is down'
+        description: 'Prometheus endpoint {{ $labels.instance }} is down for job {{ $labels.job }}'
+{% endraw %}