Allow deploying Prometheus as a standalone service

It is now possible to install Prometheus directly on a host and
use it to scrape data from other Prometheus servers.

Prod-related: PROD-17350

Change-Id: I11d65920c0e9b685dead22a5551521fa15d13f25
This commit is contained in:
Bartosz Kupidura 2018-01-26 13:35:38 +01:00 committed by Dmitry Kalashnik
parent ce1daf8670
commit 5e9fb1d347
16 changed files with 347 additions and 75 deletions

View file

@ -160,6 +160,17 @@ Configure pushgateway
enabled: true
external_port: 15012
Install prometheus as service
-------------------------------------
.. code-block:: yaml
parameters:
prometheus:
server:
is_container: false
Documentation and Bugs
======================

View file

@ -0,0 +1,10 @@
# Backends for the Prometheus relay: one entry per cluster node
# (node01..node03), all using the prometheus_server_bind_port parameter.
parameters:
  prometheus:
    relay:
      backends:
        - host: ${_param:cluster_node01_address}
          port: ${_param:prometheus_server_bind_port}
        - host: ${_param:cluster_node02_address}
          port: ${_param:prometheus_server_bind_port}
        - host: ${_param:cluster_node03_address}
          port: ${_param:prometheus_server_bind_port}

View file

@ -0,0 +1,12 @@
# Class enabling the Prometheus relay on a node.
applications:
  - prometheus
classes:
  - service.prometheus.support
parameters:
  _param:
    # Port the relay binds to; referenced by prometheus:relay:bind:port below.
    prometheus_relay_bind_port: 8080
  prometheus:
    relay:
      enabled: true
      bind:
        port: ${_param:prometheus_relay_bind_port}

View file

@ -11,6 +11,11 @@ parameters:
prometheus:
server:
enabled: true
is_container: true
use_grains:
target: true
recording: true
alert: true
dir:
config: /srv/volumes/prometheus/server
config_in_container: /srv/prometheus

View file

@ -0,0 +1,37 @@
# Class for running the Prometheus server as a standalone (non-container)
# service on the host.
applications:
  - prometheus
classes:
  - service.prometheus.support
parameters:
  _param:
    # Tunables referenced by the prometheus:server section below.
    prometheus_evaluation_interval: "15s"
    prometheus_region_label: "region1"
    prometheus_scrape_interval: "15s"
    prometheus_storage_retention: "4320h"
    prometheus_server_bind_port: 9090
  prometheus:
    server:
      enabled: true
      # false: the server state installs the package and service on the host.
      is_container: false
      # Do not collect targets, recording rules or alerts from the grains
      # published in the Salt mine; only pillar-defined ones are used.
      use_grains:
        target: false
        recording: false
        alert: false
      dir:
        config: /etc/prometheus
        data: /var/lib/prometheus/data
      bind:
        port: ${_param:prometheus_server_bind_port}
        address: 0.0.0.0
      storage:
        local:
          retention: ${_param:prometheus_storage_retention}
      alertmanager:
        notification_queue_capacity: 10000
      config:
        global:
          scrape_interval: ${_param:prometheus_scrape_interval}
          scrape_timeout: "15s"
          evaluation_interval: ${_param:prometheus_evaluation_interval}
          external_labels:
            region: ${_param:prometheus_region_label}

View file

@ -0,0 +1,10 @@
{%- from "prometheus/map.jinja" import relay with context %}
{#- Environment variables for the prometheus-relay service:
    PROMETHEUS_RELAY_BACKENDS  comma-separated host:port list (only when
                               relay.backends is set in the pillar),
    PROMETHEUS_RELAY_BIND_PORT port taken from relay.bind.port. #}
{%- if relay.get("backends") %}
{%- set relay_backends = [] %}
{%- for backend in relay.backends %}
{#- Format the port with %s, not %d: a port supplied as a string in the
    pillar would make "%d" raise a TypeError during rendering. #}
{%- set address = "%s:%s" | format(backend.host, backend.port) %}
{%- do relay_backends.append(address) %}
{%- endfor %}
PROMETHEUS_RELAY_BACKENDS={{ relay_backends | join(',') }}
{%- endif %}
PROMETHEUS_RELAY_BIND_PORT={{ relay.bind.port }}

View file

@ -0,0 +1,18 @@
{%- from "prometheus/map.jinja" import relay with context %}
{#- systemd unit for prometheus-relay. Options are read from
    /etc/default/prometheus-relay (the leading "-" makes the file optional).
    The line-continuation backslash in front of -urls is emitted only when
    backends are configured; an unconditional trailing backslash would make
    systemd join the following ExecReload= line onto ExecStart. #}
[Unit]
Description=Prometheus Relay
After=network.target
[Service]
User=root
EnvironmentFile=-/etc/default/prometheus-relay
ExecStart=/usr/bin/prometheus-relay \
    -port=${PROMETHEUS_RELAY_BIND_PORT}
{%- if relay.get("backends") %} \
    -urls=${PROMETHEUS_RELAY_BACKENDS}
{%- endif %}
ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure
KillMode=control-group
[Install]
WantedBy=multi-user.target

View file

@ -1,14 +1,14 @@
{%- from "prometheus/map.jinja" import server with context %}
{%- set alerts = {} %}
{%- set recordings = {} %}
{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %}
{%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %}
{%- for recordingname, recording in server_grain.get('recording', {}).iteritems() %}
{%- do recordings.update({recordingname: recording}) %}
{%- if server.get('use_grains', {}).get('recording', True) %}
{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %}
{%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %}
{%- for recordingname, recording in server_grain.get('recording', {}).iteritems() %}
{%- do recordings.update({recordingname: recording}) %}
{%- endfor %}
{%- endfor %}
{%- endfor %}
{%- endif %}
{%- set recordings = salt['grains.filter_by']({'default': recordings}, merge=server.get('recording', {})) %}
@ -18,12 +18,14 @@
{%- endif %}
{%- endfor %}
{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %}
{%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %}
{%- for alertname, alert in server_grain.get('alert', {}).iteritems() %}
{%- if server.get('use_grains', {}).get('alert', True) %}
{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %}
{%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %}
{%- for alertname, alert in server_grain.get('alert', {}).iteritems() %}
{%- do alerts.update({alertname: alert}) %}
{%- endfor %}
{%- endfor %}
{%- endfor %}
{%- endif %}
{%- set alerts = salt['grains.filter_by']({'default': alerts}, merge=server.get('alert', {})) %}

View file

@ -1,14 +1,14 @@
{%- from "prometheus/map.jinja" import server with context %}
{%- set alerts = {} %}
{%- set recordings = {} %}
{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %}
{%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %}
{%- for recordingname, recording in server_grain.get('recording', {}).iteritems() %}
{%- do recordings.update({recordingname: recording}) %}
{%- if server.get('use_grains', {}).get('recording', True) %}
{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %}
{%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %}
{%- for recordingname, recording in server_grain.get('recording', {}).iteritems() %}
{%- do recordings.update({recordingname: recording}) %}
{%- endfor %}
{%- endfor %}
{%- endfor %}
{%- endif %}
groups:
{%- set recordings = salt['grains.filter_by']({'default': recordings}, merge=server.get('recording', {})) %}
@ -19,15 +19,19 @@ groups:
- record: {{ recordingname }}
expr: >-
{{ recording.query | indent(6, False) }}
labels:
recording_label: {{ recording.get('label', server.get('recording_label', 'federation')) }}
{%- endif %}
{%- endfor %}
{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %}
{%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %}
{%- for alertname, alert in server_grain.get('alert', {}).iteritems() %}
{%- if server.get('use_grains', {}).get('alert', True) %}
{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %}
{%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %}
{%- for alertname, alert in server_grain.get('alert', {}).iteritems() %}
{%- do alerts.update({alertname: alert}) %}
{%- endfor %}
{%- endfor %}
{%- endfor %}
{%- endif %}
{%- set alerts = salt['grains.filter_by']({'default': alerts}, merge=server.get('alert', {})) %}
- name: alert.rules

View file

@ -0,0 +1,5 @@
{#- Environment variables for the Prometheus server service: config and data
    directories, listen address (address:port) and local storage retention,
    all taken from the `server` map. -#}
{% from "prometheus/map.jinja" import server with context %}
PROMETHEUS_CONFIG_DIR={{ server.dir.config }}
PROMETHEUS_DATA_DIR={{ server.dir.data }}
PROMETHEUS_BIND_ADDRESS={{ server.bind.address }}:{{ server.bind.port }}
PROMETHEUS_STORAGE_LOCAL_RETENTION={{ server.storage.local.retention }}

View file

@ -1,5 +1,3 @@
{%- from "prometheus/map.jinja" import server with context %}
global:
{%- if server.get('config', {}).global is defined %}
{{ server.config.global | yaml(False) | indent(2, true) }}
@ -38,39 +36,44 @@ rule_files:
- alerts.yml
{%- set static_target = server.target.static %}
{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %}
{%- set static_grain = node_grains.get('prometheus', {}).get('server', {}).get('target', {}).get('static', {}) %}
{%- for job_name, job in static_grain.iteritems() %}
{%- if static_target[job_name] is not defined %}
{%- do static_target.update({job_name: {
'enabled': job.get('enabled', True),
'metrics_path': job.get('metrics_path', '/metrics'),
'scheme': job.get('scheme', 'http')}
}) %}
{%- if job.get('tls_config') %}
{%- do static_target[job_name].update({
'tls_config': {
'skip_verify': job.get('tls_config', {}).get('skip_verify', False),
'cert_name': job.get('tls_config', {}).get('cert_name'),
'key_name': job.get('tls_config', {}).get('key_name')
}
{%- if server.get('use_grains', {}).get('target', True) %}
{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %}
{%- set static_grain = node_grains.get('prometheus', {}).get('server', {}).get('target', {}).get('static', {}) %}
{%- for job_name, job in static_grain.iteritems() %}
{%- if static_target[job_name] is not defined %}
{%- do static_target.update({job_name: {
'enabled': job.get('enabled', True),
'metrics_path': job.get('metrics_path', '/metrics'),
'honor_labels': job.get('honor_labels', False),
'scrape_interval': job.get('scrape_interval', server.get('config', {}).get('global', {}).get('scrape_interval', '15s')),
'scrape_timeout': job.get('scrape_timeout', job.get('scrape_interval', server.get('config', {}).get('global', {}).get('scrape_interval', '15s'))),
'scheme': job.get('scheme', 'http')}
}) %}
{%- if job.get('tls_config') %}
{%- do static_target[job_name].update({
'tls_config': {
'skip_verify': job.get('tls_config', {}).get('skip_verify', False),
'cert_name': job.get('tls_config', {}).get('cert_name'),
'key_name': job.get('tls_config', {}).get('key_name')
}
}) %}
{%- endif %}
{%- endif %}
{%- endif %}
{%- if static_target[job_name]['endpoint'] is not defined %}
{%- do static_target[job_name].update({'endpoint': []}) %}
{%- endif %}
{%- if static_target[job_name]['metric_relabel'] is not defined %}
{%- do static_target[job_name].update({'metric_relabel': []}) %}
{%- endif %}
{%- for target in job.get('endpoint', []) %}
{%- do static_target[job_name]['endpoint'].append(target) %}
{%- endfor %}
{%- for label in job.get('metric_relabel', []) %}
{%- do static_target[job_name]['metric_relabel'].append(label) %}
{%- if static_target[job_name]['endpoint'] is not defined %}
{%- do static_target[job_name].update({'endpoint': []}) %}
{%- endif %}
{%- if static_target[job_name]['metric_relabel'] is not defined %}
{%- do static_target[job_name].update({'metric_relabel': []}) %}
{%- endif %}
{%- for target in job.get('endpoint', []) %}
{%- do static_target[job_name]['endpoint'].append(target) %}
{%- endfor %}
{%- for label in job.get('metric_relabel', []) %}
{%- do static_target[job_name]['metric_relabel'].append(label) %}
{%- endfor %}
{%- endfor %}
{%- endfor %}
{%- endfor %}
{%- endif %}
scrape_configs:
{%- for job_name, job in static_target.iteritems() %}
@ -83,6 +86,15 @@ scrape_configs:
- job_name: {{ job_name }}
{% if job.get('scheme') %}scheme: {{ job.scheme }}{%- endif %}
{% if job.get('metrics_path') %}metrics_path: {{ job.metrics_path }}{%- endif %}
{% if job.honor_labels is defined %}honor_labels: {{ job.honor_labels }}{%- endif %}
{% if job.scrape_interval is defined %}scrape_interval: {{ job.scrape_interval }}{%- endif %}
{% if job.scrape_timeout is defined %}scrape_timeout: {{ job.scrape_timeout }}{%- endif %}
{%- if job.get('params') %}
params:
{%- for param_name, param_value in job.get('params', {}).iteritems() %}
{{ param_name }}: {{ param_value }}
{%- endfor %}
{%- endif %}
{%- if job.get('tls_config') %}
tls_config:
{% if job.tls_config.get('skip_verify') is defined %}insecure_skip_verify: {{ job.tls_config.skip_verify | lower }}{%- endif %}
@ -111,12 +123,14 @@ scrape_configs:
{%- endfor %}
{%- set kubernetes_target = {} %}
{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %}
{%- set kubernetes_grain = node_grains.get('prometheus', {}).get('server', {}).get('target', {}).get('kubernetes', {}) %}
{%- if kubernetes_grain %}
{%- do kubernetes_target.update(kubernetes_grain) %}
{%- endif %}
{%- endfor %}
{%- if server.get('use_grains', {}).get('target', True) %}
{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %}
{%- set kubernetes_grain = node_grains.get('prometheus', {}).get('server', {}).get('target', {}).get('kubernetes', {}) %}
{%- if kubernetes_grain %}
{%- do kubernetes_target.update(kubernetes_grain) %}
{%- endif %}
{%- endfor %}
{%- endif %}
{%- if kubernetes_target.get('enabled', False) %}

View file

@ -0,0 +1,18 @@
# systemd unit running the Prometheus server directly on the host.
# Runtime options come from /etc/default/prometheus; the leading "-" on
# EnvironmentFile makes that file optional.
[Unit]
Description=Prometheus Server
Documentation=https://prometheus.io/docs/introduction/overview/
# After= only orders units; Wants= is what actually pulls
# network-online.target into the boot transaction.
Wants=network-online.target
After=network-online.target
[Service]
EnvironmentFile=-/etc/default/prometheus
User=root
Group=root
Restart=on-failure
ExecStart=/usr/bin/prometheus \
    --web.listen-address=${PROMETHEUS_BIND_ADDRESS} \
    --config.file=${PROMETHEUS_CONFIG_DIR}/prometheus.yml \
    --storage.tsdb.path=${PROMETHEUS_DATA_DIR} \
    --storage.tsdb.retention=${PROMETHEUS_STORAGE_LOCAL_RETENTION}
[Install]
WantedBy=multi-user.target

View file

@ -1,10 +1,14 @@
{%- if pillar.prometheus.server is defined or
{%- if pillar.prometheus.get('server', {}).get('enabled', False) or
pillar.prometheus.get('relay', {}).get('enabled', False) or
pillar.prometheus.alertmanager is defined or
pillar.prometheus.exporters is defined %}
include:
{%- if pillar.prometheus.server is defined %}
{%- if pillar.prometheus.get('server', {}).get('enabled', False) %}
- prometheus.server
{%- endif %}
{%- if pillar.prometheus.get('relay', {}).get('enabled', False) %}
- prometheus.relay
{%- endif %}
{%- if pillar.prometheus.alertmanager is defined %}
- prometheus.alertmanager
{%- endif %}

View file

@ -1,6 +1,7 @@
{% set server = salt['grains.filter_by']({
'default': {
'version': 2.0,
'pkgs': ['prometheus-bin'],
'target': {
'static': {
},
@ -8,6 +9,12 @@
},
}, merge=salt['pillar.get']('prometheus:server')) %}
{% set relay = salt['grains.filter_by']({
'default': {
'pkgs': ['prometheus-relay']
},
}, merge=salt['pillar.get']('prometheus:relay')) %}
{% set alertmanager = salt['grains.filter_by']({
'default': {
},

53
prometheus/relay.sls Normal file
View file

@ -0,0 +1,53 @@
{% from "prometheus/map.jinja" import relay with context %}
{%- if relay.enabled %}

# Install the relay package(s) listed in the map.
prometheus_relay_packages:
  pkg.installed:
    - names: {{ relay.pkgs }}

# Environment file holding the relay bind port and backend list.
prometheus_relay_default_file:
  file.managed:
    - name: /etc/default/prometheus-relay
    - source: salt://prometheus/files/relay/default
    - template: jinja

{%- if grains.get('init') == 'systemd' %}

# Unit file; managed only on systemd hosts.
prometheus_relay_systemd_config:
  file.managed:
    - name: /etc/systemd/system/prometheus-relay.service
    - source: salt://prometheus/files/relay/service
    - makedirs: true
    - user: root
    - group: root
    - mode: 644
    - template: jinja
    - require:
      - file: prometheus_relay_default_file

# Reload systemd when the unit file changes, before the service state runs.
prometheus_relay_restart_systemd:
  module.wait:
    - name: service.systemctl_reload
    - watch:
      - file: prometheus_relay_systemd_config
    - watch_in:
      - service: prometheus_relay_service

{%- endif %}

# Keep the relay running and enabled; restart it when its unit file or
# environment file changes.
prometheus_relay_service:
  service.running:
    - name: prometheus-relay
    - enable: True
{%- if grains.get('noservices') %}
    # With the noservices grain set, the service is never started.
    - onlyif: /bin/false
{%- endif %}
    - watch:
{%- if grains.get('init') == 'systemd' %}
      - file: prometheus_relay_systemd_config
{%- endif %}
      - file: prometheus_relay_default_file
    - require:
      - pkg: prometheus_relay_packages

{%- endif %}

View file

@ -1,30 +1,92 @@
{% from "prometheus/map.jinja" import server with context %}
{%- if server.enabled %}
{%- if pillar.docker is defined and pillar.docker.host is defined %}
{{server.dir.config}}:
prometheus_server_config_dir:
file.directory:
- name: {{ server.dir.config }}
- makedirs: True
{{server.dir.data}}:
prometheus_server_data_dir:
file.directory:
- name: {{ server.dir.data }}
- makedirs: True
- mode: 755
{{server.dir.config}}/prometheus.yml:
prometheus_server_config_file:
file.managed:
- source: salt://prometheus/files/server/prometheus.yml
- template: jinja
- require:
- file: {{server.dir.config}}
- name: {{ server.dir.config }}/prometheus.yml
- source: salt://prometheus/files/server/prometheus.yml
- template: jinja
- defaults:
server: {{ server }}
- require:
- file: prometheus_server_config_dir
{{server.dir.config}}/alerts.yml:
prometheus_server_alerts_file:
file.managed:
- source: salt://prometheus/files/server/{{server.version}}/alerts.yml
- template: jinja
- require:
- file: {{server.dir.config}}
- name: {{ server.dir.config }}/alerts.yml
- source: salt://prometheus/files/server/{{server.version}}/alerts.yml
- template: jinja
- defaults:
server: {{ server }}
- require:
- file: prometheus_server_config_dir
{%- if not server.get('is_container', True) %}
prometheus_server_packages:
pkg.installed:
- names: {{ server.pkgs }}
prometheus_server_default_file:
file.managed:
- name: /etc/default/prometheus
- source: salt://prometheus/files/server/default
- template: jinja
- defaults:
server: {{ server }}
{%- if grains.get('init') == 'systemd' %}
prometheus_server_systemd_config:
file.managed:
- name: /etc/systemd/system/prometheus.service
- source: salt://prometheus/files/server/service
- makedirs: true
- user: root
- group: root
- mode: 644
- template: jinja
- require:
- file: prometheus_server_default_file
prometheus_server_restart_systemd:
module.wait:
- name: service.systemctl_reload
- watch:
- file: prometheus_server_systemd_config
- watch_in:
- service: prometheus_server_service
{%- endif %}
prometheus_server_service:
service.running:
- name: prometheus
- enable: True
{%- if grains.get('noservices') %}
- onlyif: /bin/false
{%- endif %}
- watch:
{%- if grains.get('init') == 'systemd' %}
- file: prometheus_server_systemd_config
{%- endif %}
- file: prometheus_server_config_file
- file: prometheus_server_alerts_file
- require:
- file: prometheus_server_data_dir
- pkg: prometheus_server_packages
{%- endif %}
{%- endif %}