From 5e9fb1d347e3712fe2ac67e804ad6393d9b0727a Mon Sep 17 00:00:00 2001 From: Bartosz Kupidura Date: Fri, 26 Jan 2018 13:35:38 +0100 Subject: [PATCH] Allow to deploy prometheus as standalone service It is now possible to install prometheus directly on a host and use it to scrape data from other prometheus servers. Prod-related: PROD-17350 Change-Id: I11d65920c0e9b685dead22a5551521fa15d13f25 --- README.rst | 11 ++++ metadata/service/relay/cluster.yml | 10 +++ metadata/service/relay/init.yml | 12 ++++ metadata/service/server/container.yml | 5 ++ metadata/service/server/standalone.yml | 37 +++++++++++ prometheus/files/relay/default | 10 +++ prometheus/files/relay/service | 18 ++++++ prometheus/files/server/1.7/alerts.yml | 24 +++---- prometheus/files/server/2.0/alerts.yml | 26 ++++---- prometheus/files/server/default | 5 ++ prometheus/files/server/prometheus.yml | 88 ++++++++++++++----------- prometheus/files/server/service | 18 ++++++ prometheus/init.sls | 8 ++- prometheus/map.jinja | 7 ++ prometheus/relay.sls | 53 +++++++++++++++ prometheus/server.sls | 90 ++++++++++++++++++++++---- 16 files changed, 347 insertions(+), 75 deletions(-) create mode 100644 metadata/service/relay/cluster.yml create mode 100644 metadata/service/relay/init.yml create mode 100644 metadata/service/server/standalone.yml create mode 100644 prometheus/files/relay/default create mode 100644 prometheus/files/relay/service create mode 100644 prometheus/files/server/default create mode 100644 prometheus/files/server/service create mode 100644 prometheus/relay.sls diff --git a/README.rst b/README.rst index 95be7cc..42ce26d 100644 --- a/README.rst +++ b/README.rst @@ -160,6 +160,17 @@ Configure pushgateway enabled: true external_port: 15012 +Install prometheus as service +------------------------------------- + +.. 
code-block:: yaml + +parameters: + prometheus: + server: + is_container: false + + Documentation and Bugs ====================== diff --git a/metadata/service/relay/cluster.yml b/metadata/service/relay/cluster.yml new file mode 100644 index 0000000..73c5e66 --- /dev/null +++ b/metadata/service/relay/cluster.yml @@ -0,0 +1,10 @@ +parameters: + prometheus: + relay: + backends: + - host: ${_param:cluster_node01_address} + port: ${_param:prometheus_server_bind_port} + - host: ${_param:cluster_node02_address} + port: ${_param:prometheus_server_bind_port} + - host: ${_param:cluster_node03_address} + port: ${_param:prometheus_server_bind_port} diff --git a/metadata/service/relay/init.yml b/metadata/service/relay/init.yml new file mode 100644 index 0000000..31608f8 --- /dev/null +++ b/metadata/service/relay/init.yml @@ -0,0 +1,12 @@ +applications: + - prometheus +classes: + - service.prometheus.support +parameters: + _param: + prometheus_relay_bind_port: 8080 + prometheus: + relay: + enabled: true + bind: + port: ${_param:prometheus_relay_bind_port} diff --git a/metadata/service/server/container.yml b/metadata/service/server/container.yml index 663092a..7f170b7 100644 --- a/metadata/service/server/container.yml +++ b/metadata/service/server/container.yml @@ -11,6 +11,11 @@ parameters: prometheus: server: enabled: true + is_container: true + use_grains: + target: true + recording: true + alert: true dir: config: /srv/volumes/prometheus/server config_in_container: /srv/prometheus diff --git a/metadata/service/server/standalone.yml b/metadata/service/server/standalone.yml new file mode 100644 index 0000000..40dbc7f --- /dev/null +++ b/metadata/service/server/standalone.yml @@ -0,0 +1,37 @@ +applications: + - prometheus +classes: + - service.prometheus.support +parameters: + _param: + prometheus_evaluation_interval: "15s" + prometheus_region_label: "region1" + prometheus_scrape_interval: "15s" + prometheus_storage_retention: "4320h" + prometheus_server_bind_port: 9090 + 
prometheus: + server: + enabled: true + is_container: false + use_grains: + target: false + recording: false + alert: false + dir: + config: /etc/prometheus + data: /var/lib/prometheus/data + bind: + port: ${_param:prometheus_server_bind_port} + address: 0.0.0.0 + storage: + local: + retention: ${_param:prometheus_storage_retention} + alertmanager: + notification_queue_capacity: 10000 + config: + global: + scrape_interval: ${_param:prometheus_scrape_interval} + scrape_timeout: "15s" + evaluation_interval: ${_param:prometheus_evaluation_interval} + external_labels: + region: ${_param:prometheus_region_label} diff --git a/prometheus/files/relay/default b/prometheus/files/relay/default new file mode 100644 index 0000000..953c4c0 --- /dev/null +++ b/prometheus/files/relay/default @@ -0,0 +1,10 @@ +{%- from "prometheus/map.jinja" import relay with context %} +{%- if relay.get("backends") %} + {%- set relay_backends = [] %} + {%- for backend in relay.backends %} + {%- set address = "%s:%s" | format(backend.host, backend.port) %} + {%- do relay_backends.append(address) %} + {%- endfor %} +PROMETHEUS_RELAY_BACKENDS={{ relay_backends | join(',') }} +{%- endif %} +PROMETHEUS_RELAY_BIND_PORT={{ relay.bind.port }} diff --git a/prometheus/files/relay/service b/prometheus/files/relay/service new file mode 100644 index 0000000..a17167f --- /dev/null +++ b/prometheus/files/relay/service @@ -0,0 +1,18 @@ +{%- from "prometheus/map.jinja" import relay with context %} +[Unit] +After=network.target + +[Service] +User=root +EnvironmentFile=-/etc/default/prometheus-relay +ExecStart=/usr/bin/prometheus-relay \ +{%- if relay.get("backends") %} + -urls=${PROMETHEUS_RELAY_BACKENDS} \ +{%- endif %} + -port=${PROMETHEUS_RELAY_BIND_PORT} +ExecReload=/bin/kill -HUP $MAINPID +Restart=on-failure +KillMode=control-group + +[Install] +WantedBy=multi-user.target diff --git a/prometheus/files/server/1.7/alerts.yml b/prometheus/files/server/1.7/alerts.yml index e0682c7..8610f93 100644 --- 
a/prometheus/files/server/1.7/alerts.yml +++ b/prometheus/files/server/1.7/alerts.yml @@ -1,14 +1,14 @@ -{%- from "prometheus/map.jinja" import server with context %} - {%- set alerts = {} %} {%- set recordings = {} %} -{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %} - {%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %} - {%- for recordingname, recording in server_grain.get('recording', {}).iteritems() %} - {%- do recordings.update({recordingname: recording}) %} +{%- if server.get('use_grains', {}).get('recording', True) %} + {%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %} + {%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %} + {%- for recordingname, recording in server_grain.get('recording', {}).iteritems() %} + {%- do recordings.update({recordingname: recording}) %} + {%- endfor %} {%- endfor %} -{%- endfor %} +{%- endif %} {%- set recordings = salt['grains.filter_by']({'default': recordings}, merge=server.get('recording', {})) %} @@ -18,12 +18,14 @@ {%- endif %} {%- endfor %} -{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %} - {%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %} - {%- for alertname, alert in server_grain.get('alert', {}).iteritems() %} +{%- if server.get('use_grains', {}).get('alert', True) %} + {%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %} + {%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %} + {%- for alertname, alert in server_grain.get('alert', {}).iteritems() %} {%- do alerts.update({alertname: alert}) %} + {%- endfor %} {%- endfor %} -{%- endfor %} +{%- endif %} {%- set alerts = salt['grains.filter_by']({'default': alerts}, merge=server.get('alert', {})) %} diff --git a/prometheus/files/server/2.0/alerts.yml b/prometheus/files/server/2.0/alerts.yml index 7ca88e0..ca38699 100644 --- 
a/prometheus/files/server/2.0/alerts.yml +++ b/prometheus/files/server/2.0/alerts.yml @@ -1,14 +1,14 @@ -{%- from "prometheus/map.jinja" import server with context %} - {%- set alerts = {} %} {%- set recordings = {} %} -{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %} - {%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %} - {%- for recordingname, recording in server_grain.get('recording', {}).iteritems() %} - {%- do recordings.update({recordingname: recording}) %} +{%- if server.get('use_grains', {}).get('recording', True) %} + {%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %} + {%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %} + {%- for recordingname, recording in server_grain.get('recording', {}).iteritems() %} + {%- do recordings.update({recordingname: recording}) %} + {%- endfor %} {%- endfor %} -{%- endfor %} +{%- endif %} groups: {%- set recordings = salt['grains.filter_by']({'default': recordings}, merge=server.get('recording', {})) %} @@ -19,15 +19,19 @@ groups: - record: {{ recordingname }} expr: >- {{ recording.query | indent(6, False) }} + labels: + recording_label: {{ recording.get('label', server.get('recording_label', 'federation')) }} {%- endif %} {%- endfor %} -{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %} - {%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %} - {%- for alertname, alert in server_grain.get('alert', {}).iteritems() %} +{%- if server.get('use_grains', {}).get('alert', True) %} + {%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %} + {%- set server_grain = node_grains.get('prometheus', {}).get('server', {}) %} + {%- for alertname, alert in server_grain.get('alert', {}).iteritems() %} {%- do alerts.update({alertname: alert}) %} + {%- endfor %} {%- endfor %} -{%- endfor %} +{%- endif %} {%- set alerts = 
salt['grains.filter_by']({'default': alerts}, merge=server.get('alert', {})) %} - name: alert.rules diff --git a/prometheus/files/server/default b/prometheus/files/server/default new file mode 100644 index 0000000..ee921da --- /dev/null +++ b/prometheus/files/server/default @@ -0,0 +1,5 @@ +{% from "prometheus/map.jinja" import server with context %} +PROMETHEUS_CONFIG_DIR={{ server.dir.config }} +PROMETHEUS_DATA_DIR={{ server.dir.data }} +PROMETHEUS_BIND_ADDRESS={{ server.bind.address }}:{{ server.bind.port }} +PROMETHEUS_STORAGE_LOCAL_RETENTION={{ server.storage.local.retention }} diff --git a/prometheus/files/server/prometheus.yml b/prometheus/files/server/prometheus.yml index 0223e35..57c3bee 100644 --- a/prometheus/files/server/prometheus.yml +++ b/prometheus/files/server/prometheus.yml @@ -1,5 +1,3 @@ -{%- from "prometheus/map.jinja" import server with context %} - global: {%- if server.get('config', {}).global is defined %} {{ server.config.global | yaml(False) | indent(2, true) }} @@ -38,39 +36,44 @@ rule_files: - alerts.yml {%- set static_target = server.target.static %} -{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %} - {%- set static_grain = node_grains.get('prometheus', {}).get('server', {}).get('target', {}).get('static', {}) %} - {%- for job_name, job in static_grain.iteritems() %} - {%- if static_target[job_name] is not defined %} - {%- do static_target.update({job_name: { - 'enabled': job.get('enabled', True), - 'metrics_path': job.get('metrics_path', '/metrics'), - 'scheme': job.get('scheme', 'http')} - }) %} - {%- if job.get('tls_config') %} - {%- do static_target[job_name].update({ - 'tls_config': { - 'skip_verify': job.get('tls_config', {}).get('skip_verify', False), - 'cert_name': job.get('tls_config', {}).get('cert_name'), - 'key_name': job.get('tls_config', {}).get('key_name') - } +{%- if server.get('use_grains', {}).get('target', True) %} + {%- for node_name, node_grains in salt['mine.get']('*', 
'grains.items').iteritems() %} + {%- set static_grain = node_grains.get('prometheus', {}).get('server', {}).get('target', {}).get('static', {}) %} + {%- for job_name, job in static_grain.iteritems() %} + {%- if static_target[job_name] is not defined %} + {%- do static_target.update({job_name: { + 'enabled': job.get('enabled', True), + 'metrics_path': job.get('metrics_path', '/metrics'), + 'honor_labels': job.get('honor_labels', False), + 'scrape_interval': job.get('scrape_interval', server.get('config', {}).get('global', {}).get('scrape_interval', '15s')), + 'scrape_timeout': job.get('scrape_timeout', job.get('scrape_interval', server.get('config', {}).get('global', {}).get('scrape_interval', '15s'))), + 'scheme': job.get('scheme', 'http')} }) %} + {%- if job.get('tls_config') %} + {%- do static_target[job_name].update({ + 'tls_config': { + 'skip_verify': job.get('tls_config', {}).get('skip_verify', False), + 'cert_name': job.get('tls_config', {}).get('cert_name'), + 'key_name': job.get('tls_config', {}).get('key_name') + } + }) %} + {%- endif %} {%- endif %} - {%- endif %} - {%- if static_target[job_name]['endpoint'] is not defined %} - {%- do static_target[job_name].update({'endpoint': []}) %} - {%- endif %} - {%- if static_target[job_name]['metric_relabel'] is not defined %} - {%- do static_target[job_name].update({'metric_relabel': []}) %} - {%- endif %} - {%- for target in job.get('endpoint', []) %} - {%- do static_target[job_name]['endpoint'].append(target) %} - {%- endfor %} - {%- for label in job.get('metric_relabel', []) %} - {%- do static_target[job_name]['metric_relabel'].append(label) %} + {%- if static_target[job_name]['endpoint'] is not defined %} + {%- do static_target[job_name].update({'endpoint': []}) %} + {%- endif %} + {%- if static_target[job_name]['metric_relabel'] is not defined %} + {%- do static_target[job_name].update({'metric_relabel': []}) %} + {%- endif %} + {%- for target in job.get('endpoint', []) %} + {%- do static_target[job_name]['endpoint'].append(target) %} 
+ {%- endfor %} + {%- for label in job.get('metric_relabel', []) %} + {%- do static_target[job_name]['metric_relabel'].append(label) %} + {%- endfor %} {%- endfor %} {%- endfor %} -{%- endfor %} +{%- endif %} scrape_configs: {%- for job_name, job in static_target.iteritems() %} @@ -83,6 +86,15 @@ scrape_configs: - job_name: {{ job_name }} {% if job.get('scheme') %}scheme: {{ job.scheme }}{%- endif %} {% if job.get('metrics_path') %}metrics_path: {{ job.metrics_path }}{%- endif %} + {% if job.honor_labels is defined %}honor_labels: {{ job.honor_labels }}{%- endif %} + {% if job.scrape_interval is defined %}scrape_interval: {{ job.scrape_interval }}{%- endif %} + {% if job.scrape_timeout is defined %}scrape_timeout: {{ job.scrape_timeout }}{%- endif %} + {%- if job.get('params') %} + params: + {%- for param_name, param_value in job.get('params', {}).iteritems() %} + {{ param_name }}: {{ param_value }} + {%- endfor %} + {%- endif %} {%- if job.get('tls_config') %} tls_config: {% if job.tls_config.get('skip_verify') is defined %}insecure_skip_verify: {{ job.tls_config.skip_verify | lower }}{%- endif %} @@ -111,12 +123,14 @@ scrape_configs: {%- endfor %} {%- set kubernetes_target = {} %} -{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %} - {%- set kubernetes_grain = node_grains.get('prometheus', {}).get('server', {}).get('target', {}).get('kubernetes', {}) %} - {%- if kubernetes_grain %} - {%- do kubernetes_target.update(kubernetes_grain) %} - {%- endif %} -{%- endfor %} +{%- if server.get('use_grains', {}).get('target', True) %} + {%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %} + {%- set kubernetes_grain = node_grains.get('prometheus', {}).get('server', {}).get('target', {}).get('kubernetes', {}) %} + {%- if kubernetes_grain %} + {%- do kubernetes_target.update(kubernetes_grain) %} + {%- endif %} + {%- endfor %} +{%- endif %} {%- if kubernetes_target.get('enabled', False) %} diff --git 
a/prometheus/files/server/service b/prometheus/files/server/service new file mode 100644 index 0000000..c1bc00e --- /dev/null +++ b/prometheus/files/server/service @@ -0,0 +1,18 @@ +[Unit] +Description=Prometheus Server +Documentation=https://prometheus.io/docs/introduction/overview/ +After=network-online.target + +[Service] +EnvironmentFile=-/etc/default/prometheus +User=root +Group=root +Restart=on-failure +ExecStart=/usr/bin/prometheus \ + --web.listen-address=${PROMETHEUS_BIND_ADDRESS} \ + --config.file=${PROMETHEUS_CONFIG_DIR}/prometheus.yml \ + --storage.tsdb.path=${PROMETHEUS_DATA_DIR} \ + --storage.tsdb.retention=${PROMETHEUS_STORAGE_LOCAL_RETENTION} + +[Install] +WantedBy=multi-user.target diff --git a/prometheus/init.sls b/prometheus/init.sls index cd31765..73d51ba 100644 --- a/prometheus/init.sls +++ b/prometheus/init.sls @@ -1,10 +1,14 @@ -{%- if pillar.prometheus.server is defined or +{%- if pillar.prometheus.get('server', {}).get('enabled', False) or + pillar.prometheus.get('relay', {}).get('enabled', False) or pillar.prometheus.alertmanager is defined or pillar.prometheus.exporters is defined %} include: - {%- if pillar.prometheus.server is defined %} + {%- if pillar.prometheus.get('server', {}).get('enabled', False) %} - prometheus.server {%- endif %} + {%- if pillar.prometheus.get('relay', {}).get('enabled', False) %} + - prometheus.relay + {%- endif %} {%- if pillar.prometheus.alertmanager is defined %} - prometheus.alertmanager {%- endif %} diff --git a/prometheus/map.jinja b/prometheus/map.jinja index d5c7d9b..8db8b45 100644 --- a/prometheus/map.jinja +++ b/prometheus/map.jinja @@ -1,6 +1,7 @@ {% set server = salt['grains.filter_by']({ 'default': { 'version': 2.0, + 'pkgs': ['prometheus-bin'], 'target': { 'static': { }, @@ -8,6 +9,12 @@ }, }, merge=salt['pillar.get']('prometheus:server')) %} +{% set relay = salt['grains.filter_by']({ + 'default': { + 'pkgs': ['prometheus-relay'] + }, +}, merge=salt['pillar.get']('prometheus:relay')) %} + {% set 
alertmanager = salt['grains.filter_by']({ 'default': { }, diff --git a/prometheus/relay.sls b/prometheus/relay.sls new file mode 100644 index 0000000..fe00db4 --- /dev/null +++ b/prometheus/relay.sls @@ -0,0 +1,53 @@ +{% from "prometheus/map.jinja" import relay with context %} +{%- if relay.enabled %} + +prometheus_relay_packages: + pkg.installed: + - names: {{ relay.pkgs }} + +prometheus_relay_default_file: + file.managed: + - name: /etc/default/prometheus-relay + - source: salt://prometheus/files/relay/default + - template: jinja + +{%- if grains.get('init') == 'systemd' %} + +prometheus_relay_systemd_config: + file.managed: + - name: /etc/systemd/system/prometheus-relay.service + - source: salt://prometheus/files/relay/service + - makedirs: true + - user: root + - group: root + - mode: 644 + - template: jinja + - require: + - file: prometheus_relay_default_file + +prometheus_relay_restart_systemd: + module.wait: + - name: service.systemctl_reload + - watch: + - file: prometheus_relay_systemd_config + - watch_in: + - service: prometheus_relay_service + +{%- endif %} + +prometheus_relay_service: + service.running: + - name: prometheus-relay + - enable: True + {%- if grains.get('noservices') %} + - onlyif: /bin/false + {%- endif %} + - watch: +{%- if grains.get('init') == 'systemd' %} + - file: prometheus_relay_systemd_config +{%- endif %} + - file: prometheus_relay_default_file + - require: + - pkg: prometheus_relay_packages + +{%- endif %} diff --git a/prometheus/server.sls b/prometheus/server.sls index 388ca1d..da4378c 100644 --- a/prometheus/server.sls +++ b/prometheus/server.sls @@ -1,30 +1,92 @@ {% from "prometheus/map.jinja" import server with context %} {%- if server.enabled %} -{%- if pillar.docker is defined and pillar.docker.host is defined %} - -{{server.dir.config}}: +prometheus_server_config_dir: file.directory: + - name: {{ server.dir.config }} - makedirs: True -{{server.dir.data}}: +prometheus_server_data_dir: file.directory: + - name: {{ 
server.dir.data }} - makedirs: True - mode: 755 -{{server.dir.config}}/prometheus.yml: +prometheus_server_config_file: file.managed: - - source: salt://prometheus/files/server/prometheus.yml - - template: jinja - - require: - - file: {{server.dir.config}} + - name: {{ server.dir.config }}/prometheus.yml + - source: salt://prometheus/files/server/prometheus.yml + - template: jinja + - defaults: + server: {{ server }} + - require: + - file: prometheus_server_config_dir -{{server.dir.config}}/alerts.yml: +prometheus_server_alerts_file: file.managed: - - source: salt://prometheus/files/server/{{server.version}}/alerts.yml - - template: jinja - - require: - - file: {{server.dir.config}} + - name: {{ server.dir.config }}/alerts.yml + - source: salt://prometheus/files/server/{{server.version}}/alerts.yml + - template: jinja + - defaults: + server: {{ server }} + - require: + - file: prometheus_server_config_dir + +{%- if not server.get('is_container', True) %} + +prometheus_server_packages: + pkg.installed: + - names: {{ server.pkgs }} + +prometheus_server_default_file: + file.managed: + - name: /etc/default/prometheus + - source: salt://prometheus/files/server/default + - template: jinja + - defaults: + server: {{ server }} + +{%- if grains.get('init') == 'systemd' %} + +prometheus_server_systemd_config: + file.managed: + - name: /etc/systemd/system/prometheus.service + - source: salt://prometheus/files/server/service + - makedirs: true + - user: root + - group: root + - mode: 644 + - template: jinja + - require: + - file: prometheus_server_default_file + +prometheus_server_restart_systemd: + module.wait: + - name: service.systemctl_reload + - watch: + - file: prometheus_server_systemd_config + - watch_in: + - service: prometheus_server_service {%- endif %} + +prometheus_server_service: + service.running: + - name: prometheus + - enable: True + {%- if grains.get('noservices') %} + - onlyif: /bin/false + {%- endif %} + - watch: +{%- if grains.get('init') == 'systemd' %} + 
- file: prometheus_server_systemd_config +{%- endif %} + - file: prometheus_server_config_file + - file: prometheus_server_alerts_file + - require: + - file: prometheus_server_data_dir + - pkg: prometheus_server_packages + +{%- endif %} + {%- endif %}