From 3de29f6d5a3017b57c553c5e2fb63a50994df840 Mon Sep 17 00:00:00 2001 From: Mangirdas Date: Sat, 27 Jan 2018 08:05:31 +0000 Subject: Rebase Prometheus example for new scrape endpoints and expose alert manager --- .../openshift-prometheus/private/uninstall.yml | 8 + playbooks/openshift-prometheus/uninstall.yml | 2 + roles/openshift_prometheus/defaults/main.yaml | 15 ++ roles/openshift_prometheus/tasks/facts.yaml | 10 ++ .../tasks/install_prometheus.yaml | 119 +++++++------- roles/openshift_prometheus/tasks/main.yaml | 4 +- roles/openshift_prometheus/tasks/uninstall.yaml | 7 + .../tasks/uninstall_prometheus.yaml | 7 - roles/openshift_prometheus/templates/prometheus.j2 | 92 ++++++++--- .../templates/prometheus.yml.j2 | 175 ++++++++++++++------- 10 files changed, 290 insertions(+), 149 deletions(-) create mode 100644 playbooks/openshift-prometheus/private/uninstall.yml create mode 100644 playbooks/openshift-prometheus/uninstall.yml create mode 100644 roles/openshift_prometheus/tasks/facts.yaml create mode 100644 roles/openshift_prometheus/tasks/uninstall.yaml delete mode 100644 roles/openshift_prometheus/tasks/uninstall_prometheus.yaml diff --git a/playbooks/openshift-prometheus/private/uninstall.yml b/playbooks/openshift-prometheus/private/uninstall.yml new file mode 100644 index 000000000..2df39c2a8 --- /dev/null +++ b/playbooks/openshift-prometheus/private/uninstall.yml @@ -0,0 +1,8 @@ +--- +- name: Uninstall Prometheus + hosts: masters[0] + tasks: + - name: Run the Prometheus Uninstall Role Tasks + include_role: + name: openshift_prometheus + tasks_from: uninstall diff --git a/playbooks/openshift-prometheus/uninstall.yml b/playbooks/openshift-prometheus/uninstall.yml new file mode 100644 index 000000000..c92ade786 --- /dev/null +++ b/playbooks/openshift-prometheus/uninstall.yml @@ -0,0 +1,2 @@ +--- +- import_playbook: private/uninstall.yml diff --git a/roles/openshift_prometheus/defaults/main.yaml b/roles/openshift_prometheus/defaults/main.yaml index 1b21c4739..37a05f3f0 100644 --- a/roles/openshift_prometheus/defaults/main.yaml +++ b/roles/openshift_prometheus/defaults/main.yaml @@ -7,9 +7,24 @@ openshift_prometheus_namespace: openshift-metrics # defaults hosts for routes openshift_prometheus_hostname: prometheus-{{openshift_prometheus_namespace}}.{{openshift_master_default_subdomain}} openshift_prometheus_alerts_hostname: alerts-{{openshift_prometheus_namespace}}.{{openshift_master_default_subdomain}} +openshift_prometheus_alertmanager_hostname: alertmanager-{{openshift_prometheus_namespace}}.{{openshift_master_default_subdomain}} + openshift_prometheus_node_selector: {"region":"infra"} +openshift_prometheus_service_port: 443 +openshift_prometheus_service_targetport: 8443 +openshift_prometheus_service_name: prometheus +openshift_prometheus_alerts_service_targetport: 9443 +openshift_prometheus_alerts_service_name: alerts +openshift_prometheus_alertmanager_service_targetport: 10443 +openshift_prometheus_alertmanager_service_name: alertmanager +openshift_prometheus_serviceaccount_annotations: [] +l_openshift_prometheus_serviceaccount_annotations: + - serviceaccounts.openshift.io/oauth-redirectreference.prom='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}' + - serviceaccounts.openshift.io/oauth-redirectreference.alerts='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}' + - serviceaccounts.openshift.io/oauth-redirectreference.alertmanager='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alertmanager"}}' + # additional prometheus rules file openshift_prometheus_additional_rules_file: null diff --git a/roles/openshift_prometheus/tasks/facts.yaml b/roles/openshift_prometheus/tasks/facts.yaml new file mode 100644 index 000000000..214089732 --- /dev/null +++ b/roles/openshift_prometheus/tasks/facts.yaml @@ -0,0 +1,10 @@ +--- +# The kubernetes version impacts the prometheus scraping endpoint +# so gathering it before constructing the configmap +- name: get oc version + oc_version: + register: oc_version + +- set_fact: + kubernetes_version: "{{ oc_version.results.kubernetes_short | float }}" + openshift_prometheus_serviceaccount_annotations: "{{ l_openshift_prometheus_serviceaccount_annotations + openshift_prometheus_serviceaccount_annotations|list }}" diff --git a/roles/openshift_prometheus/tasks/install_prometheus.yaml b/roles/openshift_prometheus/tasks/install_prometheus.yaml index 749df5152..0b565502f 100644 --- a/roles/openshift_prometheus/tasks/install_prometheus.yaml +++ b/roles/openshift_prometheus/tasks/install_prometheus.yaml @@ -1,4 +1,6 @@ --- +# set facts +- include_tasks: facts.yaml # namespace - name: Add prometheus project @@ -9,7 +11,7 @@ description: Prometheus # secrets -- name: Set alert and prometheus secrets +- name: Set alert, alertmanager and prometheus secrets oc_secret: state: present name: "{{ item }}-proxy" @@ -20,30 +22,24 @@ with_items: - prometheus - alerts + - alertmanager # serviceaccount - name: create prometheus serviceaccount oc_serviceaccount: state: present - name: prometheus + name: "{{ openshift_prometheus_service_name }}" namespace: "{{ openshift_prometheus_namespace }}" - # TODO add annotations when supproted - # annotations: - # serviceaccounts.openshift.io/oauth-redirectreference.prom: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}' - # serviceaccounts.openshift.io/oauth-redirectreference.alerts: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}' - - secrets: - - prometheus-secrets changed_when: no + # TODO remove this when annotations are supported by oc_serviceaccount - name: annotate serviceaccount command: > {{ openshift_client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }} - serviceaccount prometheus - serviceaccounts.openshift.io/oauth-redirectreference.prom='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}' - serviceaccounts.openshift.io/oauth-redirectreference.alerts='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}' - + serviceaccount {{ openshift_prometheus_service_name }} {{ item }} + with_items: + "{{ openshift_prometheus_serviceaccount_annotations }}" # create clusterrolebinding for prometheus serviceaccount - name: Set cluster-reader permissions for prometheus @@ -52,63 +48,61 @@ namespace: "{{ openshift_prometheus_namespace }}" resource_kind: cluster-role resource_name: cluster-reader - user: "system:serviceaccount:{{ openshift_prometheus_namespace }}:prometheus" + user: "system:serviceaccount:{{ openshift_prometheus_namespace }}:{{ openshift_prometheus_service_name }}" + -# create prometheus and alerts services -# TODO join into 1 task with loop -- name: Create prometheus service +- name: create services for prometheus oc_service: - state: present - name: "{{ item.name }}" + name: "{{ openshift_prometheus_service_name }}" namespace: "{{ openshift_prometheus_namespace }}" - selector: - app: prometheus labels: - name: "{{ item.name }}" - # TODO add annotations when supported - # annotations: - # service.alpha.openshift.io/serving-cert-secret-name: "{{item.name}}-tls" + name: prometheus + annotations: + oprometheus.io/scrape: 'true' + oprometheus.io/scheme: https + service.alpha.openshift.io/serving-cert-secret-name: prometheus-tls ports: - - port: 443 - targetPort: 8443 - with_items: - - name: prometheus + - name: prometheus + port: "{{ openshift_prometheus_service_port }}" + targetPort: "{{ openshift_prometheus_service_targetport }}" + protocol: TCP + selector: + app: prometheus -- name: Create alerts service +- name: create services for alert buffer oc_service: - state: present - name: "{{ item.name }}" + name: "{{ openshift_prometheus_alerts_service_name }}" namespace: "{{ openshift_prometheus_namespace }}" + labels: + name: prometheus + annotations: + service.alpha.openshift.io/serving-cert-secret-name: alerts-tls + ports: + - name: prometheus + port: "{{ openshift_prometheus_service_port }}" + targetPort: "{{ openshift_prometheus_alerts_service_targetport }}" + protocol: TCP selector: app: prometheus + +- name: create services for alertmanager + oc_service: + name: "{{ openshift_prometheus_alertmanager_service_name }}" + namespace: "{{ openshift_prometheus_namespace }}" labels: - name: "{{ item.name }}" - # TODO add annotations when supported - # annotations: - # service.alpha.openshift.io/serving-cert-secret-name: "{{item.name}}-tls" + name: prometheus + annotations: + service.alpha.openshift.io/serving-cert-secret-name: alertmanager-tls ports: - - port: 443 - targetPort: 9443 - with_items: - - name: alerts - - -# Annotate services with secret name -# TODO remove this when annotations are supported by oc_service -- name: annotate prometheus service - command: > - {{ openshift_client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }} - service prometheus - prometheus.io/scrape='true' - prometheus.io/scheme=https - service.alpha.openshift.io/serving-cert-secret-name=prometheus-tls - -- name: annotate alerts service - command: > - {{ openshift_client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }} - service alerts 'service.alpha.openshift.io/serving-cert-secret-name=prometheus-alerts-tls' + - name: prometheus + port: "{{ openshift_prometheus_service_port }}" + targetPort: "{{ openshift_prometheus_alertmanager_service_targetport }}" + protocol: TCP + selector: + app: prometheus # create prometheus and alerts routes +# TODO: oc_route module should support insecureEdgeTerminationPolicy: Redirect - name: create prometheus and alerts routes oc_route: state: present @@ -122,6 +116,8 @@ host: "{{ openshift_prometheus_hostname }}" - name: alerts host: "{{ openshift_prometheus_alerts_hostname }}" + - name: alertmanager + host: "{{ openshift_prometheus_alertmanager_hostname }}" # Storage - name: create prometheus pvc @@ -169,15 +165,6 @@ path: "{{ tempdir }}/prometheus.additional.rules" register: additional_rules_stat -# The kubernetes version impacts the prometheus scraping endpoint -# so gathering it before constructing the configmap -- name: get oc version - oc_version: - register: oc_version - -- set_fact: - kubernetes_version: "{{ oc_version.results.kubernetes_short | float }}" - - template: src: prometheus.yml.j2 dest: "{{ tempdir }}/prometheus.yml" @@ -219,7 +206,7 @@ - name: Set alertmanager configmap oc_configmap: state: present - name: "prometheus-alerts" + name: "alertmanager" namespace: "{{ openshift_prometheus_namespace }}" from_file: alertmanager.yml: "{{ tempdir }}/alertmanager.yml" diff --git a/roles/openshift_prometheus/tasks/main.yaml b/roles/openshift_prometheus/tasks/main.yaml index b859eb111..66d65a3f2 100644 --- a/roles/openshift_prometheus/tasks/main.yaml +++ b/roles/openshift_prometheus/tasks/main.yaml @@ -16,9 +16,11 @@ - name: Create templates subdirectory file: state: directory - path: "{{ tempdir }}/templates" + path: "{{ tempdir }}/{{ item }}" mode: 0755 changed_when: False + with_items: + - templates - include_tasks: install_prometheus.yaml when: openshift_prometheus_state == 'present' diff --git a/roles/openshift_prometheus/tasks/uninstall.yaml b/roles/openshift_prometheus/tasks/uninstall.yaml new file mode 100644 index 000000000..d746402db --- /dev/null +++ b/roles/openshift_prometheus/tasks/uninstall.yaml @@ -0,0 +1,7 @@ +--- + +# remove namespace - This will delete all the objects inside the namespace +- name: Remove prometheus project + oc_project: + state: absent + name: "{{ openshift_prometheus_namespace }}" diff --git a/roles/openshift_prometheus/tasks/uninstall_prometheus.yaml b/roles/openshift_prometheus/tasks/uninstall_prometheus.yaml deleted file mode 100644 index d746402db..000000000 --- a/roles/openshift_prometheus/tasks/uninstall_prometheus.yaml +++ /dev/null @@ -1,7 +0,0 @@ ---- - -# remove namespace - This will delete all the objects inside the namespace -- name: Remove prometheus project - oc_project: - state: absent - name: "{{ openshift_prometheus_namespace }}" diff --git a/roles/openshift_prometheus/templates/prometheus.j2 b/roles/openshift_prometheus/templates/prometheus.j2 index d780550b8..c0abd483b 100644 --- a/roles/openshift_prometheus/templates/prometheus.j2 +++ b/roles/openshift_prometheus/templates/prometheus.j2 @@ -19,7 +19,7 @@ spec: labels: app: prometheus spec: - serviceAccountName: prometheus + serviceAccountName: "{{ openshift_prometheus_service_name }}" {% if openshift_prometheus_node_selector is iterable and openshift_prometheus_node_selector | length > 0 %} nodeSelector: {% for key, value in openshift_prometheus_node_selector.items() %} @@ -47,15 +47,15 @@ spec: cpu: "{{ openshift_prometheus_oauth_proxy_cpu_limit }}" {% endif %} ports: - - containerPort: 8443 + - containerPort: {{ openshift_prometheus_service_targetport }} name: web args: - -provider=openshift - - -https-address=:8443 + - -https-address=:{{ openshift_prometheus_service_targetport }} - -http-address= - -email-domain=* - -upstream=http://localhost:9090 - - -client-id=system:serviceaccount:{{ namespace }}:prometheus + - -client-id=system:serviceaccount:{{ namespace }}:{{ openshift_prometheus_service_name }} - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}' - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}' - -tls-cert=/etc/tls/private/tls.crt @@ -67,9 +67,9 @@ spec: - -skip-auth-regex=^/metrics volumeMounts: - mountPath: /etc/tls/private - name: prometheus-tls + name: prometheus-tls-secret - mountPath: /etc/proxy/secrets - name: prometheus-secrets + name: prometheus-proxy-secret - mountPath: /prometheus name: prometheus-data @@ -104,7 +104,7 @@ spec: - mountPath: /prometheus name: prometheus-data - # Deploy alertmanager behind prometheus-alert-buffer behind an oauth proxy + # Deploy alert-buffer behind oauth alerts-proxy - name: alerts-proxy image: "{{ l_openshift_prometheus_proxy_image_prefix }}oauth-proxy:{{ l_openshift_prometheus_proxy_image_version }}" imagePullPolicy: IfNotPresent @@ -124,15 +124,15 @@ spec: cpu: "{{ openshift_prometheus_oauth_proxy_cpu_limit }}" {% endif %} ports: - - containerPort: 9443 + - containerPort: {{ openshift_prometheus_alerts_service_targetport }} name: web args: - -provider=openshift - - -https-address=:9443 + - -https-address=:{{ openshift_prometheus_alerts_service_targetport }} - -http-address= - -email-domain=* - -upstream=http://localhost:9099 - - -client-id=system:serviceaccount:{{ namespace }}:prometheus + - -client-id=system:serviceaccount:{{ namespace }}:{{ openshift_prometheus_service_name }} - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}' - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}' - -tls-cert=/etc/tls/private/tls.crt @@ -143,9 +143,9 @@ spec: - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt volumeMounts: - mountPath: /etc/tls/private - name: alerts-tls + name: alerts-tls-secret - mountPath: /etc/proxy/secrets - name: alerts-secrets + name: alerts-proxy-secret - name: alert-buffer args: @@ -169,11 +169,54 @@ spec: {% endif %} volumeMounts: - mountPath: /alert-buffer - name: alert-buffer-data + name: alerts-data ports: - containerPort: 9099 name: alert-buf + # Deploy alertmanager behind oauth alertmanager-proxy + - name: alertmanager-proxy + image: "{{ l_openshift_prometheus_proxy_image_prefix }}oauth-proxy:{{ l_openshift_prometheus_proxy_image_version }}" + imagePullPolicy: IfNotPresent + requests: +{% if openshift_prometheus_oauth_proxy_memory_requests is defined and openshift_prometheus_oauth_proxy_memory_requests is not none %} + memory: "{{ openshift_prometheus_oauth_proxy_memory_requests }}" +{% endif %} +{% if openshift_prometheus_oauth_proxy_cpu_requests is defined and openshift_prometheus_oauth_proxy_cpu_requests is not none %} + cpu: "{{ openshift_prometheus_oauth_proxy_cpu_requests }}" +{% endif %} + limits: +{% if openshift_prometheus_oauth_proxy_memory_limit is defined and openshift_prometheus_oauth_proxy_memory_limit is not none %} + memory: "{{ openshift_prometheus_oauth_proxy_memory_limit }}" +{% endif %} +{% if openshift_prometheus_oauth_proxy_cpu_limit is defined and openshift_prometheus_oauth_proxy_cpu_limit is not none %} + cpu: "{{ openshift_prometheus_oauth_proxy_cpu_limit }}" +{% endif %} + ports: + - containerPort: {{ openshift_prometheus_alertmanager_service_targetport }} + name: web + args: + - -provider=openshift + - -https-address=:{{ openshift_prometheus_alertmanager_service_targetport }} + - -http-address= + - -email-domain=* + - -upstream=http://localhost:9093 + - -client-id=system:serviceaccount:{{ namespace }}:{{ openshift_prometheus_service_name }} + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret-file=/etc/proxy/secrets/session_secret + - -skip-auth-regex=^/metrics + volumeMounts: + - mountPath: /etc/tls/private + name: alertmanager-tls-secret + - mountPath: /etc/proxy/secrets + name: alertmanager-proxy-secret + - name: alertmanager args: - -config.file=/etc/alertmanager/alertmanager.yml @@ -205,14 +248,15 @@ spec: restartPolicy: Always volumes: + - name: prometheus-config configMap: defaultMode: 420 name: prometheus - - name: prometheus-secrets + - name: prometheus-proxy-secret secret: secretName: prometheus-proxy - - name: prometheus-tls + - name: prometheus-tls-secret secret: secretName: prometheus-tls - name: prometheus-data @@ -225,13 +269,19 @@ spec: - name: alertmanager-config configMap: defaultMode: 420 - name: prometheus-alerts - - name: alerts-secrets + name: alertmanager + - name: alertmanager-proxy-secret secret: - secretName: alerts-proxy - - name: alerts-tls + secretName: alertmanager-proxy + - name: alertmanager-tls-secret + secret: + secretName: alertmanager-tls + - name: alerts-tls-secret secret: - secretName: prometheus-alerts-tls + secretName: alerts-tls + - name: alerts-proxy-secret + secret: + secretName: alerts-proxy - name: alertmanager-data {% if openshift_prometheus_alertmanager_storage_type == 'pvc' %} persistentVolumeClaim: @@ -239,7 +289,7 @@ spec: {% else %} emptydir: {} {% endif %} - - name: alert-buffer-data + - name: alerts-data {% if openshift_prometheus_alertbuffer_storage_type == 'pvc' %} persistentVolumeClaim: claimName: {{ openshift_prometheus_alertbuffer_pvc_name }} diff --git a/roles/openshift_prometheus/templates/prometheus.yml.j2 b/roles/openshift_prometheus/templates/prometheus.yml.j2 index 63430f834..005c2c564 100644 --- a/roles/openshift_prometheus/templates/prometheus.yml.j2 +++ b/roles/openshift_prometheus/templates/prometheus.yml.j2 @@ -1,10 +1,5 @@ rule_files: - - 'prometheus.rules' -{% if openshift_prometheus_additional_rules_file is defined and openshift_prometheus_additional_rules_file is not none %} - - 'prometheus.additional.rules' -{% endif %} - - + - '*.rules' # A scrape configuration for running Prometheus on a Kubernetes cluster. # This uses separate scrape configs for cluster components (i.e. API server, node) @@ -39,31 +34,11 @@ scrape_configs: action: keep regex: default;kubernetes;https -# Scrape config for nodes. -# -# Each node exposes a /metrics endpoint that contains operational metrics for -# the Kubelet and other components. -- job_name: 'kubernetes-nodes' - - scheme: https - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - - kubernetes_sd_configs: - - role: node - - relabel_configs: - - action: labelmap - regex: __meta_kubernetes_node_label_(.+) - # Scrape config for controllers. # # Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for # the controllers. # -# TODO: move this to a pure endpoints based metrics gatherer when controllers are exposed via -# endpoints. - job_name: 'kubernetes-controllers' scheme: https @@ -87,6 +62,27 @@ scrape_configs: regex: (.+)(?::\d+) replacement: $1:8444 +# Scrape config for nodes. +# +# Each node exposes a /metrics endpoint that contains operational metrics for +# the Kubelet and other components. +- job_name: 'kubernetes-nodes' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + # Drop a very high cardinality metric that is incorrect in 3.7. It will be + # fixed in 3.9. + metric_relabel_configs: + - source_labels: [__name__] + action: drop + regex: 'openshift_sdn_pod_(setup|teardown)_latency(.*)' + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + # Scrape config for cAdvisor. # # Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that @@ -107,6 +103,14 @@ scrape_configs: kubernetes_sd_configs: - role: node + # Exclude a set of high cardinality metrics that can contribute to significant + # memory use in large clusters. These can be selectively enabled as necessary + # for medium or small clusters. + metric_relabel_configs: + - source_labels: [__name__] + action: drop + regex: 'container_(cpu_user_seconds_total|cpu_cfs_periods_total|memory_usage_bytes|memory_swap|memory_working_set_bytes|memory_cache|last_seen|fs_(read_seconds_total|write_seconds_total|sector_(.*)|io_(.*)|reads_merged_total|writes_merged_total)|tasks_state|memory_failcnt|memory_failures_total|spec_memory_swap_limit_bytes|fs_(.*)_bytes_total|spec_(.*))' + relabel_configs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) @@ -133,38 +137,101 @@ scrape_configs: - role: endpoints relabel_configs: - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] - action: keep - regex: true - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] - action: replace - target_label: __scheme__ - regex: (https?) - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] + # only scrape infrastructure components + - source_labels: [__meta_kubernetes_namespace] + action: keep + regex: 'default|logging|metrics|kube-.+|openshift|openshift-.+' + # drop infrastructure components managed by other scrape targets + - source_labels: [__meta_kubernetes_service_name] + action: drop + regex: 'prometheus-node-exporter' + # only those that have requested scraping + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] + action: replace + target_label: __scheme__ + regex: (https?) + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] + action: replace + target_label: __address__ + regex: (.+)(?::\d+);(\d+) + replacement: $1:$2 + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_service_name] + action: replace + target_label: kubernetes_name + +# Scrape config for node-exporter, which is expected to be running on port 9100. +- job_name: 'kubernetes-nodes-exporter' + + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + + kubernetes_sd_configs: + - role: node + + metric_relabel_configs: + - source_labels: [__name__] + action: drop + regex: 'node_cpu|node_(disk|scrape_collector)_.+' + # preserve a subset of the network, netstat, vmstat, and filesystem series + - source_labels: [__name__] action: replace - target_label: __metrics_path__ - regex: (.+) - - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] + regex: '(node_(netstat_Ip_.+|vmstat_(nr|thp)_.+|filesystem_(free|size|device_error)|network_(transmit|receive)_(drop|errs)))' + target_label: __name__ + replacement: renamed_$1 + - source_labels: [__name__] + action: drop + regex: 'node_(netstat|vmstat|filesystem|network)_.+' + - source_labels: [__name__] action: replace + regex: 'renamed_(.+)' + target_label: __name__ + replacement: $1 + # drop any partial expensive series + - source_labels: [__name__, device] + action: drop + regex: 'node_network_.+;veth.+' + - source_labels: [__name__, mountpoint] + action: drop + regex: 'node_filesystem_(free|size|device_error);([^/].*|/.+)' + + relabel_configs: + - source_labels: [__address__] + regex: '(.*):10250' + replacement: '${1}:9100' target_label: __address__ - regex: (.+)(?::\d+);(\d+) - replacement: $1:$2 - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_username] - action: replace - target_label: __basic_auth_username__ - regex: (.+) - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_password] - action: replace - target_label: __basic_auth_password__ - regex: (.+) + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ - action: labelmap - regex: __meta_kubernetes_service_label_(.+) - - source_labels: [__meta_kubernetes_namespace] - action: replace - target_label: kubernetes_namespace - - source_labels: [__meta_kubernetes_service_name] - action: replace - target_label: kubernetes_name + regex: __meta_kubernetes_node_label_(.+) + +# Scrape config for the template service broker +- job_name: 'openshift-template-service-broker' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt + server_name: apiserver.openshift-template-service-broker.svc + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: endpoints + + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: openshift-template-service-broker;apiserver;https + alerting: alertmanagers: -- cgit v1.2.3