diff options
author | OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> | 2018-01-29 03:25:44 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-01-29 03:25:44 -0800 |
commit | 2ec70a36f50f670887f2f257d348db6332de5d26 (patch) | |
tree | 4e9eebca6c1979b57275d0a60ac30cb6dfcb1457 | |
parent | 312692222f11c19df680867b4e497947e343d017 (diff) | |
parent | 3de29f6d5a3017b57c553c5e2fb63a50994df840 (diff) | |
download | openshift-2ec70a36f50f670887f2f257d348db6332de5d26.tar.gz openshift-2ec70a36f50f670887f2f257d348db6332de5d26.tar.bz2 openshift-2ec70a36f50f670887f2f257d348db6332de5d26.tar.xz openshift-2ec70a36f50f670887f2f257d348db6332de5d26.zip |
Merge pull request #6811 from mjudeikis/prometheus-new-template
Automatic merge from submit-queue.
Prometheus new template rebase
Updating Prometheus for new templates/example.
1. New scraping rules, fixes
2. exposed alert manager
3. clean ansible
4. add a custom annotation for routes (in the example when AVI router in use we need to be able to add custom annotations)
5. Externalise some of the configs
Still work in progress...
FIY: @zgalor
-rw-r--r-- | playbooks/openshift-prometheus/private/uninstall.yml | 8 | ||||
-rw-r--r-- | playbooks/openshift-prometheus/uninstall.yml | 2 | ||||
-rw-r--r-- | roles/openshift_prometheus/defaults/main.yaml | 15 | ||||
-rw-r--r-- | roles/openshift_prometheus/tasks/facts.yaml | 10 | ||||
-rw-r--r-- | roles/openshift_prometheus/tasks/install_prometheus.yaml | 119 | ||||
-rw-r--r-- | roles/openshift_prometheus/tasks/main.yaml | 4 | ||||
-rw-r--r-- | roles/openshift_prometheus/tasks/uninstall.yaml (renamed from roles/openshift_prometheus/tasks/uninstall_prometheus.yaml) | 0 | ||||
-rw-r--r-- | roles/openshift_prometheus/templates/prometheus.j2 | 92 | ||||
-rw-r--r-- | roles/openshift_prometheus/templates/prometheus.yml.j2 | 175 |
9 files changed, 283 insertions, 142 deletions
diff --git a/playbooks/openshift-prometheus/private/uninstall.yml b/playbooks/openshift-prometheus/private/uninstall.yml new file mode 100644 index 000000000..2df39c2a8 --- /dev/null +++ b/playbooks/openshift-prometheus/private/uninstall.yml @@ -0,0 +1,8 @@ +--- +- name: Uninstall Prometheus + hosts: masters[0] + tasks: + - name: Run the Prometheus Uninstall Role Tasks + include_role: + name: openshift_prometheus + tasks_from: uninstall diff --git a/playbooks/openshift-prometheus/uninstall.yml b/playbooks/openshift-prometheus/uninstall.yml new file mode 100644 index 000000000..c92ade786 --- /dev/null +++ b/playbooks/openshift-prometheus/uninstall.yml @@ -0,0 +1,2 @@ +--- +- import_playbook: private/uninstall.yml diff --git a/roles/openshift_prometheus/defaults/main.yaml b/roles/openshift_prometheus/defaults/main.yaml index 1b21c4739..37a05f3f0 100644 --- a/roles/openshift_prometheus/defaults/main.yaml +++ b/roles/openshift_prometheus/defaults/main.yaml @@ -7,9 +7,24 @@ openshift_prometheus_namespace: openshift-metrics # defaults hosts for routes openshift_prometheus_hostname: prometheus-{{openshift_prometheus_namespace}}.{{openshift_master_default_subdomain}} openshift_prometheus_alerts_hostname: alerts-{{openshift_prometheus_namespace}}.{{openshift_master_default_subdomain}} +openshift_prometheus_alertmanager_hostname: alertmanager-{{openshift_prometheus_namespace}}.{{openshift_master_default_subdomain}} + openshift_prometheus_node_selector: {"region":"infra"} +openshift_prometheus_service_port: 443 +openshift_prometheus_service_targetport: 8443 +openshift_prometheus_service_name: prometheus +openshift_prometheus_alerts_service_targetport: 9443 +openshift_prometheus_alerts_service_name: alerts +openshift_prometheus_alertmanager_service_targetport: 10443 +openshift_prometheus_alertmanager_service_name: alertmanager +openshift_prometheus_serviceaccount_annotations: [] +l_openshift_prometheus_serviceaccount_annotations: + - serviceaccounts.openshift.io/oauth-redirectreference.prom='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}' + - serviceaccounts.openshift.io/oauth-redirectreference.alerts='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}' + - serviceaccounts.openshift.io/oauth-redirectreference.alertmanager='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alertmanager"}}' + # additional prometheus rules file openshift_prometheus_additional_rules_file: null diff --git a/roles/openshift_prometheus/tasks/facts.yaml b/roles/openshift_prometheus/tasks/facts.yaml new file mode 100644 index 000000000..214089732 --- /dev/null +++ b/roles/openshift_prometheus/tasks/facts.yaml @@ -0,0 +1,10 @@ +--- +# The kubernetes version impacts the prometheus scraping endpoint +# so gathering it before constructing the configmap +- name: get oc version + oc_version: + register: oc_version + +- set_fact: + kubernetes_version: "{{ oc_version.results.kubernetes_short | float }}" + openshift_prometheus_serviceaccount_annotations: "{{ l_openshift_prometheus_serviceaccount_annotations + openshift_prometheus_serviceaccount_annotations|list }}" diff --git a/roles/openshift_prometheus/tasks/install_prometheus.yaml b/roles/openshift_prometheus/tasks/install_prometheus.yaml index 749df5152..0b565502f 100644 --- a/roles/openshift_prometheus/tasks/install_prometheus.yaml +++ b/roles/openshift_prometheus/tasks/install_prometheus.yaml @@ -1,4 +1,6 @@ --- +# set facts +- include_tasks: facts.yaml # namespace - name: Add prometheus project @@ -9,7 +11,7 @@ description: Prometheus # secrets -- name: Set alert and prometheus secrets +- name: Set alert, alertmanager and prometheus secrets oc_secret: state: present name: "{{ item }}-proxy" @@ -20,30 +22,24 @@ with_items: - prometheus - alerts + - alertmanager # serviceaccount - name: create prometheus serviceaccount oc_serviceaccount: state: present - name: prometheus + name: "{{ openshift_prometheus_service_name }}" namespace: "{{ openshift_prometheus_namespace }}" - # TODO add annotations when supproted - # annotations: - # serviceaccounts.openshift.io/oauth-redirectreference.prom: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}' - # serviceaccounts.openshift.io/oauth-redirectreference.alerts: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}' - - secrets: - - prometheus-secrets changed_when: no + # TODO remove this when annotations are supported by oc_serviceaccount - name: annotate serviceaccount command: > {{ openshift_client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }} - serviceaccount prometheus - serviceaccounts.openshift.io/oauth-redirectreference.prom='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}' - serviceaccounts.openshift.io/oauth-redirectreference.alerts='{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}' - + serviceaccount {{ openshift_prometheus_service_name }} {{ item }} + with_items: + "{{ openshift_prometheus_serviceaccount_annotations }}" # create clusterrolebinding for prometheus serviceaccount - name: Set cluster-reader permissions for prometheus @@ -52,63 +48,61 @@ namespace: "{{ openshift_prometheus_namespace }}" resource_kind: cluster-role resource_name: cluster-reader - user: "system:serviceaccount:{{ openshift_prometheus_namespace }}:prometheus" + user: "system:serviceaccount:{{ openshift_prometheus_namespace }}:{{ openshift_prometheus_service_name }}" + -# create prometheus and alerts services -# TODO join into 1 task with loop -- name: Create prometheus service +- name: create services for prometheus oc_service: - state: present - name: "{{ item.name }}" + name: "{{ openshift_prometheus_service_name }}" namespace: "{{ openshift_prometheus_namespace }}" - selector: - app: prometheus labels: - name: "{{ item.name }}" - # TODO add annotations when supported - # annotations: - # service.alpha.openshift.io/serving-cert-secret-name: "{{item.name}}-tls" + name: prometheus + annotations: + oprometheus.io/scrape: 'true' + oprometheus.io/scheme: https + service.alpha.openshift.io/serving-cert-secret-name: prometheus-tls ports: - - port: 443 - targetPort: 8443 - with_items: - - name: prometheus + - name: prometheus + port: "{{ openshift_prometheus_service_port }}" + targetPort: "{{ openshift_prometheus_service_targetport }}" + protocol: TCP + selector: + app: prometheus -- name: Create alerts service +- name: create services for alert buffer oc_service: - state: present - name: "{{ item.name }}" + name: "{{ openshift_prometheus_alerts_service_name }}" namespace: "{{ openshift_prometheus_namespace }}" + labels: + name: prometheus + annotations: + service.alpha.openshift.io/serving-cert-secret-name: alerts-tls + ports: + - name: prometheus + port: "{{ openshift_prometheus_service_port }}" + targetPort: "{{ openshift_prometheus_alerts_service_targetport }}" + protocol: TCP selector: app: prometheus + +- name: create services for alertmanager + oc_service: + name: "{{ openshift_prometheus_alertmanager_service_name }}" + namespace: "{{ openshift_prometheus_namespace }}" labels: - name: "{{ item.name }}" - # TODO add annotations when supported - # annotations: - # service.alpha.openshift.io/serving-cert-secret-name: "{{item.name}}-tls" + name: prometheus + annotations: + service.alpha.openshift.io/serving-cert-secret-name: alertmanager-tls ports: - - port: 443 - targetPort: 9443 - with_items: - - name: alerts - - -# Annotate services with secret name -# TODO remove this when annotations are supported by oc_service -- name: annotate prometheus service - command: > - {{ openshift_client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }} - service prometheus - prometheus.io/scrape='true' - prometheus.io/scheme=https - service.alpha.openshift.io/serving-cert-secret-name=prometheus-tls - -- name: annotate alerts service - command: > - {{ openshift_client_binary }} annotate --overwrite -n {{ openshift_prometheus_namespace }} - service alerts 'service.alpha.openshift.io/serving-cert-secret-name=prometheus-alerts-tls' + - name: prometheus + port: "{{ openshift_prometheus_service_port }}" + targetPort: "{{ openshift_prometheus_alertmanager_service_targetport }}" + protocol: TCP + selector: + app: prometheus # create prometheus and alerts routes +# TODO: oc_route module should support insecureEdgeTerminationPolicy: Redirect - name: create prometheus and alerts routes oc_route: state: present @@ -122,6 +116,8 @@ host: "{{ openshift_prometheus_hostname }}" - name: alerts host: "{{ openshift_prometheus_alerts_hostname }}" + - name: alertmanager + host: "{{ openshift_prometheus_alertmanager_hostname }}" # Storage - name: create prometheus pvc @@ -169,15 +165,6 @@ path: "{{ tempdir }}/prometheus.additional.rules" register: additional_rules_stat -# The kubernetes version impacts the prometheus scraping endpoint -# so gathering it before constructing the configmap -- name: get oc version - oc_version: - register: oc_version - -- set_fact: - kubernetes_version: "{{ oc_version.results.kubernetes_short | float }}" - - template: src: prometheus.yml.j2 dest: "{{ tempdir }}/prometheus.yml" @@ -219,7 +206,7 @@ - name: Set alertmanager configmap oc_configmap: state: present - name: "prometheus-alerts" + name: "alertmanager" namespace: "{{ openshift_prometheus_namespace }}" from_file: alertmanager.yml: "{{ tempdir }}/alertmanager.yml" diff --git a/roles/openshift_prometheus/tasks/main.yaml b/roles/openshift_prometheus/tasks/main.yaml index b859eb111..66d65a3f2 100644 --- a/roles/openshift_prometheus/tasks/main.yaml +++ b/roles/openshift_prometheus/tasks/main.yaml @@ -16,9 +16,11 @@ - name: Create templates subdirectory file: state: directory - path: "{{ tempdir }}/templates" + path: "{{ tempdir }}/{{ item }}" mode: 0755 changed_when: False + with_items: + - templates - include_tasks: install_prometheus.yaml when: openshift_prometheus_state == 'present' diff --git a/roles/openshift_prometheus/tasks/uninstall_prometheus.yaml b/roles/openshift_prometheus/tasks/uninstall.yaml index d746402db..d746402db 100644 --- a/roles/openshift_prometheus/tasks/uninstall_prometheus.yaml +++ b/roles/openshift_prometheus/tasks/uninstall.yaml diff --git a/roles/openshift_prometheus/templates/prometheus.j2 b/roles/openshift_prometheus/templates/prometheus.j2 index d780550b8..c0abd483b 100644 --- a/roles/openshift_prometheus/templates/prometheus.j2 +++ b/roles/openshift_prometheus/templates/prometheus.j2 @@ -19,7 +19,7 @@ spec: labels: app: prometheus spec: - serviceAccountName: prometheus + serviceAccountName: "{{ openshift_prometheus_service_name }}" {% if openshift_prometheus_node_selector is iterable and openshift_prometheus_node_selector | length > 0 %} nodeSelector: {% for key, value in openshift_prometheus_node_selector.items() %} @@ -47,15 +47,15 @@ spec: cpu: "{{ openshift_prometheus_oauth_proxy_cpu_limit }}" {% endif %} ports: - - containerPort: 8443 + - containerPort: {{ openshift_prometheus_service_targetport }} name: web args: - -provider=openshift - - -https-address=:8443 + - -https-address=:{{ openshift_prometheus_service_targetport }} - -http-address= - -email-domain=* - -upstream=http://localhost:9090 - - -client-id=system:serviceaccount:{{ namespace }}:prometheus + - -client-id=system:serviceaccount:{{ namespace }}:{{ openshift_prometheus_service_name }} - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}' - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}' - -tls-cert=/etc/tls/private/tls.crt @@ -67,9 +67,9 @@ spec: - -skip-auth-regex=^/metrics volumeMounts: - mountPath: /etc/tls/private - name: prometheus-tls + name: prometheus-tls-secret - mountPath: /etc/proxy/secrets - name: prometheus-secrets + name: prometheus-proxy-secret - mountPath: /prometheus name: prometheus-data @@ -104,7 +104,7 @@ spec: - mountPath: /prometheus name: prometheus-data - # Deploy alertmanager behind prometheus-alert-buffer behind an oauth proxy + # Deploy alert-buffer behind oauth alerts-proxy - name: alerts-proxy image: "{{ l_openshift_prometheus_proxy_image_prefix }}oauth-proxy:{{ l_openshift_prometheus_proxy_image_version }}" imagePullPolicy: IfNotPresent @@ -124,15 +124,15 @@ spec: cpu: "{{ openshift_prometheus_oauth_proxy_cpu_limit }}" {% endif %} ports: - - containerPort: 9443 + - containerPort: {{ openshift_prometheus_alerts_service_targetport }} name: web args: - -provider=openshift - - -https-address=:9443 + - -https-address=:{{ openshift_prometheus_alerts_service_targetport }} - -http-address= - -email-domain=* - -upstream=http://localhost:9099 - - -client-id=system:serviceaccount:{{ namespace }}:prometheus + - -client-id=system:serviceaccount:{{ namespace }}:{{ openshift_prometheus_service_name }} - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}' - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}' - -tls-cert=/etc/tls/private/tls.crt @@ -143,9 +143,9 @@ spec: - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt volumeMounts: - mountPath: /etc/tls/private - name: alerts-tls + name: alerts-tls-secret - mountPath: /etc/proxy/secrets - name: alerts-secrets + name: alerts-proxy-secret - name: alert-buffer args: @@ -169,11 +169,54 @@ spec: {% endif %} volumeMounts: - mountPath: /alert-buffer - name: alert-buffer-data + name: alerts-data ports: - containerPort: 9099 name: alert-buf + # Deploy alertmanager behind oauth alertmanager-proxy + - name: alertmanager-proxy + image: "{{ l_openshift_prometheus_proxy_image_prefix }}oauth-proxy:{{ l_openshift_prometheus_proxy_image_version }}" + imagePullPolicy: IfNotPresent + requests: +{% if openshift_prometheus_oauth_proxy_memory_requests is defined and openshift_prometheus_oauth_proxy_memory_requests is not none %} + memory: "{{ openshift_prometheus_oauth_proxy_memory_requests }}" +{% endif %} +{% if openshift_prometheus_oauth_proxy_cpu_requests is defined and openshift_prometheus_oauth_proxy_cpu_requests is not none %} + cpu: "{{ openshift_prometheus_oauth_proxy_cpu_requests }}" +{% endif %} + limits: +{% if openshift_prometheus_oauth_proxy_memory_limit is defined and openshift_prometheus_oauth_proxy_memory_limit is not none %} + memory: "{{ openshift_prometheus_oauth_proxy_memory_limit }}" +{% endif %} +{% if openshift_prometheus_oauth_proxy_cpu_limit is defined and openshift_prometheus_oauth_proxy_cpu_limit is not none %} + cpu: "{{ openshift_prometheus_oauth_proxy_cpu_limit }}" +{% endif %} + ports: + - containerPort: {{ openshift_prometheus_alertmanager_service_targetport }} + name: web + args: + - -provider=openshift + - -https-address=:{{ openshift_prometheus_alertmanager_service_targetport }} + - -http-address= + - -email-domain=* + - -upstream=http://localhost:9093 + - -client-id=system:serviceaccount:{{ namespace }}:{{ openshift_prometheus_service_name }} + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret-file=/etc/proxy/secrets/session_secret + - -skip-auth-regex=^/metrics + volumeMounts: + - mountPath: /etc/tls/private + name: alertmanager-tls-secret + - mountPath: /etc/proxy/secrets + name: alertmanager-proxy-secret + - name: alertmanager args: - -config.file=/etc/alertmanager/alertmanager.yml @@ -205,14 +248,15 @@ spec: restartPolicy: Always volumes: + - name: prometheus-config configMap: defaultMode: 420 name: prometheus - - name: prometheus-secrets + - name: prometheus-proxy-secret secret: secretName: prometheus-proxy - - name: prometheus-tls + - name: prometheus-tls-secret secret: secretName: prometheus-tls - name: prometheus-data @@ -225,13 +269,19 @@ spec: - name: alertmanager-config configMap: defaultMode: 420 - name: prometheus-alerts - - name: alerts-secrets + name: alertmanager + - name: alertmanager-proxy-secret secret: - secretName: alerts-proxy - - name: alerts-tls + secretName: alertmanager-proxy + - name: alertmanager-tls-secret + secret: + secretName: alertmanager-tls + - name: alerts-tls-secret secret: - secretName: prometheus-alerts-tls + secretName: alerts-tls + - name: alerts-proxy-secret + secret: + secretName: alerts-proxy - name: alertmanager-data {% if openshift_prometheus_alertmanager_storage_type == 'pvc' %} persistentVolumeClaim: @@ -239,7 +289,7 @@ spec: {% else %} emptydir: {} {% endif %} - - name: alert-buffer-data + - name: alerts-data {% if openshift_prometheus_alertbuffer_storage_type == 'pvc' %} persistentVolumeClaim: claimName: {{ openshift_prometheus_alertbuffer_pvc_name }} diff --git a/roles/openshift_prometheus/templates/prometheus.yml.j2 b/roles/openshift_prometheus/templates/prometheus.yml.j2 index 63430f834..005c2c564 100644 --- a/roles/openshift_prometheus/templates/prometheus.yml.j2 +++ b/roles/openshift_prometheus/templates/prometheus.yml.j2 @@ -1,10 +1,5 @@ rule_files: - - 'prometheus.rules' -{% if openshift_prometheus_additional_rules_file is defined and openshift_prometheus_additional_rules_file is not none %} - - 'prometheus.additional.rules' -{% endif %} - - + - '*.rules' # A scrape configuration for running Prometheus on a Kubernetes cluster. # This uses separate scrape configs for cluster components (i.e. API server, node) @@ -39,31 +34,11 @@ scrape_configs: action: keep regex: default;kubernetes;https -# Scrape config for nodes. -# -# Each node exposes a /metrics endpoint that contains operational metrics for -# the Kubelet and other components. -- job_name: 'kubernetes-nodes' - - scheme: https - tls_config: - ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - - kubernetes_sd_configs: - - role: node - - relabel_configs: - - action: labelmap - regex: __meta_kubernetes_node_label_(.+) - # Scrape config for controllers. # # Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for # the controllers. # -# TODO: move this to a pure endpoints based metrics gatherer when controllers are exposed via -# endpoints. - job_name: 'kubernetes-controllers' scheme: https @@ -87,6 +62,27 @@ scrape_configs: regex: (.+)(?::\d+) replacement: $1:8444 +# Scrape config for nodes. +# +# Each node exposes a /metrics endpoint that contains operational metrics for +# the Kubelet and other components. +- job_name: 'kubernetes-nodes' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + # Drop a very high cardinality metric that is incorrect in 3.7. It will be + # fixed in 3.9. + metric_relabel_configs: + - source_labels: [__name__] + action: drop + regex: 'openshift_sdn_pod_(setup|teardown)_latency(.*)' + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + # Scrape config for cAdvisor. # # Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that @@ -107,6 +103,14 @@ scrape_configs: kubernetes_sd_configs: - role: node + # Exclude a set of high cardinality metrics that can contribute to significant + # memory use in large clusters. These can be selectively enabled as necessary + # for medium or small clusters. + metric_relabel_configs: + - source_labels: [__name__] + action: drop + regex: 'container_(cpu_user_seconds_total|cpu_cfs_periods_total|memory_usage_bytes|memory_swap|memory_working_set_bytes|memory_cache|last_seen|fs_(read_seconds_total|write_seconds_total|sector_(.*)|io_(.*)|reads_merged_total|writes_merged_total)|tasks_state|memory_failcnt|memory_failures_total|spec_memory_swap_limit_bytes|fs_(.*)_bytes_total|spec_(.*))' + relabel_configs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) @@ -133,38 +137,101 @@ scrape_configs: - role: endpoints relabel_configs: - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] - action: keep - regex: true - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] - action: replace - target_label: __scheme__ - regex: (https?) - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] + # only scrape infrastructure components + - source_labels: [__meta_kubernetes_namespace] + action: keep + regex: 'default|logging|metrics|kube-.+|openshift|openshift-.+' + # drop infrastructure components managed by other scrape targets + - source_labels: [__meta_kubernetes_service_name] + action: drop + regex: 'prometheus-node-exporter' + # only those that have requested scraping + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] + action: replace + target_label: __scheme__ + regex: (https?) + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] + action: replace + target_label: __address__ + regex: (.+)(?::\d+);(\d+) + replacement: $1:$2 + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_service_name] + action: replace + target_label: kubernetes_name + +# Scrape config for node-exporter, which is expected to be running on port 9100. +- job_name: 'kubernetes-nodes-exporter' + + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + + kubernetes_sd_configs: + - role: node + + metric_relabel_configs: + - source_labels: [__name__] + action: drop + regex: 'node_cpu|node_(disk|scrape_collector)_.+' + # preserve a subset of the network, netstat, vmstat, and filesystem series + - source_labels: [__name__] action: replace - target_label: __metrics_path__ - regex: (.+) - - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] + regex: '(node_(netstat_Ip_.+|vmstat_(nr|thp)_.+|filesystem_(free|size|device_error)|network_(transmit|receive)_(drop|errs)))' + target_label: __name__ + replacement: renamed_$1 + - source_labels: [__name__] + action: drop + regex: 'node_(netstat|vmstat|filesystem|network)_.+' + - source_labels: [__name__] action: replace + regex: 'renamed_(.+)' + target_label: __name__ + replacement: $1 + # drop any partial expensive series + - source_labels: [__name__, device] + action: drop + regex: 'node_network_.+;veth.+' + - source_labels: [__name__, mountpoint] + action: drop + regex: 'node_filesystem_(free|size|device_error);([^/].*|/.+)' + + relabel_configs: + - source_labels: [__address__] + regex: '(.*):10250' + replacement: '${1}:9100' target_label: __address__ - regex: (.+)(?::\d+);(\d+) - replacement: $1:$2 - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_username] - action: replace - target_label: __basic_auth_username__ - regex: (.+) - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_password] - action: replace - target_label: __basic_auth_password__ - regex: (.+) + - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname] + target_label: __instance__ - action: labelmap - regex: __meta_kubernetes_service_label_(.+) - - source_labels: [__meta_kubernetes_namespace] - action: replace - target_label: kubernetes_namespace - - source_labels: [__meta_kubernetes_service_name] - action: replace - target_label: kubernetes_name + regex: __meta_kubernetes_node_label_(.+) + +# Scrape config for the template service broker +- job_name: 'openshift-template-service-broker' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt + server_name: apiserver.openshift-template-service-broker.svc + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: endpoints + + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: openshift-template-service-broker;apiserver;https + alerting: alertmanagers: |