From 242c36ca0c938602b3bd0de156b21579e9ecc712 Mon Sep 17 00:00:00 2001 From: Davis Phillips Date: Mon, 11 Dec 2017 09:44:38 -0600 Subject: add vsphere examples in hosts.example --- inventory/hosts.example | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'inventory') diff --git a/inventory/hosts.example b/inventory/hosts.example index c18a53671..82f38dfd0 100644 --- a/inventory/hosts.example +++ b/inventory/hosts.example @@ -281,6 +281,16 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # # GCE #openshift_cloudprovider_kind=gce +# +# vSphere +#openshift_cloudprovider_kind=vsphere +#openshift_cloudprovider_vsphere_username=username +#openshift_cloudprovider_vsphere_password=password +#openshift_cloudprovider_vsphere_host=vcenter_host or vsphere_host +#openshift_cloudprovider_vsphere_datacenter=datacenter +#openshift_cloudprovider_vsphere_datastore=datastore +#openshift_cloudprovider_vsphere_folder=optional_folder_name + # Project Configuration #osm_project_request_message='' -- cgit v1.2.3 From 89197481ce9f593ff5f7c2b37f0efbbfca61431e Mon Sep 17 00:00:00 2001 From: Michael Gugino Date: Mon, 18 Dec 2017 18:09:40 -0500 Subject: Remove oauth_template bits from openshift_facts This commit moves some deprecated variable logic outside of openshift_facts and into role defaults. --- inventory/hosts.example | 3 +++ roles/openshift_facts/library/openshift_facts.py | 21 --------------------- roles/openshift_master/defaults/main.yml | 9 +++++++++ roles/openshift_master/templates/master.yaml.v1.j2 | 4 ++-- roles/openshift_master_facts/tasks/main.yml | 2 -- 5 files changed, 14 insertions(+), 25 deletions(-) (limited to 'inventory') diff --git a/inventory/hosts.example b/inventory/hosts.example index d857cd1a7..6c9181891 100644 --- a/inventory/hosts.example +++ b/inventory/hosts.example @@ -84,6 +84,9 @@ openshift_release=v3.7 # Configure extensions in the master config for console customization # See: https://docs.openshift.org/latest/install_config/web_console_customization.html#serving-static-files +#openshift_master_oauth_templates: +# login: /path/to/login-template.html +# openshift_master_oauth_template is deprecated. Use openshift_master_oauth_templates instead. #openshift_master_oauth_template=/path/to/login-template.html # Configure imagePolicyConfig in the master config diff --git a/roles/openshift_facts/library/openshift_facts.py b/roles/openshift_facts/library/openshift_facts.py index d659286dc..d7c358a2f 100755 --- a/roles/openshift_facts/library/openshift_facts.py +++ b/roles/openshift_facts/library/openshift_facts.py @@ -656,26 +656,6 @@ def set_nodename(facts): return facts -def migrate_oauth_template_facts(facts): - """ - Migrate an old oauth template fact to a newer format if it's present. - - The legacy 'oauth_template' fact was just a filename, and assumed you were - setting the 'login' template. - - The new pluralized 'oauth_templates' fact is a dict mapping the template - name to a filename. - - Simplify the code after this by merging the old fact into the new. - """ - if 'master' in facts and 'oauth_template' in facts['master']: - if 'oauth_templates' not in facts['master']: - facts['master']['oauth_templates'] = {"login": facts['master']['oauth_template']} - elif 'login' not in facts['master']['oauth_templates']: - facts['master']['oauth_templates']['login'] = facts['master']['oauth_template'] - return facts - - def format_url(use_ssl, hostname, port, path=''): """ Format url based on ssl flag, hostname, port and path @@ -1387,7 +1367,6 @@ class OpenShiftFacts(object): facts = merge_facts(facts, local_facts, additive_facts_to_overwrite) - facts = migrate_oauth_template_facts(facts) facts['current_config'] = get_current_config(facts) facts = set_url_facts_if_unset(facts) facts = set_identity_providers_if_unset(facts) diff --git a/roles/openshift_master/defaults/main.yml b/roles/openshift_master/defaults/main.yml index 5d292ffd0..4030e1bfa 100644 --- a/roles/openshift_master/defaults/main.yml +++ b/roles/openshift_master/defaults/main.yml @@ -82,6 +82,15 @@ openshift_master_valid_grant_methods: openshift_master_is_scaleup_host: False +# openshift_master_oauth_template is deprecated. Should be added to deprecations +# and removed. +openshift_master_oauth_template: False +openshift_master_oauth_templates_default: + login: "{{ openshift_master_oauth_template }}" +openshift_master_oauth_templates: "{{ openshift_master_oauth_template | ternary(openshift_master_oauth_templates_default, False) }}" +# Here we combine openshift_master_oath_template into 'login' key of openshift_master_oath_templates, if not present. +l_openshift_master_oauth_templates: "{{ openshift_master_oauth_templates | default(openshift_master_oauth_templates_default) }}" + # These defaults assume forcing journald persistence, fsync to disk once # a second, rate-limiting to 10,000 logs a second, no forwarding to # syslog or wall, using 8GB of disk space maximum, using 10MB journal diff --git a/roles/openshift_master/templates/master.yaml.v1.j2 b/roles/openshift_master/templates/master.yaml.v1.j2 index c224ad714..14023ea73 100644 --- a/roles/openshift_master/templates/master.yaml.v1.j2 +++ b/roles/openshift_master/templates/master.yaml.v1.j2 @@ -152,8 +152,8 @@ oauthConfig: {% if 'oauth_always_show_provider_selection' in openshift.master %} alwaysShowProviderSelection: {{ openshift.master.oauth_always_show_provider_selection }} {% endif %} -{% if 'oauth_templates' in openshift.master %} - templates:{{ openshift.master.oauth_templates | lib_utils_to_padded_yaml(level=2) }} +{% if l_openshift_master_oauth_templates %} + templates:{{ l_openshift_master_oauth_templates | lib_utils_to_padded_yaml(level=2) }} {% endif %} assetPublicURL: {{ openshift.master.public_console_url }}/ grantConfig: diff --git a/roles/openshift_master_facts/tasks/main.yml b/roles/openshift_master_facts/tasks/main.yml index ad9a21c96..85d0ac25c 100644 --- a/roles/openshift_master_facts/tasks/main.yml +++ b/roles/openshift_master_facts/tasks/main.yml @@ -74,8 +74,6 @@ master_count: "{{ openshift_master_count | default(None) }}" admission_plugin_config: "{{openshift_master_admission_plugin_config }}" kube_admission_plugin_config: "{{openshift_master_kube_admission_plugin_config | default(None) }}" # deprecated, merged with admission_plugin_config - oauth_template: "{{ openshift_master_oauth_template | default(None) }}" # deprecated in origin 1.2 / OSE 3.2 - oauth_templates: "{{ openshift_master_oauth_templates | default(None) }}" oauth_always_show_provider_selection: "{{ openshift_master_oauth_always_show_provider_selection | default(None) }}" image_policy_config: "{{ openshift_master_image_policy_config | default(None) }}" dynamic_provisioning_enabled: "{{ openshift_master_dynamic_provisioning_enabled | default(None) }}" -- cgit v1.2.3 From e3cf9edff6d0186b09b1a112592f283fab6857d0 Mon Sep 17 00:00:00 2001 From: Michael Gugino Date: Tue, 19 Dec 2017 16:36:47 -0500 Subject: Remove references to deployment_type Move openshift_deployment_type check into sanity_check action plugin. Remove compatibility for deployment_type. deployment_type has been deprecated for some time now. --- .papr.inventory | 2 +- inventory/hosts.example | 2 +- playbooks/byo/rhel_subscribe.yml | 2 +- .../upgrades/pre/verify_cluster.yml | 5 --- .../upgrades/pre/verify_upgrade_targets.yml | 2 +- .../openshift-cluster/upgrades/v3_6/upgrade.yml | 2 +- .../upgrades/v3_6/upgrade_control_plane.yml | 2 +- .../upgrades/v3_6/upgrade_nodes.yml | 2 +- playbooks/init/base_packages.yml | 37 +++++++++++++++++++ playbooks/init/facts.yml | 41 +++++----------------- playbooks/init/repos.yml | 2 +- playbooks/openshift-glusterfs/README.md | 2 +- .../openshift-master/private/additional_config.yml | 2 +- playbooks/openshift-master/private/config.yml | 2 +- playbooks/prerequisites.yml | 4 ++- roles/ansible_service_broker/tasks/install.yml | 2 +- roles/container_runtime/meta/main.yml | 1 + roles/contiv_facts/tasks/main.yml | 2 +- roles/lib_utils/action_plugins/sanity_checks.py | 18 +++++++++- roles/openshift_facts/defaults/main.yml | 4 +-- roles/openshift_logging_curator/tasks/main.yaml | 2 +- .../tasks/main.yaml | 4 +-- .../openshift_logging_eventrouter/tasks/main.yaml | 4 +-- roles/openshift_logging_fluentd/tasks/main.yaml | 4 +-- roles/openshift_logging_kibana/tasks/main.yaml | 4 +-- roles/openshift_logging_mux/tasks/main.yaml | 4 +-- roles/openshift_metrics/tasks/main.yaml | 4 +-- roles/openshift_node/tasks/main.yml | 2 +- roles/openshift_node/tasks/upgrade.yml | 1 - roles/openshift_prometheus/tasks/main.yaml | 2 +- roles/openshift_repos/tasks/main.yaml | 2 +- roles/openshift_sanitize_inventory/tasks/main.yml | 26 -------------- roles/openshift_sanitize_inventory/vars/main.yml | 3 -- roles/openshift_service_catalog/tasks/install.yml | 4 +-- .../openshift_storage_glusterfs/defaults/main.yml | 8 ++--- roles/template_service_broker/tasks/install.yml | 4 +-- .../package_availability_missing_required.yml | 2 +- .../playbooks/package_availability_succeeds.yml | 2 +- .../playbooks/package_version_matches.yml | 2 +- .../playbooks/package_version_mismatches.yml | 2 +- 40 files changed, 109 insertions(+), 113 deletions(-) create mode 100644 playbooks/init/base_packages.yml (limited to 'inventory') diff --git a/.papr.inventory b/.papr.inventory index aa4324c21..c678e76aa 100644 --- a/.papr.inventory +++ b/.papr.inventory @@ -6,7 +6,7 @@ etcd [OSEv3:vars] ansible_ssh_user=root ansible_python_interpreter=/usr/bin/python3 -deployment_type=origin +openshift_deployment_type=origin openshift_image_tag="{{ lookup('env', 'OPENSHIFT_IMAGE_TAG') }}" openshift_master_default_subdomain="{{ lookup('env', 'RHCI_ocp_node1_IP') }}.xip.io" openshift_check_min_host_disk_gb=1.5 diff --git a/inventory/hosts.example b/inventory/hosts.example index d857cd1a7..b009b4fc8 100644 --- a/inventory/hosts.example +++ b/inventory/hosts.example @@ -941,7 +941,7 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', #openshift_master_audit_config={"enabled": true, "auditFilePath": "/var/log/openpaas-oscp-audit/openpaas-oscp-audit.log", "maximumFileRetentionDays": 14, "maximumFileSizeMegabytes": 500, "maximumRetainedFiles": 5} # Enable origin repos that point at Centos PAAS SIG, defaults to true, only used -# by deployment_type=origin +# by openshift_deployment_type=origin #openshift_enable_origin_repo=false # Validity of the auto-generated OpenShift certificates in days. diff --git a/playbooks/byo/rhel_subscribe.yml b/playbooks/byo/rhel_subscribe.yml index dc9d0a139..f70f05bac 100644 --- a/playbooks/byo/rhel_subscribe.yml +++ b/playbooks/byo/rhel_subscribe.yml @@ -6,7 +6,7 @@ roles: - role: rhel_subscribe when: - - deployment_type == 'openshift-enterprise' + - openshift_deployment_type == 'openshift-enterprise' - ansible_distribution == "RedHat" - rhsub_user is defined - rhsub_pass is defined diff --git a/playbooks/common/openshift-cluster/upgrades/pre/verify_cluster.yml b/playbooks/common/openshift-cluster/upgrades/pre/verify_cluster.yml index 4713f8633..693ab2d96 100644 --- a/playbooks/common/openshift-cluster/upgrades/pre/verify_cluster.yml +++ b/playbooks/common/openshift-cluster/upgrades/pre/verify_cluster.yml @@ -5,11 +5,6 @@ hosts: oo_first_master gather_facts: no tasks: - - fail: - msg: > - This upgrade is only supported for origin and openshift-enterprise - deployment types - when: deployment_type not in ['origin','openshift-enterprise'] # Error out in situations where the user has older versions specified in their # inventory in any of the openshift_release, openshift_image_tag, and diff --git a/playbooks/common/openshift-cluster/upgrades/pre/verify_upgrade_targets.yml b/playbooks/common/openshift-cluster/upgrades/pre/verify_upgrade_targets.yml index 95c37c38c..b0b5a7e4b 100644 --- a/playbooks/common/openshift-cluster/upgrades/pre/verify_upgrade_targets.yml +++ b/playbooks/common/openshift-cluster/upgrades/pre/verify_upgrade_targets.yml @@ -49,5 +49,5 @@ fail: msg: "This upgrade playbook must be run against OpenShift {{ openshift_upgrade_min }} or later" when: - - deployment_type == 'origin' + - openshift_deployment_type == 'origin' - openshift.common.version is version_compare(openshift_upgrade_min,'<') diff --git a/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade.yml index a5ad3801d..d520c6aee 100644 --- a/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade.yml @@ -13,7 +13,7 @@ tasks: - set_fact: openshift_upgrade_target: '3.6' - openshift_upgrade_min: "{{ '1.5' if deployment_type == 'origin' else '3.5' }}" + openshift_upgrade_min: "{{ '1.5' if openshift_deployment_type == 'origin' else '3.5' }}" - import_playbook: ../pre/config.yml vars: diff --git a/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade_control_plane.yml index 1498db4c5..a956fdde5 100644 --- a/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade_control_plane.yml @@ -20,7 +20,7 @@ tasks: - set_fact: openshift_upgrade_target: '3.6' - openshift_upgrade_min: "{{ '1.5' if deployment_type == 'origin' else '3.5' }}" + openshift_upgrade_min: "{{ '1.5' if openshift_deployment_type == 'origin' else '3.5' }}" - import_playbook: ../pre/config.yml vars: diff --git a/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade_nodes.yml index 6958652d8..4febe76ee 100644 --- a/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade_nodes.yml +++ b/playbooks/common/openshift-cluster/upgrades/v3_6/upgrade_nodes.yml @@ -15,7 +15,7 @@ tasks: - set_fact: openshift_upgrade_target: '3.6' - openshift_upgrade_min: "{{ '1.5' if deployment_type == 'origin' else '3.5' }}" + openshift_upgrade_min: "{{ '1.5' if openshift_deployment_type == 'origin' else '3.5' }}" - import_playbook: ../pre/config.yml vars: diff --git a/playbooks/init/base_packages.yml b/playbooks/init/base_packages.yml new file mode 100644 index 000000000..f7007087c --- /dev/null +++ b/playbooks/init/base_packages.yml @@ -0,0 +1,37 @@ +--- +- name: Ensure that all non-node hosts are accessible + hosts: oo_masters_to_config:oo_etcd_to_config:oo_lb_to_config:oo_nfs_to_config + any_errors_fatal: true + tasks: + - when: + - not openshift_is_atomic | bool + block: + - name: Ensure openshift-ansible installer package deps are installed + package: + name: "{{ item }}" + state: present + with_items: + - iproute + - "{{ 'python3-dbus' if ansible_distribution == 'Fedora' else 'dbus-python' }}" + - "{{ 'python3-PyYAML' if ansible_distribution == 'Fedora' else 'PyYAML' }}" + - yum-utils + register: result + until: result is succeeded + + - name: Ensure various deps for running system containers are installed + package: + name: "{{ item }}" + state: present + with_items: + - atomic + - ostree + - runc + when: + - > + (openshift_use_system_containers | default(False)) | bool + or (openshift_use_etcd_system_container | default(False)) | bool + or (openshift_use_openvswitch_system_container | default(False)) | bool + or (openshift_use_node_system_container | default(False)) | bool + or (openshift_use_master_system_container | default(False)) | bool + register: result + until: result is succeeded diff --git a/playbooks/init/facts.yml b/playbooks/init/facts.yml index 1a5e3b513..9e411a551 100644 --- a/playbooks/init/facts.yml +++ b/playbooks/init/facts.yml @@ -21,6 +21,14 @@ path: /run/ostree-booted register: ostree_booted + # TODO(michaelgugino) remove this line once CI is updated. + - name: set openshift_deployment_type if unset + set_fact: + openshift_deployment_type: "{{ deployment_type }}" + when: + - openshift_deployment_type is undefined + - deployment_type is defined + - name: initialize_facts set fact openshift_is_atomic and openshift_is_containerized set_fact: openshift_is_atomic: "{{ ostree_booted.stat.exists }}" @@ -48,39 +56,6 @@ - l_atomic_docker_version.stdout | replace('"', '') is version_compare('1.12','>=') msg: Installation on Atomic Host requires Docker 1.12 or later. Please upgrade and restart the Atomic Host. - - when: - - not openshift_is_atomic | bool - block: - - name: Ensure openshift-ansible installer package deps are installed - package: - name: "{{ item }}" - state: present - with_items: - - iproute - - "{{ 'python3-dbus' if ansible_distribution == 'Fedora' else 'dbus-python' }}" - - "{{ 'python3-PyYAML' if ansible_distribution == 'Fedora' else 'PyYAML' }}" - - yum-utils - register: result - until: result is succeeded - - - name: Ensure various deps for running system containers are installed - package: - name: "{{ item }}" - state: present - with_items: - - atomic - - ostree - - runc - when: - - > - (openshift_use_system_containers | default(False)) | bool - or (openshift_use_etcd_system_container | default(False)) | bool - or (openshift_use_openvswitch_system_container | default(False)) | bool - or (openshift_use_node_system_container | default(False)) | bool - or (openshift_use_master_system_container | default(False)) | bool - register: result - until: result is succeeded - - name: Gather Cluster facts openshift_facts: role: common diff --git a/playbooks/init/repos.yml b/playbooks/init/repos.yml index 66786a41a..866c889b6 100644 --- a/playbooks/init/repos.yml +++ b/playbooks/init/repos.yml @@ -8,7 +8,7 @@ name: rhel_subscribe when: - ansible_distribution == 'RedHat' - - deployment_type == 'openshift-enterprise' + - openshift_deployment_type == 'openshift-enterprise' - rhsub_user is defined - rhsub_pass is defined - name: initialize openshift repos diff --git a/playbooks/openshift-glusterfs/README.md b/playbooks/openshift-glusterfs/README.md index 107bbfff6..19c381490 100644 --- a/playbooks/openshift-glusterfs/README.md +++ b/playbooks/openshift-glusterfs/README.md @@ -63,7 +63,7 @@ glusterfs [OSEv3:vars] ansible_ssh_user=root -deployment_type=origin +openshift_deployment_type=origin [masters] master diff --git a/playbooks/openshift-master/private/additional_config.yml b/playbooks/openshift-master/private/additional_config.yml index 81bb8cc5c..85be0e600 100644 --- a/playbooks/openshift-master/private/additional_config.yml +++ b/playbooks/openshift-master/private/additional_config.yml @@ -31,7 +31,7 @@ - role: cockpit when: - not openshift_is_atomic | bool - - deployment_type == 'openshift-enterprise' + - openshift_deployment_type == 'openshift-enterprise' - osm_use_cockpit is undefined or osm_use_cockpit | bool - openshift.common.deployment_subtype != 'registry' - role: flannel_register diff --git a/playbooks/openshift-master/private/config.yml b/playbooks/openshift-master/private/config.yml index 3093444b4..e53a6f093 100644 --- a/playbooks/openshift-master/private/config.yml +++ b/playbooks/openshift-master/private/config.yml @@ -47,7 +47,7 @@ state: absent when: - rpmgenerated_config.stat.exists == true - - deployment_type == 'openshift-enterprise' + - openshift_deployment_type == 'openshift-enterprise' with_items: - master - node diff --git a/playbooks/prerequisites.yml b/playbooks/prerequisites.yml index 113d68e0f..7802f83d9 100644 --- a/playbooks/prerequisites.yml +++ b/playbooks/prerequisites.yml @@ -3,11 +3,13 @@ vars: skip_verison: True -- import_playbook: validate_hostnames.yml +- import_playbook: init/validate_hostnames.yml when: not (skip_validate_hostnames | default(False)) - import_playbook: init/repos.yml +- import_playbook: init/base_packages.yml + # This is required for container runtime for crio, only needs to run once. - name: Configure os_firewall hosts: oo_masters_to_config:oo_etcd_to_config:oo_lb_to_config:oo_nfs_to_config:oo_nodes_to_config diff --git a/roles/ansible_service_broker/tasks/install.yml b/roles/ansible_service_broker/tasks/install.yml index 4ca47d074..ba2f7293b 100644 --- a/roles/ansible_service_broker/tasks/install.yml +++ b/roles/ansible_service_broker/tasks/install.yml @@ -4,7 +4,7 @@ - name: Set default image variables based on deployment type include_vars: "{{ item }}" with_first_found: - - "{{ openshift_deployment_type | default(deployment_type) }}.yml" + - "{{ openshift_deployment_type }}.yml" - "default_images.yml" - name: set ansible_service_broker facts diff --git a/roles/container_runtime/meta/main.yml b/roles/container_runtime/meta/main.yml index 5c4c569de..3bc2607fb 100644 --- a/roles/container_runtime/meta/main.yml +++ b/roles/container_runtime/meta/main.yml @@ -12,3 +12,4 @@ galaxy_info: dependencies: - role: lib_openshift - role: lib_utils +- role: openshift_facts diff --git a/roles/contiv_facts/tasks/main.yml b/roles/contiv_facts/tasks/main.yml index c6f8ad1d6..ced04759d 100644 --- a/roles/contiv_facts/tasks/main.yml +++ b/roles/contiv_facts/tasks/main.yml @@ -70,4 +70,4 @@ when: has_rpm - include_tasks: fedora-install.yml - when: not is_atomic and ansible_distribution == "Fedora" + when: not openshift_is_atomic and ansible_distribution == "Fedora" diff --git a/roles/lib_utils/action_plugins/sanity_checks.py b/roles/lib_utils/action_plugins/sanity_checks.py index 2ddcf77e4..1bf332678 100644 --- a/roles/lib_utils/action_plugins/sanity_checks.py +++ b/roles/lib_utils/action_plugins/sanity_checks.py @@ -5,6 +5,9 @@ appropriately and no conflicting options have been provided. from ansible.plugins.action import ActionBase from ansible import errors +# Valid values for openshift_deployment_type +VALID_DEPLOYMENT_TYPES = ('origin', 'openshift-enterprise') + # Tuple of variable names and default values if undefined. NET_PLUGIN_LIST = (('openshift_use_openshift_sdn', True), ('openshift_use_flannel', False), @@ -16,7 +19,10 @@ NET_PLUGIN_LIST = (('openshift_use_openshift_sdn', True), def to_bool(var_to_check): """Determine a boolean value given the multiple ways bools can be specified in ansible.""" - yes_list = (True, 1, "True", "1", "true", "Yes", "yes") + # http://yaml.org/type/bool.html + yes_list = (True, 1, "True", "1", "true", "TRUE", + "Yes", "yes", "Y", "y", "YES", + "on", "ON", "On") return var_to_check in yes_list @@ -30,6 +36,15 @@ class ActionModule(ActionBase): return None return self._templar.template(res) + def check_openshift_deployment_type(self, hostvars, host): + """Ensure a valid openshift_deployment_type is set""" + openshift_deployment_type = self.template_var(hostvars, host, + 'openshift_deployment_type') + if openshift_deployment_type not in VALID_DEPLOYMENT_TYPES: + type_strings = ", ".join(VALID_DEPLOYMENT_TYPES) + msg = "openshift_deployment_type must be defined and one of {}".format(type_strings) + raise errors.AnsibleModuleError(msg) + def check_python_version(self, hostvars, host, distro): """Ensure python version is 3 for Fedora and python 2 for others""" ansible_python = self.template_var(hostvars, host, 'ansible_python') @@ -73,6 +88,7 @@ class ActionModule(ActionBase): def run_checks(self, hostvars, host): """Execute the hostvars validations against host""" distro = self.template_var(hostvars, host, 'ansible_distribution') + self.check_openshift_deployment_type(hostvars, host) self.check_python_version(hostvars, host, distro) self.network_plugin_check(hostvars, host) self.check_hostname_vars(hostvars, host) diff --git a/roles/openshift_facts/defaults/main.yml b/roles/openshift_facts/defaults/main.yml index a4252afb0..980350d14 100644 --- a/roles/openshift_facts/defaults/main.yml +++ b/roles/openshift_facts/defaults/main.yml @@ -5,8 +5,8 @@ openshift_cli_image_dict: origin: 'openshift/origin' openshift-enterprise: 'openshift3/ose' -repoquery_cmd: "{{ ansible_pkg_mgr == 'dnf' | ternary('dnf repoquery --latest-limit 1 -d 0', 'repoquery --plugins') }}" -repoquery_installed: "{{ ansible_pkg_mgr == 'dnf' | ternary('dnf repoquery --latest-limit 1 -d 0 --disableexcludes=all --installed', 'repoquery --plugins --installed') }}" +repoquery_cmd: "{{ (ansible_pkg_mgr == 'dnf') | ternary('dnf repoquery --latest-limit 1 -d 0', 'repoquery --plugins') }}" +repoquery_installed: "{{ (ansible_pkg_mgr == 'dnf') | ternary('dnf repoquery --latest-limit 1 -d 0 --disableexcludes=all --installed', 'repoquery --plugins --installed') }}" openshift_hosted_images_dict: origin: 'openshift/origin-${component}:${version}' diff --git a/roles/openshift_logging_curator/tasks/main.yaml b/roles/openshift_logging_curator/tasks/main.yaml index e7ef5ff22..524e239b7 100644 --- a/roles/openshift_logging_curator/tasks/main.yaml +++ b/roles/openshift_logging_curator/tasks/main.yaml @@ -2,7 +2,7 @@ - name: Set default image variables based on deployment_type include_vars: "{{ var_file_name }}" with_first_found: - - "{{ openshift_deployment_type | default(deployment_type) }}.yml" + - "{{ openshift_deployment_type }}.yml" - "default_images.yml" loop_control: loop_var: var_file_name diff --git a/roles/openshift_logging_elasticsearch/tasks/main.yaml b/roles/openshift_logging_elasticsearch/tasks/main.yaml index 7790dc435..6ddeb122e 100644 --- a/roles/openshift_logging_elasticsearch/tasks/main.yaml +++ b/roles/openshift_logging_elasticsearch/tasks/main.yaml @@ -15,10 +15,10 @@ elasticsearch_name: "{{ 'logging-elasticsearch' ~ ( (openshift_logging_elasticsearch_ops_deployment | default(false) | bool) | ternary('-ops', '')) }}" es_component: "{{ 'es' ~ ( (openshift_logging_elasticsearch_ops_deployment | default(false) | bool) | ternary('-ops', '') ) }}" -- name: Set default image variables based on deployment_type +- name: Set default image variables based on openshift_deployment_type include_vars: "{{ var_file_name }}" with_first_found: - - "{{ openshift_deployment_type | default(deployment_type) }}.yml" + - "{{ openshift_deployment_type }}.yml" - "default_images.yml" loop_control: loop_var: var_file_name diff --git a/roles/openshift_logging_eventrouter/tasks/main.yaml b/roles/openshift_logging_eventrouter/tasks/main.yaml index 96b181d61..31780a343 100644 --- a/roles/openshift_logging_eventrouter/tasks/main.yaml +++ b/roles/openshift_logging_eventrouter/tasks/main.yaml @@ -1,8 +1,8 @@ --- -- name: Set default image variables based on deployment_type +- name: Set default image variables based on openshift_deployment_type include_vars: "{{ var_file_name }}" with_first_found: - - "{{ openshift_deployment_type | default(deployment_type) }}.yml" + - "{{ openshift_deployment_type }}.yml" - "default_images.yml" loop_control: loop_var: var_file_name diff --git a/roles/openshift_logging_fluentd/tasks/main.yaml b/roles/openshift_logging_fluentd/tasks/main.yaml index 87eedfb4b..08d7561ac 100644 --- a/roles/openshift_logging_fluentd/tasks/main.yaml +++ b/roles/openshift_logging_fluentd/tasks/main.yaml @@ -34,10 +34,10 @@ msg: WARNING Use of openshift_logging_mux_client_mode=minimal is not recommended due to current scaling issues when: openshift_logging_mux_client_mode is defined and openshift_logging_mux_client_mode == 'minimal' -- name: Set default image variables based on deployment_type +- name: Set default image variables based on openshift_deployment_type include_vars: "{{ var_file_name }}" with_first_found: - - "{{ openshift_deployment_type | default(deployment_type) }}.yml" + - "{{ openshift_deployment_type }}.yml" - "default_images.yml" loop_control: loop_var: var_file_name diff --git a/roles/openshift_logging_kibana/tasks/main.yaml b/roles/openshift_logging_kibana/tasks/main.yaml index a00248d11..3c3bd902e 100644 --- a/roles/openshift_logging_kibana/tasks/main.yaml +++ b/roles/openshift_logging_kibana/tasks/main.yaml @@ -1,9 +1,9 @@ --- # fail is we don't have an endpoint for ES to connect to? -- name: Set default image variables based on deployment_type +- name: Set default image variables based on openshift_deployment_type include_vars: "{{ var_file_name }}" with_first_found: - - "{{ openshift_deployment_type | default(deployment_type) }}.yml" + - "{{ openshift_deployment_type }}.yml" - "default_images.yml" loop_control: loop_var: var_file_name diff --git a/roles/openshift_logging_mux/tasks/main.yaml b/roles/openshift_logging_mux/tasks/main.yaml index 68948bce2..59a6301d7 100644 --- a/roles/openshift_logging_mux/tasks/main.yaml +++ b/roles/openshift_logging_mux/tasks/main.yaml @@ -7,10 +7,10 @@ msg: Operations logs destination is required when: not openshift_logging_mux_ops_host or openshift_logging_mux_ops_host == '' -- name: Set default image variables based on deployment_type +- name: Set default image variables based on openshift_deployment_type include_vars: "{{ var_file_name }}" with_first_found: - - "{{ openshift_deployment_type | default(deployment_type) }}.yml" + - "{{ openshift_deployment_type }}.yml" - "default_images.yml" loop_control: loop_var: var_file_name diff --git a/roles/openshift_metrics/tasks/main.yaml b/roles/openshift_metrics/tasks/main.yaml index 9dfe360bb..b67077bca 100644 --- a/roles/openshift_metrics/tasks/main.yaml +++ b/roles/openshift_metrics/tasks/main.yaml @@ -9,10 +9,10 @@ - "'not installed' not in passlib_result.stdout" msg: "python-passlib rpm must be installed on control host" -- name: Set default image variables based on deployment_type +- name: Set default image variables based on openshift_deployment_type include_vars: "{{ item }}" with_first_found: - - "{{ openshift_deployment_type | default(deployment_type) }}.yml" + - "{{ openshift_deployment_type }}.yml" - "default_images.yml" - name: Set metrics image facts diff --git a/roles/openshift_node/tasks/main.yml b/roles/openshift_node/tasks/main.yml index 2daa6c75f..eb362816a 100644 --- a/roles/openshift_node/tasks/main.yml +++ b/roles/openshift_node/tasks/main.yml @@ -3,7 +3,7 @@ msg: "SELinux is disabled, This deployment type requires that SELinux is enabled." when: - (not ansible_selinux or ansible_selinux.status != 'enabled') - - deployment_type == 'openshift-enterprise' + - openshift_deployment_type == 'openshift-enterprise' - not openshift_use_crio - include_tasks: dnsmasq_install.yml diff --git a/roles/openshift_node/tasks/upgrade.yml b/roles/openshift_node/tasks/upgrade.yml index f62bde784..02e417937 100644 --- a/roles/openshift_node/tasks/upgrade.yml +++ b/roles/openshift_node/tasks/upgrade.yml @@ -5,7 +5,6 @@ # - node_config_hook # - openshift_pkg_version # - openshift_is_containerized -# - deployment_type # - openshift_release # tasks file for openshift_node_upgrade diff --git a/roles/openshift_prometheus/tasks/main.yaml b/roles/openshift_prometheus/tasks/main.yaml index 38798e1f5..b859eb111 100644 --- a/roles/openshift_prometheus/tasks/main.yaml +++ b/roles/openshift_prometheus/tasks/main.yaml @@ -1,5 +1,5 @@ --- -- name: Set default image variables based on deployment_type +- name: Set default image variables based on openshift_deployment_type include_vars: "{{ item }}" with_first_found: - "{{ openshift_deployment_type }}.yml" diff --git a/roles/openshift_repos/tasks/main.yaml b/roles/openshift_repos/tasks/main.yaml index 35206049f..911005bb6 100644 --- a/roles/openshift_repos/tasks/main.yaml +++ b/roles/openshift_repos/tasks/main.yaml @@ -40,7 +40,7 @@ - include_tasks: rhel_repos.yml when: - ansible_distribution == 'RedHat' - - deployment_type == 'openshift-enterprise' + - openshift_deployment_type == 'openshift-enterprise' - rhsub_user is defined - rhsub_pass is defined diff --git a/roles/openshift_sanitize_inventory/tasks/main.yml b/roles/openshift_sanitize_inventory/tasks/main.yml index 651d896cf..62d460272 100644 --- a/roles/openshift_sanitize_inventory/tasks/main.yml +++ b/roles/openshift_sanitize_inventory/tasks/main.yml @@ -3,37 +3,11 @@ # the user would also be aware of any deprecated variables they should note to adjust - include_tasks: deprecations.yml -- name: Abort when conflicting deployment type variables are set - when: - - deployment_type is defined - - openshift_deployment_type is defined - - openshift_deployment_type != deployment_type - fail: - msg: |- - openshift_deployment_type is set to "{{ openshift_deployment_type }}". - deployment_type is set to "{{ deployment_type }}". - To avoid unexpected results, this conflict is not allowed. - deployment_type is deprecated in favor of openshift_deployment_type. - Please specify only openshift_deployment_type, or make both the same. - - name: Standardize on latest variable names set_fact: - # goal is to deprecate deployment_type in favor of openshift_deployment_type. - # both will be accepted for now, but code should refer to the new name. - # TODO: once this is well-documented, add deprecation notice if using old name. - deployment_type: "{{ openshift_deployment_type | default(deployment_type) | default | string }}" - openshift_deployment_type: "{{ openshift_deployment_type | default(deployment_type) | default | string }}" deployment_subtype: "{{ openshift_deployment_subtype | default(deployment_subtype) | default('basic') | string }}" openshift_deployment_subtype: "{{ openshift_deployment_subtype | default(deployment_subtype) | default('basic') | string }}" -- name: Abort when deployment type is invalid - # this variable is required; complain early and clearly if it is invalid. - when: openshift_deployment_type not in known_openshift_deployment_types - fail: - msg: |- - Please set openshift_deployment_type to one of: - {{ known_openshift_deployment_types | join(', ') }} - - name: Normalize openshift_release set_fact: # Normalize release if provided, e.g. "v3.5" => "3.5" diff --git a/roles/openshift_sanitize_inventory/vars/main.yml b/roles/openshift_sanitize_inventory/vars/main.yml index 0fc2372d2..df15948d2 100644 --- a/roles/openshift_sanitize_inventory/vars/main.yml +++ b/roles/openshift_sanitize_inventory/vars/main.yml @@ -1,7 +1,4 @@ --- -# origin uses community packages named 'origin' -# openshift-enterprise uses Red Hat packages named 'atomic-openshift' -known_openshift_deployment_types: ['origin', 'openshift-enterprise'] __deprecation_header: "[DEPRECATION WARNING]:" diff --git a/roles/openshift_service_catalog/tasks/install.yml b/roles/openshift_service_catalog/tasks/install.yml index 452d869f6..cfecaa12c 100644 --- a/roles/openshift_service_catalog/tasks/install.yml +++ b/roles/openshift_service_catalog/tasks/install.yml @@ -6,10 +6,10 @@ register: mktemp changed_when: False -- name: Set default image variables based on deployment_type +- name: Set default image variables based on openshift_deployment_type include_vars: "{{ item }}" with_first_found: - - "{{ openshift_deployment_type | default(deployment_type) }}.yml" + - "{{ openshift_deployment_type }}.yml" - "default_images.yml" - name: Set service_catalog image facts diff --git a/roles/openshift_storage_glusterfs/defaults/main.yml b/roles/openshift_storage_glusterfs/defaults/main.yml index da34fab2a..4cbe262d2 100644 --- a/roles/openshift_storage_glusterfs/defaults/main.yml +++ b/roles/openshift_storage_glusterfs/defaults/main.yml @@ -6,16 +6,16 @@ openshift_storage_glusterfs_nodeselector: "glusterfs={{ openshift_storage_gluste openshift_storage_glusterfs_use_default_selector: False openshift_storage_glusterfs_storageclass: True openshift_storage_glusterfs_storageclass_default: False -openshift_storage_glusterfs_image: "{{ 'rhgs3/rhgs-server-rhel7' | quote if deployment_type == 'openshift-enterprise' else 'gluster/gluster-centos' | quote }}" +openshift_storage_glusterfs_image: "{{ 'rhgs3/rhgs-server-rhel7' | quote if openshift_deployment_type == 'openshift-enterprise' else 'gluster/gluster-centos' | quote }}" openshift_storage_glusterfs_version: 'latest' openshift_storage_glusterfs_block_deploy: True -openshift_storage_glusterfs_block_image: "{{ 'rhgs3/rhgs-gluster-block-prov-rhel7' | quote if deployment_type == 'openshift-enterprise' else 'gluster/glusterblock-provisioner' | quote }}" +openshift_storage_glusterfs_block_image: "{{ 'rhgs3/rhgs-gluster-block-prov-rhel7' | quote if openshift_deployment_type == 'openshift-enterprise' else 'gluster/glusterblock-provisioner' | quote }}" openshift_storage_glusterfs_block_version: 'latest' openshift_storage_glusterfs_block_host_vol_create: True openshift_storage_glusterfs_block_host_vol_size: 100 openshift_storage_glusterfs_block_host_vol_max: 15 openshift_storage_glusterfs_s3_deploy: True -openshift_storage_glusterfs_s3_image: "{{ 'rhgs3/rhgs-gluster-s3-server-rhel7' | quote if deployment_type == 'openshift-enterprise' else 'gluster/gluster-object' | quote }}" +openshift_storage_glusterfs_s3_image: "{{ 'rhgs3/rhgs-gluster-s3-server-rhel7' | quote if openshift_deployment_type == 'openshift-enterprise' else 'gluster/gluster-object' | quote }}" openshift_storage_glusterfs_s3_version: 'latest' openshift_storage_glusterfs_s3_account: "{{ omit }}" openshift_storage_glusterfs_s3_user: "{{ omit }}" @@ -29,7 +29,7 @@ openshift_storage_glusterfs_heketi_is_native: "{{ openshift_storage_glusterfs_is openshift_storage_glusterfs_heketi_is_missing: True openshift_storage_glusterfs_heketi_deploy_is_missing: True openshift_storage_glusterfs_heketi_cli: 'heketi-cli' -openshift_storage_glusterfs_heketi_image: "{{ 'rhgs3/rhgs-volmanager-rhel7' | quote if deployment_type == 'openshift-enterprise' else 'heketi/heketi' | quote }}" +openshift_storage_glusterfs_heketi_image: "{{ 'rhgs3/rhgs-volmanager-rhel7' | quote if openshift_deployment_type == 'openshift-enterprise' else 'heketi/heketi' | quote }}" openshift_storage_glusterfs_heketi_version: 'latest' openshift_storage_glusterfs_heketi_admin_key: "{{ omit }}" openshift_storage_glusterfs_heketi_user_key: "{{ omit }}" diff --git a/roles/template_service_broker/tasks/install.yml b/roles/template_service_broker/tasks/install.yml index 2fc9779d6..765263db5 100644 --- a/roles/template_service_broker/tasks/install.yml +++ b/roles/template_service_broker/tasks/install.yml @@ -1,9 +1,9 @@ --- # Fact setting -- name: Set default image variables based on deployment type +- name: Set default image variables based on openshift_deployment_type include_vars: "{{ item }}" with_first_found: - - "{{ openshift_deployment_type | default(deployment_type) }}.yml" + - "{{ openshift_deployment_type }}.yml" - "default_images.yml" - name: set template_service_broker facts diff --git a/test/integration/openshift_health_checker/preflight/playbooks/package_availability_missing_required.yml b/test/integration/openshift_health_checker/preflight/playbooks/package_availability_missing_required.yml index 006a71bd9..451ac0972 100644 --- a/test/integration/openshift_health_checker/preflight/playbooks/package_availability_missing_required.yml +++ b/test/integration/openshift_health_checker/preflight/playbooks/package_availability_missing_required.yml @@ -4,7 +4,7 @@ vars: image: preflight-aos-package-checks l_host_vars: - deployment_type: openshift-enterprise + openshift_deployment_type: openshift-enterprise - name: Fail as required packages cannot be installed hosts: all diff --git a/test/integration/openshift_health_checker/preflight/playbooks/package_availability_succeeds.yml b/test/integration/openshift_health_checker/preflight/playbooks/package_availability_succeeds.yml index b4f18e3b5..e37487f13 100644 --- a/test/integration/openshift_health_checker/preflight/playbooks/package_availability_succeeds.yml +++ b/test/integration/openshift_health_checker/preflight/playbooks/package_availability_succeeds.yml @@ -3,7 +3,7 @@ vars: image: preflight-aos-package-checks l_host_vars: - deployment_type: origin + openshift_deployment_type: origin - name: Succeeds as Origin packages are public hosts: all diff --git a/test/integration/openshift_health_checker/preflight/playbooks/package_version_matches.yml b/test/integration/openshift_health_checker/preflight/playbooks/package_version_matches.yml index 4e2b8a50c..9c845e1e5 100644 --- a/test/integration/openshift_health_checker/preflight/playbooks/package_version_matches.yml +++ b/test/integration/openshift_health_checker/preflight/playbooks/package_version_matches.yml @@ -3,7 +3,7 @@ vars: image: preflight-aos-package-checks l_host_vars: - deployment_type: openshift-enterprise + openshift_deployment_type: openshift-enterprise openshift_release: 3.2 - name: Success when AOS version matches openshift_release diff --git a/test/integration/openshift_health_checker/preflight/playbooks/package_version_mismatches.yml b/test/integration/openshift_health_checker/preflight/playbooks/package_version_mismatches.yml index e1f8d74e6..9ae811939 100644 --- a/test/integration/openshift_health_checker/preflight/playbooks/package_version_mismatches.yml +++ b/test/integration/openshift_health_checker/preflight/playbooks/package_version_mismatches.yml @@ -4,7 +4,7 @@ vars: image: preflight-aos-package-checks l_host_vars: - deployment_type: openshift-enterprise + openshift_deployment_type: openshift-enterprise openshift_release: 3.2 - name: Failure when AOS version doesn't match openshift_release -- cgit v1.2.3 From daf331ab053de1e685b2fa9ca62a5c9f7381edc0 Mon Sep 17 00:00:00 2001 From: Ture Karlsson Date: Fri, 5 Jan 2018 15:24:51 +0100 Subject: Provide example on how to use osm_etcd_image in a disconnected and containerized installation Example of how to use osm_etcd_image in accordance to: https://docs.openshift.com/container-platform/3.7/install_config/install/rpm_vs_containerized.html --- inventory/hosts.example | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'inventory') diff --git a/inventory/hosts.example b/inventory/hosts.example index bc85d1020..d2e5f81cf 100644 --- a/inventory/hosts.example +++ b/inventory/hosts.example @@ -197,6 +197,10 @@ openshift_release=v3.7 #openshift_additional_repos=[{'id': 'openshift-origin-copr', 'name': 'OpenShift Origin COPR', 'baseurl': 'https://copr-be.cloud.fedoraproject.org/results/maxamillion/origin-next/epel-7-$basearch/', 'enabled': 1, 'gpgcheck': 1, 'gpgkey': 'https://copr-be.cloud.fedoraproject.org/results/maxamillion/origin-next/pubkey.gpg'}] #openshift_repos_enable_testing=false +# If the image for etcd needs to be pulled from anywhere else than registry.access.redhat.com, e.g. in +# a disconnected and containerized installation, use osm_etcd_image to specify the image to use: +#osm_etcd_image=rhel7/etcd + # htpasswd auth openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', 'challenge': 'true', 'kind': 'HTPasswdPasswordIdentityProvider', 'filename': '/etc/origin/master/htpasswd'}] # Defining htpasswd users -- cgit v1.2.3 From 7beec759c63cd01ca0da32d0ac14427e38ef829a Mon Sep 17 00:00:00 2001 From: Michael Scherer Date: Mon, 8 Jan 2018 02:55:58 +0100 Subject: Fix error in variable in comment --- inventory/hosts.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'inventory') diff --git a/inventory/hosts.example b/inventory/hosts.example index bc85d1020..8c2590078 100644 --- a/inventory/hosts.example +++ b/inventory/hosts.example @@ -128,7 +128,7 @@ openshift_release=v3.7 #openshift_crio_systemcontainer_image_override="registry.example.com/cri-o:latest" # NOTE: The following crio docker-gc items are tech preview and likely shouldn't be used # unless you know what you are doing!! -# The following two variables are used when opneshift_use_crio is True +# The following two variables are used when openshift_use_crio is True # and cleans up after builds that pass through docker. # Enable docker garbage collection when using cri-o #openshift_crio_enable_docker_gc=false -- cgit v1.2.3 From f3e3f5dabe616aa07273fa91b6767bf756f41592 Mon Sep 17 00:00:00 2001 From: Corey Daley Date: Tue, 2 Jan 2018 09:49:39 -0500 Subject: Ability to specify override tolerations via the buildconfig overrider Trello: https://trello.com/c/LNxlMjjU/1435-5-ability-to-specify-default-tolerations-via-the-buildconfig-defaulter-builds --- inventory/hosts.example | 1 + roles/openshift_buildoverrides/vars/main.yml | 1 + 2 files changed, 2 insertions(+) (limited to 'inventory') diff --git a/inventory/hosts.example b/inventory/hosts.example index 05293269d..508e7cc9d 100644 --- a/inventory/hosts.example +++ b/inventory/hosts.example @@ -899,6 +899,7 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', #openshift_buildoverrides_image_labels=[{'name':'imagelabelname1','value':'imagelabelvalue1'}] #openshift_buildoverrides_nodeselectors={'nodelabel1':'nodelabelvalue1'} #openshift_buildoverrides_annotations={'annotationkey1':'annotationvalue1'} +#openshift_buildoverrides_tolerations=[{'key':'mykey1','value':'myvalue1','effect':'NoSchedule','operator':'Equal'}] # Or you may optionally define your own build overrides configuration serialized as json #openshift_buildoverrides_json='{"BuildOverrides":{"configuration":{"apiVersion":"v1","kind":"BuildDefaultsConfig","forcePull":"true"}}}' diff --git a/roles/openshift_buildoverrides/vars/main.yml b/roles/openshift_buildoverrides/vars/main.yml index cf49a6ebf..df53280c8 100644 --- a/roles/openshift_buildoverrides/vars/main.yml +++ b/roles/openshift_buildoverrides/vars/main.yml @@ -9,3 +9,4 @@ buildoverrides_yaml: imageLabels: "{{ openshift_buildoverrides_image_labels | default(None) }}" nodeSelector: "{{ openshift_buildoverrides_nodeselectors | default(None) }}" annotations: "{{ openshift_buildoverrides_annotations | default(None) }}" + tolerations: "{{ openshift_buildoverrides_tolerations | default(None) }}" -- cgit v1.2.3 From 08b7b7ed82d2a53b8dd5c95ace028666268342ea Mon Sep 17 00:00:00 2001 From: Joel Pearson Date: Fri, 5 Jan 2018 10:27:26 +1100 Subject: Add documentation example --- inventory/hosts.example | 3 +++ 1 file changed, 3 insertions(+) (limited to 'inventory') diff --git a/inventory/hosts.example b/inventory/hosts.example index 05293269d..3fc868c94 100644 --- a/inventory/hosts.example +++ b/inventory/hosts.example @@ -286,6 +286,9 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', #openshift_cloudprovider_openstack_region=region #openshift_cloudprovider_openstack_lb_subnet_id=subnet_id # +# Note: If you're getting a "BS API version autodetection failed" when provisioning cinder volumes you may need this setting +#openshift_cloudprovider_openstack_blockstorage_version=v2 +# # GCE #openshift_cloudprovider_kind=gce -- cgit v1.2.3 From 0841917f05cfad2701164edbb271167c277d3300 Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Thu, 10 Aug 2017 09:25:36 -0400 Subject: Add the ability to specify a timeout for node drain operations --- inventory/hosts.example | 8 ++++++++ .../openshift-cluster/upgrades/docker/docker_upgrade.yml | 12 +++++++++--- .../openshift-cluster/upgrades/upgrade_control_plane.yml | 12 +++++++++--- .../common/openshift-cluster/upgrades/upgrade_nodes.yml | 12 +++++++++--- .../openshift-cluster/upgrades/upgrade_scale_group.yml | 4 ++-- 5 files changed, 37 insertions(+), 11 deletions(-) (limited to 'inventory') diff --git a/inventory/hosts.example b/inventory/hosts.example index bc85d1020..b07e0d159 100644 --- a/inventory/hosts.example +++ b/inventory/hosts.example @@ -991,6 +991,14 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # where as this would not # openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=50 # +# A timeout to wait for nodes to drain pods can be specified to ensure that the +# upgrade continues even if nodes fail to drain pods in the allowed time. The +# default value of 0 will wait indefinitely allowing the admin to investigate +# the root cause and ensuring that disruption budgets are respected. If the +# a timeout of 0 is used there will also be one attempt to re-try draining the +# node. If a non zero timeout is specified there will be no attempt to retry. +#openshift_upgrade_nodes_drain_timeout=0 +# # Multiple data migrations take place and if they fail they will fail the upgrade # You may wish to disable these or make them non fatal # diff --git a/playbooks/common/openshift-cluster/upgrades/docker/docker_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/docker/docker_upgrade.yml index ffb11670d..8392e21ee 100644 --- a/playbooks/common/openshift-cluster/upgrades/docker/docker_upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/docker/docker_upgrade.yml @@ -51,13 +51,19 @@ - name: Drain Node for Kubelet upgrade command: > - {{ openshift_client_binary }} adm drain {{ openshift.node.nodename }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig --force --delete-local-data --ignore-daemonsets + {{ hostvars[groups.oo_first_master.0]['first_master_client_binary'] }} adm drain {{ openshift.node.nodename | lower }} + --config={{ openshift.common.config_base }}/master/admin.kubeconfig + --force --delete-local-data --ignore-daemonsets + --timeout={{ openshift_upgrade_nodes_drain_timeout | default(0) }}s delegate_to: "{{ groups.oo_first_master.0 }}" when: l_docker_upgrade is defined and l_docker_upgrade | bool and inventory_hostname in groups.oo_nodes_to_upgrade register: l_docker_upgrade_drain_result until: not (l_docker_upgrade_drain_result is failed) - retries: 60 - delay: 60 + retries: "{{ 1 if ( openshift_upgrade_nodes_drain_timeout | default(0) | int ) == 0 else 0 }}" + delay: 5 + failed_when: + - l_docker_upgrade_drain_result is failed + - openshift_upgrade_nodes_drain_timeout | default(0) | int == 0 - include_tasks: tasks/upgrade.yml when: l_docker_upgrade is defined and l_docker_upgrade | bool diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml index 91d496ff4..3f2ba8969 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml @@ -291,12 +291,18 @@ - name: Drain Node for Kubelet upgrade command: > - {{ hostvars[groups.oo_first_master.0]['first_master_client_binary'] }} adm drain {{ openshift.node.nodename | lower }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig --force --delete-local-data --ignore-daemonsets + {{ hostvars[groups.oo_first_master.0]['first_master_client_binary'] }} adm drain {{ openshift.node.nodename | lower }} + --config={{ openshift.common.config_base }}/master/admin.kubeconfig + --force --delete-local-data --ignore-daemonsets + --timeout={{ openshift_upgrade_nodes_drain_timeout | default(0) }}s delegate_to: "{{ groups.oo_first_master.0 }}" register: l_upgrade_control_plane_drain_result until: not (l_upgrade_control_plane_drain_result is failed) - retries: 60 - delay: 60 + retries: "{{ 1 if ( openshift_upgrade_nodes_drain_timeout | default(0) | int ) == 0 else 0 }}" + delay: 5 + failed_when: + - l_upgrade_control_plane_drain_result is failed + - openshift_upgrade_nodes_drain_timeout | default(0) | int == 0 roles: - openshift_facts diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml index aba179c2b..856c8328c 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml @@ -35,12 +35,18 @@ - name: Drain Node for Kubelet upgrade command: > - {{ hostvars[groups.oo_first_master.0]['first_master_client_binary'] }} adm drain {{ openshift.node.nodename | lower }} --config={{ openshift.common.config_base }}/master/admin.kubeconfig --force --delete-local-data --ignore-daemonsets + {{ hostvars[groups.oo_first_master.0]['first_master_client_binary'] }} adm drain {{ openshift.node.nodename | lower }} + --config={{ openshift.common.config_base }}/master/admin.kubeconfig + --force --delete-local-data --ignore-daemonsets + --timeout={{ openshift_upgrade_nodes_drain_timeout | default(0) }}s delegate_to: "{{ groups.oo_first_master.0 }}" register: l_upgrade_nodes_drain_result until: not (l_upgrade_nodes_drain_result is failed) - retries: 60 - delay: 60 + retries: "{{ 1 if ( openshift_upgrade_nodes_drain_timeout | default(0) | int ) == 0 else 0 }}" + delay: 5 + failed_when: + - l_upgrade_nodes_drain_result is failed + - openshift_upgrade_nodes_drain_timeout | default(0) | int == 0 post_tasks: - import_role: diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml index 6d59bfd0b..e259b5d09 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_scale_group.yml @@ -50,11 +50,11 @@ delegate_to: "{{ groups.oo_first_master.0 }}" register: l_upgrade_nodes_drain_result until: not (l_upgrade_nodes_drain_result is failed) - retries: "{{ 1 if openshift_upgrade_nodes_drain_timeout | default(0) == '0' else 0 | int }}" + retries: "{{ 1 if ( openshift_upgrade_nodes_drain_timeout | default(0) | int ) == 0 else 0 }}" delay: 5 failed_when: - l_upgrade_nodes_drain_result is failed - - openshift_upgrade_nodes_drain_timeout | default(0) == '0' + - openshift_upgrade_nodes_drain_timeout | default(0) | int == 0 # Alright, let's clean up! - name: clean up the old scale group -- cgit v1.2.3 From 8fcf4b127af3b96c5cb174157441436d8b11dafd Mon Sep 17 00:00:00 2001 From: Eldad Marciano Date: Mon, 20 Nov 2017 16:41:43 +0200 Subject: Add new grafana playbook. this patch introduce a new playbook and roles to deploy grafana automaticly using openshift-ansible capabilities. this patch will abstract the grafana deployment process specially for openshift. --- inventory/hosts.grafana.example | 12 + playbooks/openshift-grafana/config.yml | 4 + playbooks/openshift-grafana/private/config.yml | 6 + playbooks/openshift-grafana/private/filter_plugins | 1 + playbooks/openshift-grafana/private/lookup_plugins | 1 + playbooks/openshift-grafana/private/roles | 1 + roles/openshift_grafana/defaults/main.yml | 12 + .../openshift_grafana/files/grafana-ocp-oauth.yml | 661 +++ roles/openshift_grafana/files/grafana-ocp.yml | 76 + .../files/openshift-cluster-monitoring.json | 5138 ++++++++++++++++++++ roles/openshift_grafana/meta/main.yml | 13 + roles/openshift_grafana/tasks/gf-permissions.yml | 12 + roles/openshift_grafana/tasks/main.yml | 122 + 13 files changed, 6059 insertions(+) create mode 100644 inventory/hosts.grafana.example create mode 100644 playbooks/openshift-grafana/config.yml create mode 100644 playbooks/openshift-grafana/private/config.yml create mode 120000 playbooks/openshift-grafana/private/filter_plugins create mode 120000 playbooks/openshift-grafana/private/lookup_plugins create mode 120000 playbooks/openshift-grafana/private/roles create mode 100644 roles/openshift_grafana/defaults/main.yml create mode 100644 roles/openshift_grafana/files/grafana-ocp-oauth.yml create mode 100644 roles/openshift_grafana/files/grafana-ocp.yml create mode 100644 roles/openshift_grafana/files/openshift-cluster-monitoring.json create mode 100644 roles/openshift_grafana/meta/main.yml create mode 100644 roles/openshift_grafana/tasks/gf-permissions.yml create mode 100644 roles/openshift_grafana/tasks/main.yml (limited to 'inventory') diff --git a/inventory/hosts.grafana.example b/inventory/hosts.grafana.example new file mode 100644 index 000000000..a5999578f --- /dev/null +++ b/inventory/hosts.grafana.example @@ -0,0 +1,12 @@ +[OSEv3:children] +masters +nodes + +[OSEv3:vars] +# Grafana Configuration +#gf_datasource_name="example" +#gf_prometheus_namespace="openshift-metrics" +#gf_oauth=true + +[masters] +master diff --git a/playbooks/openshift-grafana/config.yml b/playbooks/openshift-grafana/config.yml new file mode 100644 index 000000000..c7814207c --- /dev/null +++ b/playbooks/openshift-grafana/config.yml @@ -0,0 +1,4 @@ +--- +- import_playbook: ../init/main.yml + +- import_playbook: private/config.yml diff --git a/playbooks/openshift-grafana/private/config.yml b/playbooks/openshift-grafana/private/config.yml new file mode 100644 index 000000000..ac753d63b --- /dev/null +++ b/playbooks/openshift-grafana/private/config.yml @@ -0,0 +1,6 @@ +--- +- name: Deploy grafana server + hosts: masters + tasks: + - include_role: + name: openshift_grafana diff --git a/playbooks/openshift-grafana/private/filter_plugins b/playbooks/openshift-grafana/private/filter_plugins new file mode 120000 index 000000000..99a95e4ca --- /dev/null +++ b/playbooks/openshift-grafana/private/filter_plugins @@ -0,0 +1 @@ +../../../filter_plugins \ No newline at end of file diff --git a/playbooks/openshift-grafana/private/lookup_plugins b/playbooks/openshift-grafana/private/lookup_plugins new file mode 120000 index 000000000..ac79701db --- /dev/null +++ b/playbooks/openshift-grafana/private/lookup_plugins @@ -0,0 +1 @@ +../../../lookup_plugins \ No newline at end of file diff --git a/playbooks/openshift-grafana/private/roles b/playbooks/openshift-grafana/private/roles new file mode 120000 index 000000000..e2b799b9d --- /dev/null +++ b/playbooks/openshift-grafana/private/roles @@ -0,0 +1 @@ +../../../roles/ \ No newline at end of file diff --git a/roles/openshift_grafana/defaults/main.yml b/roles/openshift_grafana/defaults/main.yml new file mode 100644 index 000000000..7fd7a085d --- /dev/null +++ b/roles/openshift_grafana/defaults/main.yml @@ -0,0 +1,12 @@ +--- +gf_body_tmp: + name: grafana_name + type: prometheus + typeLogoUrl: '' + access: proxy + url: prometheus_url + basicAuth: false + withCredentials: false + jsonData: + tlsSkipVerify: true + token: satoken diff --git a/roles/openshift_grafana/files/grafana-ocp-oauth.yml b/roles/openshift_grafana/files/grafana-ocp-oauth.yml new file mode 100644 index 000000000..82fa89004 --- /dev/null +++ b/roles/openshift_grafana/files/grafana-ocp-oauth.yml @@ -0,0 +1,661 @@ +--- +kind: Template +apiVersion: v1 +metadata: + name: grafana-ocp + annotations: + "openshift.io/display-name": Grafana ocp + description: | + Grafana server with patched Prometheus datasource. + iconClass: icon-cogs + tags: "metrics,monitoring,grafana,prometheus" +parameters: +- description: The location of the proxy image + name: IMAGE_GF + value: mrsiano/grafana-ocp:latest +- description: The location of the proxy image + name: IMAGE_PROXY + value: openshift/oauth-proxy:v1.0.0 +- description: External URL for the grafana route + name: ROUTE_URL + value: "" +- description: The namespace to instantiate heapster under. Defaults to 'grafana'. + name: NAMESPACE + value: grafana +- description: The session secret for the proxy + name: SESSION_SECRET + generate: expression + from: "[a-zA-Z0-9]{43}" +objects: +- apiVersion: v1 + kind: ServiceAccount + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + annotations: + serviceaccounts.openshift.io/oauth-redirectreference.primary: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"grafana-ocp"}}' +- apiVersion: authorization.openshift.io/v1 + kind: ClusterRoleBinding + metadata: + name: gf-cluster-reader + roleRef: + name: cluster-reader + subjects: + - kind: ServiceAccount + name: grafana-ocp + namespace: "${NAMESPACE}" +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + spec: + host: "${ROUTE_URL}" + to: + name: grafana-ocp + tls: + termination: Reencrypt +- apiVersion: v1 + kind: Service + metadata: + name: grafana-ocp + annotations: + prometheus.io/scrape: "true" + prometheus.io/scheme: https + service.alpha.openshift.io/serving-cert-secret-name: gf-tls + namespace: "${NAMESPACE}" + labels: + metrics-infra: grafana-ocp + name: grafana-ocp + spec: + ports: + - name: grafana-ocp + port: 443 + protocol: TCP + targetPort: 8443 + selector: + app: grafana-ocp +- apiVersion: v1 + kind: Secret + metadata: + name: gf-proxy + namespace: "${NAMESPACE}" + stringData: + session_secret: "${SESSION_SECRET}=" +# Deploy Prometheus behind an oauth proxy +- apiVersion: extensions/v1beta1 + kind: Deployment + metadata: + labels: + app: grafana-ocp + name: grafana-ocp + namespace: "${NAMESPACE}" + spec: + replicas: 1 + selector: + matchLabels: + app: grafana-ocp + template: + metadata: + labels: + app: grafana-ocp + name: grafana-ocp-app + spec: + serviceAccountName: grafana-ocp + containers: + - name: oauth-proxy + image: ${IMAGE_PROXY} + imagePullPolicy: IfNotPresent + ports: + - containerPort: 8443 + name: web + args: + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -client-id=system:serviceaccount:${NAMESPACE}:grafana-ocp + - -upstream=http://localhost:3000 + - -provider=openshift +# - '-openshift-delegate-urls={"/api/datasources": {"resource": "namespace", "verb": "get", "resourceName": "grafana-ocp", "namespace": "${NAMESPACE}"}}' + - '-openshift-sar={"namespace": "${NAMESPACE}", "verb": "list", "resource": "services"}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret-file=/etc/proxy/secrets/session_secret + - -skip-auth-regex=^/metrics,/api/datasources,/api/dashboards + volumeMounts: + - mountPath: /etc/tls/private + name: gf-tls + - mountPath: /etc/proxy/secrets + name: secrets + + - name: grafana-ocp + image: ${IMAGE_GF} + ports: + - name: grafana-http + containerPort: 3000 + volumeMounts: + - mountPath: "/root/go/src/github.com/grafana/grafana/data" + name: gf-data + - mountPath: "/root/go/src/github.com/grafana/grafana/conf" + name: gfconfig + - mountPath: /etc/tls/private + name: gf-tls + - mountPath: /etc/proxy/secrets + name: secrets + command: + - "./bin/grafana-server" + + volumes: + - name: gfconfig + configMap: + name: gf-config + - name: secrets + secret: + secretName: gf-proxy + - name: gf-tls + secret: + secretName: gf-tls + - emptyDir: {} + name: gf-data +- apiVersion: v1 + kind: ConfigMap + metadata: + name: gf-config + namespace: "${NAMESPACE}" + data: + defaults.ini: |- + ##################### Grafana Configuration Defaults ##################### + # + # Do not modify this file in grafana installs + # + + # possible values : production, development + app_mode = production + + # instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty + instance_name = ${HOSTNAME} + + #################################### Paths ############################### + [paths] + # Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used) + # + data = data + # + # Directory where grafana can store logs + # + logs = data/log + # + # Directory where grafana will automatically scan and look for plugins + # + plugins = data/plugins + + #################################### Server ############################## + [server] + # Protocol (http, https, socket) + protocol = http + + # The ip address to bind to, empty will bind to all interfaces + http_addr = + + # The http port to use + http_port = 3000 + + # The public facing domain name used to access grafana from a browser + domain = localhost + + # Redirect to correct domain if host header does not match domain + # Prevents DNS rebinding attacks + enforce_domain = false + + # The full public facing url + root_url = %(protocol)s://%(domain)s:%(http_port)s/ + + # Log web requests + router_logging = false + + # the path relative working path + static_root_path = public + + # enable gzip + enable_gzip = false + + # https certs & key file + cert_file = /etc/tls/private/tls.crt + cert_key = /etc/tls/private/tls.key + + # Unix socket path + socket = /tmp/grafana.sock + + #################################### Database ############################ + [database] + # You can configure the database connection by specifying type, host, name, user and password + # as separate properties or as on string using the url property. + + # Either "mysql", "postgres" or "sqlite3", it's your choice + type = sqlite3 + host = 127.0.0.1:3306 + name = grafana + user = root + # If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;""" + password = + # Use either URL or the previous fields to configure the database + # Example: mysql://user:secret@host:port/database + url = + + # Max idle conn setting default is 2 + max_idle_conn = 2 + + # Max conn setting default is 0 (mean not set) + max_open_conn = + + # For "postgres", use either "disable", "require" or "verify-full" + # For "mysql", use either "true", "false", or "skip-verify". + ssl_mode = disable + + ca_cert_path = + client_key_path = + client_cert_path = + server_cert_name = + + # For "sqlite3" only, path relative to data_path setting + path = grafana.db + + #################################### Session ############################# + [session] + # Either "memory", "file", "redis", "mysql", "postgres", "memcache", default is "file" + provider = file + + # Provider config options + # memory: not have any config yet + # file: session dir path, is relative to grafana data_path + # redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=grafana` + # postgres: user=a password=b host=localhost port=5432 dbname=c sslmode=disable + # mysql: go-sql-driver/mysql dsn config string, examples: + # `user:password@tcp(127.0.0.1:3306)/database_name` + # `user:password@unix(/var/run/mysqld/mysqld.sock)/database_name` + # memcache: 127.0.0.1:11211 + + + provider_config = sessions + + # Session cookie name + cookie_name = grafana_sess + + # If you use session in https only, default is false + cookie_secure = false + + # Session life time, default is 86400 + session_life_time = 86400 + gc_interval_time = 86400 + + #################################### Data proxy ########################### + [dataproxy] + + # This enables data proxy logging, default is false + logging = false + + #################################### Analytics ########################### + [analytics] + # Server reporting, sends usage counters to stats.grafana.org every 24 hours. + # No ip addresses are being tracked, only simple counters to track + # running instances, dashboard and error counts. It is very helpful to us. + # Change this option to false to disable reporting. + reporting_enabled = true + + # Set to false to disable all checks to https://grafana.com + # for new versions (grafana itself and plugins), check is used + # in some UI views to notify that grafana or plugin update exists + # This option does not cause any auto updates, nor send any information + # only a GET request to https://grafana.com to get latest versions + check_for_updates = true + + # Google Analytics universal tracking code, only enabled if you specify an id here + google_analytics_ua_id = + + # Google Tag Manager ID, only enabled if you specify an id here + google_tag_manager_id = + + #################################### Security ############################ + [security] + # default admin user, created on startup + admin_user = admin + + # default admin password, can be changed before first start of grafana, or in profile settings + admin_password = admin + + # used for signing + secret_key = SW2YcwTIb9zpOOhoPsMm + + # Auto-login remember days + login_remember_days = 7 + cookie_username = grafana_user + cookie_remember_name = grafana_remember + + # disable gravatar profile images + disable_gravatar = false + + # data source proxy whitelist (ip_or_domain:port separated by spaces) + data_source_proxy_whitelist = + + [snapshots] + # snapshot sharing options + external_enabled = true + external_snapshot_url = https://snapshots-origin.raintank.io + external_snapshot_name = Publish to snapshot.raintank.io + + # remove expired snapshot + snapshot_remove_expired = true + + # remove snapshots after 90 days + snapshot_TTL_days = 90 + + #################################### Users #################################### + [users] + # disable user signup / registration + allow_sign_up = true + + # Allow non admin users to create organizations + allow_org_create = true + + # Set to true to automatically assign new users to the default organization (id 1) + auto_assign_org = true + + # Default role new users will be automatically assigned (if auto_assign_org above is set to true) + auto_assign_org_role = Admin + + # Require email validation before sign up completes + verify_email_enabled = false + + # Background text for the user field on the login page + login_hint = email or username + + # Default UI theme ("dark" or "light") + default_theme = dark + + # External user management + external_manage_link_url = + external_manage_link_name = + external_manage_info = + + [auth] + # Set to true to disable (hide) the login form, useful if you use OAuth + disable_login_form = true + + # Set to true to disable the signout link in the side menu. useful if you use auth.proxy + disable_signout_menu = true + + #################################### Anonymous Auth ###################### + [auth.anonymous] + # enable anonymous access + enabled = true + + # specify organization name that should be used for unauthenticated users + org_name = Main Org. + + # specify role for unauthenticated users + org_role = Admin + + #################################### Github Auth ######################### + [auth.github] + enabled = false + allow_sign_up = true + client_id = some_id + client_secret = some_secret + scopes = user:email + auth_url = https://github.com/login/oauth/authorize + token_url = https://github.com/login/oauth/access_token + api_url = https://api.github.com/user + team_ids = + allowed_organizations = + + #################################### Google Auth ######################### + [auth.google] + enabled = false + allow_sign_up = true + client_id = some_client_id + client_secret = some_client_secret + scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email + auth_url = https://accounts.google.com/o/oauth2/auth + token_url = https://accounts.google.com/o/oauth2/token + api_url = https://www.googleapis.com/oauth2/v1/userinfo + allowed_domains = + hosted_domain = + + #################################### Grafana.com Auth #################### + # legacy key names (so they work in env variables) + [auth.grafananet] + enabled = false + allow_sign_up = true + client_id = some_id + client_secret = some_secret + scopes = user:email + allowed_organizations = + + [auth.grafana_com] + enabled = false + allow_sign_up = true + client_id = some_id + client_secret = some_secret + scopes = user:email + allowed_organizations = + + #################################### Generic OAuth ####################### + [auth.generic_oauth] + name = OAuth + enabled = false + allow_sign_up = true + client_id = some_id + client_secret = some_secret + scopes = user:email + auth_url = + token_url = + api_url = + team_ids = + allowed_organizations = + + #################################### Basic Auth ########################## + [auth.basic] + enabled = false + + #################################### Auth Proxy ########################## + [auth.proxy] + enabled = true + header_name = X-WEBAUTH-USER + header_property = username + auto_sign_up = true + ldap_sync_ttl = 60 + whitelist = + + #################################### Auth LDAP ########################### + [auth.ldap] + enabled = false + config_file = /etc/grafana/ldap.toml + allow_sign_up = true + + #################################### SMTP / Emailing ##################### + [smtp] + enabled = false + host = localhost:25 + user = + # If the password contains # or ; you have to wrap it with trippel quotes. Ex """#password;""" + password = + cert_file = + key_file = + skip_verify = false + from_address = admin@grafana.localhost + from_name = Grafana + ehlo_identity = + + [emails] + welcome_email_on_sign_up = false + templates_pattern = emails/*.html + + #################################### Logging ########################## + [log] + # Either "console", "file", "syslog". Default is console and file + # Use space to separate multiple modes, e.g. "console file" + mode = console file + + # Either "debug", "info", "warn", "error", "critical", default is "info" + level = error + + # optional settings to set different levels for specific loggers. Ex filters = sqlstore:debug + filters = + + # For "console" mode only + [log.console] + level = + + # log line format, valid options are text, console and json + format = console + + # For "file" mode only + [log.file] + level = + + # log line format, valid options are text, console and json + format = text + + # This enables automated log rotate(switch of following options), default is true + log_rotate = true + + # Max line number of single file, default is 1000000 + max_lines = 1000000 + + # Max size shift of single file, default is 28 means 1 << 28, 256MB + max_size_shift = 28 + + # Segment log daily, default is true + daily_rotate = true + + # Expired days of log file(delete after max days), default is 7 + max_days = 7 + + [log.syslog] + level = + + # log line format, valid options are text, console and json + format = text + + # Syslog network type and address. This can be udp, tcp, or unix. If left blank, the default unix endpoints will be used. + network = + address = + + # Syslog facility. user, daemon and local0 through local7 are valid. + facility = + + # Syslog tag. By default, the process' argv[0] is used. + tag = + + + #################################### AMQP Event Publisher ################ + [event_publisher] + enabled = false + rabbitmq_url = amqp://localhost/ + exchange = grafana_events + + #################################### Dashboard JSON files ################ + [dashboards.json] + enabled = false + path = /var/lib/grafana/dashboards + + #################################### Usage Quotas ######################## + [quota] + enabled = false + + #### set quotas to -1 to make unlimited. #### + # limit number of users per Org. + org_user = 10 + + # limit number of dashboards per Org. + org_dashboard = 100 + + # limit number of data_sources per Org. + org_data_source = 10 + + # limit number of api_keys per Org. + org_api_key = 10 + + # limit number of orgs a user can create. + user_org = 10 + + # Global limit of users. + global_user = -1 + + # global limit of orgs. + global_org = -1 + + # global limit of dashboards + global_dashboard = -1 + + # global limit of api_keys + global_api_key = -1 + + # global limit on number of logged in users. + global_session = -1 + + #################################### Alerting ############################ + [alerting] + # Disable alerting engine & UI features + enabled = true + # Makes it possible to turn off alert rule execution but alerting UI is visible + execute_alerts = true + + #################################### Internal Grafana Metrics ############ + # Metrics available at HTTP API Url /api/metrics + [metrics] + enabled = true + interval_seconds = 10 + + # Send internal Grafana metrics to graphite + [metrics.graphite] + # Enable by setting the address setting (ex localhost:2003) + address = + prefix = prod.grafana.%(instance_name)s. + + [grafana_net] + url = https://grafana.com + + [grafana_com] + url = https://grafana.com + + #################################### Distributed tracing ############ + [tracing.jaeger] + # jaeger destination (ex localhost:6831) + address = + # tag that will always be included in when creating new spans. ex (tag1:value1,tag2:value2) + always_included_tag = + # Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote + sampler_type = const + # jaeger samplerconfig param + # for "const" sampler, 0 or 1 for always false/true respectively + # for "probabilistic" sampler, a probability between 0 and 1 + # for "rateLimiting" sampler, the number of spans per second + # for "remote" sampler, param is the same as for "probabilistic" + # and indicates the initial sampling rate before the actual one + # is received from the mothership + sampler_param = 1 + + #################################### External Image Storage ############## + [external_image_storage] + # You can choose between (s3, webdav, gcs) + provider = + + [external_image_storage.s3] + bucket_url = + bucket = + region = + path = + access_key = + secret_key = + + [external_image_storage.webdav] + url = + username = + password = + public_url = + + [external_image_storage.gcs] + key_file = + bucket = diff --git a/roles/openshift_grafana/files/grafana-ocp.yml b/roles/openshift_grafana/files/grafana-ocp.yml new file mode 100644 index 000000000..bc7b4b286 --- /dev/null +++ b/roles/openshift_grafana/files/grafana-ocp.yml @@ -0,0 +1,76 @@ +--- +kind: Template +apiVersion: v1 +metadata: + name: grafana-ocp + annotations: + "openshift.io/display-name": Grafana ocp + description: | + Grafana server with patched Prometheus datasource. + iconClass: icon-cogs + tags: "metrics,monitoring,grafana,prometheus" +parameters: +- description: External URL for the grafana route + name: ROUTE_URL + value: "" +- description: The namespace to instantiate heapster under. Defaults to 'grafana'. + name: NAMESPACE + value: grafana +objects: +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + spec: + host: "${ROUTE_URL}" + to: + name: grafana-ocp +- apiVersion: v1 + kind: Service + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + labels: + metrics-infra: grafana-ocp + name: grafana-ocp + spec: + selector: + name: grafana-ocp + ports: + - port: 8082 + protocol: TCP + targetPort: grafana-http +- apiVersion: v1 + kind: ReplicationController + metadata: + name: grafana-ocp + namespace: "${NAMESPACE}" + labels: + metrics-infra: grafana-ocp + name: grafana-ocp + spec: + selector: + name: grafana-ocp + replicas: 1 + template: + version: v1 + metadata: + labels: + metrics-infra: grafana-ocp + name: grafana-ocp + spec: + volumes: + - name: data + emptyDir: {} + containers: + - image: "mrsiano/grafana-ocp:latest" + name: grafana-ocp + ports: + - name: grafana-http + containerPort: 3000 + volumeMounts: + - name: data + mountPath: "/root/go/src/github.com/grafana/grafana/data" + command: + - "./bin/grafana-server" diff --git a/roles/openshift_grafana/files/openshift-cluster-monitoring.json b/roles/openshift_grafana/files/openshift-cluster-monitoring.json new file mode 100644 index 000000000..f59ca997f --- /dev/null +++ b/roles/openshift_grafana/files/openshift-cluster-monitoring.json @@ -0,0 +1,5138 @@ +{ + "dashboard": { + "description": "Monitors Openshift cluster using Prometheus. Shows overall cluster CPU / Memory / Filesystem usage as well as individual pod, containers, systemd services statistics. Uses cAdvisor metrics only.", + "editable": true, + "gnetId": 315, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "rows": [ + { + "collapse": false, + "height": "200px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "height": "200px", + "id": 32, + "legend": { + "alignAsTable": false, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_network_receive_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))", + "format": "time_series", + "instant": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "Received", + "metric": "network", + "refId": "A", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{kubernetes_io_hostname=~\"^$Node$\"}[2m]))", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "Sent", + "metric": "network", + "refId": "B", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network I/O pressure", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Network I/O pressure", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180px", + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"}) * 100", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Cluster memory usage", + "transparent": false, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180px", + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) * 100", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Cluster CPU usage ", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180px", + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_fs_usage_bytes{device=~\"^/dev/mapper/docker_.*\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) / sum (container_fs_limit_bytes{device=~\"^/dev/mapper/docker_.*\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) * 100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Cluster filesystem usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "20%", + "prefix": "", + "prefixFontSize": "20%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 10, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (machine_memory_bytes{kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": " cores", + "postfixFontSize": "30%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 12, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": " cores", + "postfixFontSize": "30%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 13, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_fs_usage_bytes{device=~\"^/dev/mapper/docker_.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "id": 14, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_fs_limit_bytes{device=~\"^/dev/mapper/docker_.*$\",id=\"/\",kubernetes_io_hostname=~\"^$Node$\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Total usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{id=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~\"^$Node$\"}) ", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "overall cpu usage", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster CPU Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "height": "", + "id": 17, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name) * 100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ pod_name }}", + "metric": "container_cpu", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pods CPU usage ", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "% Usage", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Pods CPU usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "height": "", + "id": 24, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "pod: {{ pod_name }} | {{ container_name }}", + "metric": "container_cpu", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Containers Cores Usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "cores", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Containers CPU usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "height": "", + "id": 23, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (irate (container_cpu_usage_seconds_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ id }}", + "metric": "container_cpu", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "System services CPU usage ", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "cores", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "System services CPU usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 411, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 34, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (irate (container_memory_usage_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ id }}", + "metric": "container_cpu", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "All processes Memory usage ", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "cores", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "All processes CPU usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 25, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (pod_name)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ pod_name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pods memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Pods memory usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 26, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (container_memory_rss{systemd_service_name=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (systemd_service_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ systemd_service_name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "System services memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "System services memory usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 27, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",container_name!=\"POD\",kubernetes_io_hostname=~\"^$Node$\"}) by (container_name, pod_name)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "pod: {{ pod_name }} | {{ container_name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + }, + { + "expr": "sum (container_memory_working_set_bytes{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, name, image)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", + "metric": "container_memory_usage:sort_desc", + "refId": "B", + "step": 10 + }, + { + "expr": "sum (container_memory_working_set_bytes{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}) by (kubernetes_io_hostname, rkt_container_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "C", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Containers memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Containers memory usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "500px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 28, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "sum (container_memory_working_set_bytes{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}) by (id)", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ id }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "All processes memory usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "All processes memory usage", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 30, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> pod: {{ pod_name }} | {{ container_name }}", + "metric": "network", + "refId": "B", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (container_name, pod_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- pod: {{ pod_name }} | {{ container_name }}", + "metric": "network", + "refId": "D", + "step": 1 + }, + { + "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", + "metric": "network", + "refId": "A", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name!~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, name, image)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})", + "metric": "network", + "refId": "C", + "step": 1 + }, + { + "expr": "sum (irate (container_network_transmit_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", + "metric": "network", + "refId": "E", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{rkt_container_name!=\"\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (kubernetes_io_hostname, rkt_container_name)", + "format": "time_series", + "hide": false, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}", + "metric": "network", + "refId": "F", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Containers network I/O ", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Containers network I/O", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 277, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 16, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> {{ pod_name }}", + "metric": "network", + "refId": "A", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (pod_name)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- {{ pod_name }}", + "metric": "network", + "refId": "B", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pods network I/O ", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Pods network I/O", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "500px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 29, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate (container_network_receive_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)", + "format": "time_series", + "instant": true, + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "-> {{ id }}", + "metric": "network", + "refId": "A", + "step": 1 + }, + { + "expr": "- sum (irate (container_network_transmit_bytes_total{id!=\"/\",kubernetes_io_hostname=~\"^$Node$\"}[2m])) by (id)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "<- {{ id }}", + "metric": "network", + "refId": "B", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "All processes network I/O ", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "All processes network I/O", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 35, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_build_total) by (phase,reason)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ phase }} | {{ reason }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_build_total", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 54, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count(openshift_build_active_time_seconds{phase=\"running\"} offset 10m)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of builds that have been running for more than 10 minutes (600 seconds).", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 55, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count(openshift_build_active_time_seconds{phase=\"pending\"} offset 10m)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of build that have been waiting at least 10 minutes (600 seconds) to start.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 56, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_build_total{phase=\"Failed\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of failed builds, regardless of the failure reason.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 57, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_build_total{phase=\"Failed\",reason=\"FetchSourceFailed\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ instance }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of failed builds because of problems retrieving source from the associated Git repository.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 58, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_build_total{phase=\"Complete\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the number of successfully completed builds.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 0, + "id": 59, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_build_total{phase=\"Failed\"} offset 5m", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ reason }}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns the failed builds totals, per failure reason, from 5 minutes ago.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OpenShift Builds", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_sdn_pod_setup_latency_sum)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_pod_setup_latency_sum", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 41, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(openshift_sdn_pod_teardown_latency{quantile=\"0.9\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_pod_teardown_latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 50, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, (sum by (pod_name) (irate(container_network_receive_bytes_total{pod_name!=\"\"}[5m]))))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ pod_name }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Top 10 pods doing the most receive network traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 37, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_sdn_pod_ips", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ instance }} | {{ role }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_pod_ips", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 39, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "garbage_collector_monitoring_route:openshift:io_v1_rate_limiter_use", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "garbage_collector_monitoring_route:openshift:io_v1_rate_limiter_use", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 42, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_sdn_arp_cache_entries", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ role }} | {{ instance }}", + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_arp_cache_entries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 40, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openshift_sdn_arp_cache_entries", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 1 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "openshift_sdn_arp_cache_entries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OpenShift SDN", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 44, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(kubelet_pleg_relist_latency_microseconds{kubernetes_io_hostname=~\"$Node\",quantile=\"0.9\"}[2m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ role }} | {{ instance }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "kubelet_pleg_relist", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 51, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(kubelet_docker_operations_latency_microseconds{quantile=\"0.9\"}[2m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ operation_type }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "kubelet_docker_operations_latency_microseconds{quantile=\"0.9\"}", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 52, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kubelet_docker_operations_timeout", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ operation_type }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns a running count (not a rate) of docker operations that have timed out since the kubelet was started.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 53, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kubelet_docker_operations_errors", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ operation_type }}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Returns a running count (not a rate) of docker operations that have failed since the kubelet was started.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Kubelet", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 46, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(scrape_samples_scraped[2m])", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{ kubernetes_name }} | {{ instance }} ", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "scrape_samples_scraped", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 68, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum without (cpu) (irate(container_cpu_usage_seconds_total{container_name=\"prometheus\"}[5m])))", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU per instance of Prometheus container.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Prometheus", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 48, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum without (instance,type,client,contentType) (irate(apiserver_request_count{verb!~\"GET|LIST|WATCH\"}[2m]))) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ resource }} || {{ verb }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Number of mutating API requests being made to the control plane.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 49, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum without (instance,type,client,contentType) (irate(apiserver_request_count{verb=~\"GET|LIST|WATCH\"}[2m]))) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ resource }} || {{ pod }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Number of non-mutating API requests being made to the control plane.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 74, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "endpoint_queue_latency", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": " quantile {{ quantile }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "endpoint_queue_latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "API Server", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 61, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_disk_wal_fsync_duration_seconds_count", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "etcd_disk_wal_fsync_duration_seconds_count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "etcd", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 62, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(changes(container_start_time_seconds[10m]))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "The number of containers that start or restart over the last ten minutes.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Changes in your cluster", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 63, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(machine_cpu_cores)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total number of cores in the cluster.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 64, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(sort_desc(irate(container_cpu_usage_seconds_total{id=\"/\"}[5m])))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total number of consumed cores.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 65, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (kubernetes_io_hostname,type) (irate(container_cpu_usage_seconds_total{id=\"/\"}[5m])))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU consumed per node in the cluster.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 66, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (cpu,id,pod_name,container_name) (irate(container_cpu_usage_seconds_total{role=\"infra\"}[5m])))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU consumption per system service or container on the infrastructure nodes.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 67, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (namespace) (irate(container_cpu_usage_seconds_total[5m])))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU consumed per namespace on the cluster.", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(container_cpu_usage_seconds_total{id=\"/\"}[3m])) / sum(machine_cpu_cores)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Percentage of total cluster CPU in use", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 69, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_rss) / sum(machine_memory_bytes)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Percentage of total cluster memory in use", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 70, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (kubernetes_io_hostname) (irate(container_cpu_usage_seconds_total{id=~\"/system.slice/(docker|etcd).service\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Aggregate CPU usage (seconds total) of etcd+docker", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "System and container CPU", + "titleSize": "h6" + }, + { + "collapse": true, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 71, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "title": "Kubernetes Storage Metrics via Prometheus", + "type": "absolute", + "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g" + } + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "volumes_queue_latency", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "volumes_queue_latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 72, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "title": "Kubernetes Storage Metrics via Prometheus", + "type": "absolute", + "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g" + } + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(cloudprovider_gce_api_request_duration_seconds_count[2m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ request }}", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "cloudprovider_aws_api_request_duration_seconds_count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PR}", + "fill": 1, + "id": 73, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "title": "Kubernetes Storage Metrics via Prometheus", + "type": "absolute", + "url": "https://docs.google.com/document/d/1Fh0T60T_y888LsRwC51CQHO75b2IZ3A34ZQS71s_F0g" + } + ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate(storage_operation_duration_seconds_sum{kubernetes_io_hostname=~\"$Node\"}[2m])) by (operation_name,kubernetes_io_hostname)", + "format": "time_series", + "interval": "1s", + "intervalFactor": 1, + "legendFormat": "{{ operation_name }} || {{ kubernetes_io_hostname }}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "storage_operation_duration_seconds_sum", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OpenShift Volumes", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "kubernetes", + "openshift" + ], + "templating": { + "list": [ + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PR}", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "Node", + "options": [], + "query": "label_values(kubernetes_io_hostname)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "1s", + "2m", + "20s", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "openshift cluster monitoring", + "version": 6 + } +} diff --git a/roles/openshift_grafana/meta/main.yml b/roles/openshift_grafana/meta/main.yml new file mode 100644 index 000000000..8dea6f197 --- /dev/null +++ b/roles/openshift_grafana/meta/main.yml @@ -0,0 +1,13 @@ +--- +galaxy_info: + author: Eldad Marciano + description: Setup grafana pod + company: Red Hat, Inc. + license: Apache License, Version 2.0 + min_ansible_version: 2.3 + platforms: + - name: EL + versions: + - 7 + categories: + - metrics diff --git a/roles/openshift_grafana/tasks/gf-permissions.yml b/roles/openshift_grafana/tasks/gf-permissions.yml new file mode 100644 index 000000000..9d3c741ee --- /dev/null +++ b/roles/openshift_grafana/tasks/gf-permissions.yml @@ -0,0 +1,12 @@ +--- +- name: Create gf user on htpasswd + command: htpasswd -c /etc/origin/master/htpasswd gfadmin + +- name: Make sure master config use HTPasswdPasswordIdentityProvider + command: "sed -ie 's|AllowAllPasswordIdentityProvider|HTPasswdPasswordIdentityProvider\n file: /etc/origin/master/htpasswd|' /etc/origin/master/master-config.yaml" + +- name: Grant permission for gfuser + command: oc adm policy add-cluster-role-to-user cluster-reader gfadmin + +- name: Restart mater api + command: systemctl restart atomic-openshift-master-api.service diff --git a/roles/openshift_grafana/tasks/main.yml b/roles/openshift_grafana/tasks/main.yml new file mode 100644 index 000000000..6a06d40a9 --- /dev/null +++ b/roles/openshift_grafana/tasks/main.yml @@ -0,0 +1,122 @@ +--- +- name: Create grafana namespace + oc_project: + state: present + name: grafana + +- name: Configure Grafana Permissions + include_tasks: tasks/gf-permissions.yml + when: gf_oauth | default(false) | bool == true + +# TODO: we should grab this yaml file from openshift/origin +- name: Templatize grafana yaml + template: src=grafana-ocp.yaml dest=/tmp/grafana-ocp.yaml + register: + cl_file: /tmp/grafana-ocp.yaml + when: gf_oauth | default(false) | bool == false + +# TODO: we should grab this yaml file from openshift/origin +- name: Templatize grafana yaml + template: src=grafana-ocp-oauth.yaml dest=/tmp/grafana-ocp-oauth.yaml + register: + cl_file: /tmp/grafana-ocp-oauth.yaml + when: gf_oauth | default(false) | bool == true + +- name: Process the grafana file + oc_process: + namespace: grafana + template_name: "{{ cl_file }}" + create: True + when: gf_oauth | default(false) | bool == true + +- name: Wait to grafana be running + command: oc rollout status deployment/grafana-ocp + +- name: oc adm policy add-role-to-user view -z grafana-ocp -n {{ gf_prometheus_namespace }} + oc_adm_policy_user: + user: grafana-ocp + resource_kind: cluster-role + resource_name: view + state: present + role_namespace: "{{ gf_prometheus_namespace }}" + +- name: Get grafana route + oc_obj: + kind: route + name: grafana + namespace: grafana + register: route + +- name: Get prometheus route + oc_obj: + kind: route + name: prometheus + namespace: "{{ gf_prometheus_namespace }}" + register: route + +- name: Get the prometheus SA + oc_serviceaccount_secret: + state: list + service_account: prometheus + namespace: "{{ gf_prometheus_namespace }}" + register: sa + +- name: Get the management SA bearer token + set_fact: + management_token: "{{ sa.results | oo_filter_sa_secrets }}" + +- name: Ensure the SA bearer token value is read + oc_secret: + state: list + name: "{{ management_token }}" + namespace: "{{ gf_prometheus_namespace }}" + no_log: True + register: sa_secret + +- name: Get the SA bearer token for prometheus + set_fact: + token: "{{ sa_secret.results.encoded.token }}" + +- name: Convert to json + var: + ds_json: "{{ gf_body_tmp }} | to_json }}" + +- name: Set protocol type + var: + protocol: "{{ 'https' if {{ gf_oauth }} == true else 'http' }}" + +- name: Add gf datasrouce + uri: + url: "{{ protocol }}://{{ route }}/api/datasources" + user: admin + password: admin + method: POST + body: "{{ ds_json | regex_replace('grafana_name', {{ gf_datasource_name }}) | regex_replace('prometheus_url', 'https://'{{ prometheus }} ) | regex_replace('satoken', {{ token }}) }}" + headers: + Content-Type: "Content-Type: application/json" + register: add_ds + +- name: Regex setup ds name + replace: + path: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}" + regexp: '${DS_PR}' + replace: '{{ gf_datasource_name }}' + backup: yes + +- name: Add new dashboard + uri: + url: "{{ protocol }}://{{ route }}/api/dashboards/db" + user: admin + password: admin + method: POST + body: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}" + headers: + Content-Type: "Content-Type: application/json" + register: add_ds + +- name: Regex json tear down + replace: + path: "{{ lookup('file', 'openshift-cluster-monitoring.json') }}" + regexp: '${DS_PR}' + replace: '{{ gf_datasource_name }}' + backup: yes -- cgit v1.2.3