diff options
Diffstat (limited to 'roles')
29 files changed, 566 insertions, 138 deletions
diff --git a/roles/openshift_excluder/tasks/install.yml b/roles/openshift_excluder/tasks/install.yml index d09358bee..3a866cedf 100644 --- a/roles/openshift_excluder/tasks/install.yml +++ b/roles/openshift_excluder/tasks/install.yml @@ -1,14 +1,24 @@ --- -- name: Install docker excluder - package: - name: "{{ r_openshift_excluder_service_type }}-docker-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" - state: "{{ r_openshift_excluder_docker_package_state }}" - when: - - r_openshift_excluder_enable_docker_excluder | bool - -- name: Install openshift excluder - package: - name: "{{ r_openshift_excluder_service_type }}-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" - state: "{{ r_openshift_excluder_package_state }}" - when: - - r_openshift_excluder_enable_openshift_excluder | bool + +- when: + - not openshift.common.is_atomic | bool + - r_openshift_excluder_install_ran is not defined + + block: + + - name: Install docker excluder + package: + name: "{{ r_openshift_excluder_service_type }}-docker-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" + state: "{{ r_openshift_excluder_docker_package_state }}" + when: + - r_openshift_excluder_enable_docker_excluder | bool + + - name: Install openshift excluder + package: + name: "{{ r_openshift_excluder_service_type }}-excluder{{ openshift_pkg_version | default('') | oo_image_tag_to_rpm_version(include_dash=True) + '*' }}" + state: "{{ r_openshift_excluder_package_state }}" + when: + - r_openshift_excluder_enable_openshift_excluder | bool + + - set_fact: + r_openshift_excluder_install_ran: True diff --git a/roles/openshift_facts/tasks/main.yml b/roles/openshift_facts/tasks/main.yml index 1b9bda67e..50ed3e964 100644 --- a/roles/openshift_facts/tasks/main.yml +++ b/roles/openshift_facts/tasks/main.yml @@ -24,12 +24,18 @@ msg: | openshift-ansible requires Python 3 for {{ ansible_distribution }}; For information on enabling Python 3 with Ansible, see https://docs.ansible.com/ansible/python_3_support.html - when: ansible_distribution == 'Fedora' and ansible_python['version']['major'] != 3 + when: + - ansible_distribution == 'Fedora' + - ansible_python['version']['major'] != 3 + - r_openshift_facts_ran is not defined - name: Validate python version fail: msg: "openshift-ansible requires Python 2 for {{ ansible_distribution }}" - when: ansible_distribution != 'Fedora' and ansible_python['version']['major'] != 2 + when: + - ansible_distribution != 'Fedora' + - ansible_python['version']['major'] != 2 + - r_openshift_facts_ran is not defined # Fail as early as possible if Atomic and old version of Docker - block: @@ -48,7 +54,9 @@ that: - l_atomic_docker_version.stdout | replace('"', '') | version_compare('1.12','>=') - when: l_is_atomic | bool + when: + - l_is_atomic | bool + - r_openshift_facts_ran is not defined - name: Load variables include_vars: "{{ item }}" @@ -59,7 +67,9 @@ - name: Ensure various deps are installed package: name={{ item }} state=present with_items: "{{ required_packages }}" - when: not l_is_atomic | bool + when: + - not l_is_atomic | bool + - r_openshift_facts_ran is not defined - name: Ensure various deps for running system containers are installed package: name={{ item }} state=present @@ -67,6 +77,7 @@ when: - not l_is_atomic | bool - l_any_system_container | bool + - r_openshift_facts_ran is not defined - name: Gather Cluster facts and set is_containerized if needed openshift_facts: @@ -99,3 +110,7 @@ - name: Set repoquery command set_fact: repoquery_cmd: "{{ 'dnf repoquery --latest-limit 1 -d 0' if ansible_pkg_mgr == 'dnf' else 'repoquery --plugins' }}" + +- name: Register that this already ran + set_fact: + r_openshift_facts_ran: True diff --git a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py index 27e6fe383..60aacf715 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_image_availability.py +++ b/roles/openshift_health_checker/openshift_checks/docker_image_availability.py @@ -1,8 +1,24 @@ -# pylint: disable=missing-docstring +"""Check that required Docker images are available.""" + from openshift_checks import OpenShiftCheck, get_var from openshift_checks.mixins import DockerHostMixin +NODE_IMAGE_SUFFIXES = ["haproxy-router", "docker-registry", "deployer", "pod"] +DEPLOYMENT_IMAGE_INFO = { + "origin": { + "namespace": "openshift", + "name": "origin", + "registry_console_image": "cockpit/kubernetes", + }, + "openshift-enterprise": { + "namespace": "openshift3", + "name": "ose", + "registry_console_image": "registry.access.redhat.com/openshift3/registry-console", + }, +} + + class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): """Check that required Docker images are available. @@ -13,25 +29,13 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): name = "docker_image_availability" tags = ["preflight"] - dependencies = ["skopeo", "python-docker-py"] - deployment_image_info = { - "origin": { - "namespace": "openshift", - "name": "origin", - }, - "openshift-enterprise": { - "namespace": "openshift3", - "name": "ose", - }, - } - @classmethod def is_active(cls, task_vars): """Skip hosts with unsupported deployment types.""" deployment_type = get_var(task_vars, "openshift_deployment_type") - has_valid_deployment_type = deployment_type in cls.deployment_image_info + has_valid_deployment_type = deployment_type in DEPLOYMENT_IMAGE_INFO return super(DockerImageAvailability, cls).is_active(task_vars) and has_valid_deployment_type @@ -70,51 +74,55 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): return {"changed": changed} - def required_images(self, task_vars): - deployment_type = get_var(task_vars, "openshift_deployment_type") - image_info = self.deployment_image_info[deployment_type] - - openshift_release = get_var(task_vars, "openshift_release", default="latest") - openshift_image_tag = get_var(task_vars, "openshift_image_tag") - is_containerized = get_var(task_vars, "openshift", "common", "is_containerized") - - images = set(self.required_docker_images( - image_info["namespace"], - image_info["name"], - ["registry-console"] if "enterprise" in deployment_type else [], # include enterprise-only image names - openshift_release, - is_containerized, - )) - - # append images with qualified image tags to our list of required images. - # these are images with a (v0.0.0.0) tag, rather than a standard release - # format tag (v0.0). We want to check this set in both containerized and - # non-containerized installations. - images.update( - self.required_qualified_docker_images( - image_info["namespace"], - image_info["name"], - openshift_image_tag, - ), - ) - - return images - @staticmethod - def required_docker_images(namespace, name, additional_image_names, version, is_containerized): - if is_containerized: - return ["{}/{}:{}".format(namespace, name, version)] if name else [] - - # include additional non-containerized images specific to the current deployment type - return ["{}/{}:{}".format(namespace, img_name, version) for img_name in additional_image_names] - - @staticmethod - def required_qualified_docker_images(namespace, name, version): - # pylint: disable=invalid-name - return [ - "{}/{}-{}:{}".format(namespace, name, suffix, version) - for suffix in ["haproxy-router", "docker-registry", "deployer", "pod"] - ] + def required_images(task_vars): + """ + Determine which images we expect to need for this host. + Returns: a set of required images like 'openshift/origin:v3.6' + + The thorny issue of determining the image names from the variables is under consideration + via https://github.com/openshift/openshift-ansible/issues/4415 + + For now we operate as follows: + * For containerized components (master, node, ...) we look at the deployment type and + use openshift/origin or openshift3/ose as the base for those component images. The + version is openshift_image_tag as determined by the openshift_version role. + * For OpenShift-managed infrastructure (router, registry...) we use oreg_url if + it is defined; otherwise we again use the base that depends on the deployment type. + Registry is not included in constructed images. It may be in oreg_url or etcd image. + """ + required = set() + deployment_type = get_var(task_vars, "openshift_deployment_type") + host_groups = get_var(task_vars, "group_names") + image_tag = get_var(task_vars, "openshift_image_tag") + image_info = DEPLOYMENT_IMAGE_INFO[deployment_type] + if not image_info: + return required + + # template for images that run on top of OpenShift + image_url = "{}/{}-{}:{}".format(image_info["namespace"], image_info["name"], "${component}", "${version}") + image_url = get_var(task_vars, "oreg_url", default="") or image_url + if 'nodes' in host_groups: + for suffix in NODE_IMAGE_SUFFIXES: + required.add(image_url.replace("${component}", suffix).replace("${version}", image_tag)) + # The registry-console is for some reason not prefixed with ose- like the other components. + # Nor is it versioned the same, so just look for latest. + # Also a completely different name is used for Origin. + required.add(image_info["registry_console_image"]) + + # images for containerized components + if get_var(task_vars, "openshift", "common", "is_containerized"): + components = set() + if 'nodes' in host_groups: + components.update(["node", "openvswitch"]) + if 'masters' in host_groups: # name is "origin" or "ose" + components.add(image_info["name"]) + for component in components: + required.add("{}/{}:{}".format(image_info["namespace"], component, image_tag)) + if 'etcd' in host_groups: # special case, note it is the same for origin/enterprise + required.add("registry.access.redhat.com/rhel7/etcd") # and no image tag + + return required def local_images(self, images, task_vars): """Filter a list of images and return those available locally.""" @@ -124,7 +132,8 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): ] def is_image_local(self, image, task_vars): - result = self.module_executor("docker_image_facts", {"name": image}, task_vars) + """Check if image is already in local docker index.""" + result = self.execute_module("docker_image_facts", {"name": image}, task_vars=task_vars) if result.get("failed", False): return False @@ -132,6 +141,7 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): @staticmethod def known_docker_registries(task_vars): + """Build a list of docker registries available according to inventory vars.""" docker_facts = get_var(task_vars, "openshift", "docker") regs = set(docker_facts["additional_registries"]) @@ -147,17 +157,21 @@ class DockerImageAvailability(DockerHostMixin, OpenShiftCheck): """Inspect existing images using Skopeo and return all images successfully inspected.""" return [ image for image in images - if any(self.is_available_skopeo_image(image, registry, task_vars) for registry in registries) + if self.is_available_skopeo_image(image, registries, task_vars) ] - def is_available_skopeo_image(self, image, registry, task_vars): - """Uses Skopeo to determine if required image exists in a given registry.""" + def is_available_skopeo_image(self, image, registries, task_vars): + """Use Skopeo to determine if required image exists in known registry(s).""" + + # if image does already includes a registry, just use that + if image.count("/") > 1: + registry, image = image.split("/", 1) + registries = [registry] - cmd_str = "skopeo inspect docker://{registry}/{image}".format( - registry=registry, - image=image, - ) + for registry in registries: + args = {"_raw_params": "skopeo inspect docker://{}/{}".format(registry, image)} + result = self.execute_module("command", args, task_vars=task_vars) + if result.get("rc", 0) == 0 and not result.get("failed"): + return True - args = {"_raw_params": cmd_str} - result = self.module_executor("command", args, task_vars) - return not result.get("failed", False) and result.get("rc", 0) == 0 + return False diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py index 7f1751b36..2bd615457 100644 --- a/roles/openshift_health_checker/openshift_checks/docker_storage.py +++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py @@ -34,7 +34,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): } # attempt to get the docker info hash from the API - info = self.execute_module("docker_info", {}, task_vars) + info = self.execute_module("docker_info", {}, task_vars=task_vars) if info.get("failed"): return {"failed": True, "changed": changed, "msg": "Failed to query Docker API. Is docker running on this host?"} @@ -146,7 +146,7 @@ class DockerStorage(DockerHostMixin, OpenShiftCheck): vgs_cmd = "/sbin/vgs --noheadings -o vg_free --select vg_name=" + vg_name # should return free space like " 12.00g" if the VG exists; empty if it does not - ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars) + ret = self.execute_module("command", {"_raw_params": vgs_cmd}, task_vars=task_vars) if ret.get("failed") or ret.get("rc", 0) != 0: raise OpenShiftCheckException( "Is LVM installed? Failed to run /sbin/vgs " diff --git a/roles/openshift_health_checker/openshift_checks/mixins.py b/roles/openshift_health_checker/openshift_checks/mixins.py index 7f3d78cc4..2cb2e21aa 100644 --- a/roles/openshift_health_checker/openshift_checks/mixins.py +++ b/roles/openshift_health_checker/openshift_checks/mixins.py @@ -40,8 +40,11 @@ class DockerHostMixin(object): # NOTE: we would use the "package" module but it's actually an action plugin # and it's not clear how to invoke one of those. This is about the same anyway: - pkg_manager = get_var(task_vars, "ansible_pkg_mgr", default="yum") - result = self.module_executor(pkg_manager, {"name": self.dependencies, "state": "present"}, task_vars) + result = self.execute_module( + get_var(task_vars, "ansible_pkg_mgr", default="yum"), + {"name": self.dependencies, "state": "present"}, + task_vars=task_vars, + ) msg = result.get("msg", "") if result.get("failed"): if "No package matching" in msg: diff --git a/roles/openshift_health_checker/openshift_checks/ovs_version.py b/roles/openshift_health_checker/openshift_checks/ovs_version.py index 1e45ae3af..2dd045f1f 100644 --- a/roles/openshift_health_checker/openshift_checks/ovs_version.py +++ b/roles/openshift_health_checker/openshift_checks/ovs_version.py @@ -43,7 +43,7 @@ class OvsVersion(NotContainerizedMixin, OpenShiftCheck): }, ], } - return self.execute_module("rpm_version", args, task_vars) + return self.execute_module("rpm_version", args, task_vars=task_vars) def get_required_ovs_version(self, task_vars): """Return the correct Open vSwitch version for the current OpenShift version""" diff --git a/roles/openshift_health_checker/openshift_checks/package_availability.py b/roles/openshift_health_checker/openshift_checks/package_availability.py index a7eb720fd..e87567fe6 100644 --- a/roles/openshift_health_checker/openshift_checks/package_availability.py +++ b/roles/openshift_health_checker/openshift_checks/package_availability.py @@ -25,7 +25,7 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): packages.update(self.node_packages(rpm_prefix)) args = {"packages": sorted(set(packages))} - return self.execute_module("check_yum_update", args, tmp, task_vars) + return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars) @staticmethod def master_packages(rpm_prefix): @@ -36,7 +36,6 @@ class PackageAvailability(NotContainerizedMixin, OpenShiftCheck): "bash-completion", "cockpit-bridge", "cockpit-docker", - "cockpit-kubernetes", "cockpit-shell", "cockpit-ws", "etcd", diff --git a/roles/openshift_health_checker/openshift_checks/package_update.py b/roles/openshift_health_checker/openshift_checks/package_update.py index fd0c0a755..f432380c6 100644 --- a/roles/openshift_health_checker/openshift_checks/package_update.py +++ b/roles/openshift_health_checker/openshift_checks/package_update.py @@ -11,4 +11,4 @@ class PackageUpdate(NotContainerizedMixin, OpenShiftCheck): def run(self, tmp, task_vars): args = {"packages": []} - return self.execute_module("check_yum_update", args, tmp, task_vars) + return self.execute_module("check_yum_update", args, tmp=tmp, task_vars=task_vars) diff --git a/roles/openshift_health_checker/openshift_checks/package_version.py b/roles/openshift_health_checker/openshift_checks/package_version.py index 2e737818b..6a76bb93d 100644 --- a/roles/openshift_health_checker/openshift_checks/package_version.py +++ b/roles/openshift_health_checker/openshift_checks/package_version.py @@ -71,7 +71,7 @@ class PackageVersion(NotContainerizedMixin, OpenShiftCheck): ], } - return self.execute_module("aos_version", args, tmp, task_vars) + return self.execute_module("aos_version", args, tmp=tmp, task_vars=task_vars) def get_required_ovs_version(self, task_vars): """Return the correct Open vSwitch version for the current OpenShift version. diff --git a/roles/openshift_health_checker/test/docker_image_availability_test.py b/roles/openshift_health_checker/test/docker_image_availability_test.py index 197c65f51..0a7c0f8d3 100644 --- a/roles/openshift_health_checker/test/docker_image_availability_test.py +++ b/roles/openshift_health_checker/test/docker_image_availability_test.py @@ -31,15 +31,15 @@ def test_is_active(deployment_type, is_containerized, group_names, expect_active (False, True), ]) def test_all_images_available_locally(is_containerized, is_atomic): - def execute_module(module_name, args, task_vars): + def execute_module(module_name, module_args, task_vars): if module_name == "yum": return {"changed": True} assert module_name == "docker_image_facts" - assert 'name' in args - assert args['name'] + assert 'name' in module_args + assert module_args['name'] return { - 'images': [args['name']], + 'images': [module_args['name']], } result = DockerImageAvailability(execute_module=execute_module).run(tmp=None, task_vars=dict( @@ -52,8 +52,8 @@ def test_all_images_available_locally(is_containerized, is_atomic): docker=dict(additional_registries=["docker.io"]), ), openshift_deployment_type='origin', - openshift_release='v3.4', openshift_image_tag='3.4', + group_names=['nodes', 'masters'], )) assert not result.get('failed', False) @@ -64,7 +64,7 @@ def test_all_images_available_locally(is_containerized, is_atomic): True, ]) def test_all_images_available_remotely(available_locally): - def execute_module(module_name, args, task_vars): + def execute_module(module_name, module_args, task_vars): if module_name == 'docker_image_facts': return {'images': [], 'failed': available_locally} return {'changed': False} @@ -79,8 +79,8 @@ def test_all_images_available_remotely(available_locally): docker=dict(additional_registries=["docker.io", "registry.access.redhat.com"]), ), openshift_deployment_type='origin', - openshift_release='3.4', openshift_image_tag='v3.4', + group_names=['nodes', 'masters'], )) assert not result.get('failed', False) @@ -108,8 +108,8 @@ def test_all_images_unavailable(): docker=dict(additional_registries=["docker.io"]), ), openshift_deployment_type="openshift-enterprise", - openshift_release=None, - openshift_image_tag='latest' + openshift_image_tag='latest', + group_names=['nodes', 'masters'], )) assert actual['failed'] @@ -147,8 +147,8 @@ def test_skopeo_update_failure(message, extra_words): docker=dict(additional_registries=["unknown.io"]), ), openshift_deployment_type="openshift-enterprise", - openshift_release='', openshift_image_tag='', + group_names=['nodes', 'masters'], )) assert actual["failed"] @@ -177,8 +177,85 @@ def test_registry_availability(deployment_type, registries): docker=dict(additional_registries=registries), ), openshift_deployment_type=deployment_type, - openshift_release='', openshift_image_tag='', + group_names=['nodes', 'masters'], )) assert not actual.get("failed", False) + + +@pytest.mark.parametrize("deployment_type, is_containerized, groups, oreg_url, expected", [ + ( # standard set of stuff required on nodes + "origin", False, ['nodes'], None, + set([ + 'openshift/origin-pod:vtest', + 'openshift/origin-deployer:vtest', + 'openshift/origin-docker-registry:vtest', + 'openshift/origin-haproxy-router:vtest', + 'cockpit/kubernetes', # origin version of registry-console + ]) + ), + ( # set a different URL for images + "origin", False, ['nodes'], 'foo.io/openshift/origin-${component}:${version}', + set([ + 'foo.io/openshift/origin-pod:vtest', + 'foo.io/openshift/origin-deployer:vtest', + 'foo.io/openshift/origin-docker-registry:vtest', + 'foo.io/openshift/origin-haproxy-router:vtest', + 'cockpit/kubernetes', # AFAICS this is not built from the URL + ]) + ), + ( + "origin", True, ['nodes', 'masters', 'etcd'], None, + set([ + # images running on top of openshift + 'openshift/origin-pod:vtest', + 'openshift/origin-deployer:vtest', + 'openshift/origin-docker-registry:vtest', + 'openshift/origin-haproxy-router:vtest', + 'cockpit/kubernetes', + # containerized component images + 'openshift/origin:vtest', + 'openshift/node:vtest', + 'openshift/openvswitch:vtest', + 'registry.access.redhat.com/rhel7/etcd', + ]) + ), + ( # enterprise images + "openshift-enterprise", True, ['nodes'], 'foo.io/openshift3/ose-${component}:f13ac45', + set([ + 'foo.io/openshift3/ose-pod:f13ac45', + 'foo.io/openshift3/ose-deployer:f13ac45', + 'foo.io/openshift3/ose-docker-registry:f13ac45', + 'foo.io/openshift3/ose-haproxy-router:f13ac45', + # registry-console is not constructed/versioned the same as the others. + 'registry.access.redhat.com/openshift3/registry-console', + # containerized images aren't built from oreg_url + 'openshift3/node:vtest', + 'openshift3/openvswitch:vtest', + ]) + ), + ( + "openshift-enterprise", True, ['etcd', 'lb'], 'foo.io/openshift3/ose-${component}:f13ac45', + set([ + 'registry.access.redhat.com/rhel7/etcd', + # lb does not yet come in a containerized version + ]) + ), + +]) +def test_required_images(deployment_type, is_containerized, groups, oreg_url, expected): + task_vars = dict( + openshift=dict( + common=dict( + is_containerized=is_containerized, + is_atomic=False, + ), + ), + openshift_deployment_type=deployment_type, + group_names=groups, + oreg_url=oreg_url, + openshift_image_tag='vtest', + ) + + assert expected == DockerImageAvailability("DUMMY").required_images(task_vars) diff --git a/roles/openshift_health_checker/test/docker_storage_test.py b/roles/openshift_health_checker/test/docker_storage_test.py index 292a323db..876614b1d 100644 --- a/roles/openshift_health_checker/test/docker_storage_test.py +++ b/roles/openshift_health_checker/test/docker_storage_test.py @@ -77,7 +77,7 @@ non_atomic_task_vars = {"openshift": {"common": {"is_atomic": False}}} ), ]) def test_check_storage_driver(docker_info, failed, expect_msg): - def execute_module(module_name, args, tmp=None, task_vars=None): + def execute_module(module_name, module_args, tmp=None, task_vars=None): if module_name == "yum": return {} if module_name != "docker_info": @@ -187,7 +187,7 @@ def test_dm_usage(task_vars, driver_status, vg_free, success, expect_msg): ) ]) def test_vg_free(pool, command_returns, raises, returns): - def execute_module(module_name, args, tmp=None, task_vars=None): + def execute_module(module_name, module_args, tmp=None, task_vars=None): if module_name != "command": raise ValueError("not expecting module " + module_name) return command_returns diff --git a/roles/openshift_logging/README.md b/roles/openshift_logging/README.md index 0c60ef6fd..dd0f22d4b 100644 --- a/roles/openshift_logging/README.md +++ b/roles/openshift_logging/README.md @@ -55,6 +55,9 @@ When both `openshift_logging_install_logging` and `openshift_logging_upgrade_log - `openshift_logging_fluentd_use_journal`: NOTE: Fluentd will attempt to detect whether or not Docker is using the journald log driver when using the default of empty. - `openshift_logging_fluentd_journal_read_from_head`: If empty, Fluentd will use its internal default, which is false. - `openshift_logging_fluentd_hosts`: List of nodes that should be labeled for Fluentd to be deployed to. Defaults to ['--all']. +- `openshift_logging_fluentd_buffer_queue_limit`: Buffer queue limit for Fluentd. Defaults to 1024. +- `openshift_logging_fluentd_buffer_size_limit`: Buffer chunk limit for Fluentd. Defaults to 1m. + - `openshift_logging_es_host`: The name of the ES service Fluentd should send logs to. Defaults to 'logging-es'. - `openshift_logging_es_port`: The port for the ES service Fluentd should sent its logs to. Defaults to '9200'. @@ -155,3 +158,5 @@ Elasticsearch OPS too, if using an OPS cluster: - `openshift_logging_mux_namespaces`: Default `[]` - additional namespaces to create for _external_ mux clients to associate with their logs - users will need to set this +- `openshift_logging_mux_buffer_queue_limit`: Default `[1024]` - Buffer queue limit for Mux. +- `openshift_logging_mux_buffer_size_limit`: Default `[1m]` - Buffer chunk limit for Mux. diff --git a/roles/openshift_logging/defaults/main.yml b/roles/openshift_logging/defaults/main.yml index 3c343c9dc..66d880d23 100644 --- a/roles/openshift_logging/defaults/main.yml +++ b/roles/openshift_logging/defaults/main.yml @@ -76,6 +76,8 @@ openshift_logging_fluentd_use_journal: "{{ openshift_hosted_logging_use_journal openshift_logging_fluentd_journal_source: "{{ openshift_hosted_logging_journal_source | default('') }}" openshift_logging_fluentd_journal_read_from_head: "{{ openshift_hosted_logging_journal_read_from_head | default('') }}" openshift_logging_fluentd_hosts: ['--all'] +openshift_logging_fluentd_buffer_queue_limit: 1024 +openshift_logging_fluentd_buffer_size_limit: 1m openshift_logging_es_host: logging-es openshift_logging_es_port: 9200 diff --git a/roles/openshift_logging_fluentd/templates/fluentd.j2 b/roles/openshift_logging_fluentd/templates/fluentd.j2 index e185938e3..a5695ee26 100644 --- a/roles/openshift_logging_fluentd/templates/fluentd.j2 +++ b/roles/openshift_logging_fluentd/templates/fluentd.j2 @@ -93,6 +93,14 @@ spec: value: "{{ openshift_logging_fluentd_journal_source | default('') }}" - name: "JOURNAL_READ_FROM_HEAD" value: "{{ openshift_logging_fluentd_journal_read_from_head | lower }}" + - name: "BUFFER_QUEUE_LIMIT" + value: "{{ openshift_logging_fluentd_buffer_queue_limit }}" + - name: "BUFFER_SIZE_LIMIT" + value: "{{ openshift_logging_fluentd_buffer_size_limit }}" + - name: "FLUENTD_CPU_LIMIT" + value: "{{ openshift_logging_fluentd_cpu_limit }}" + - name: "FLUENTD_MEMORY_LIMIT" + value: "{{ openshift_logging_fluentd_memory_limit }}" volumes: - name: runlogjournal hostPath: diff --git a/roles/openshift_logging_mux/defaults/main.yml b/roles/openshift_logging_mux/defaults/main.yml index 10fa4372c..77e47d38c 100644 --- a/roles/openshift_logging_mux/defaults/main.yml +++ b/roles/openshift_logging_mux/defaults/main.yml @@ -10,7 +10,9 @@ openshift_logging_mux_namespace: logging ### Common settings openshift_logging_mux_nodeselector: "{{ openshift_hosted_logging_mux_nodeselector_label | default('') | map_from_pairs }}" openshift_logging_mux_cpu_limit: 500m -openshift_logging_mux_memory_limit: 1Gi +openshift_logging_mux_memory_limit: 2Gi +openshift_logging_mux_buffer_queue_limit: 1024 +openshift_logging_mux_buffer_size_limit: 1m openshift_logging_mux_replicas: 1 diff --git a/roles/openshift_logging_mux/templates/mux.j2 b/roles/openshift_logging_mux/templates/mux.j2 index 502cd3347..243698c6a 100644 --- a/roles/openshift_logging_mux/templates/mux.j2 +++ b/roles/openshift_logging_mux/templates/mux.j2 @@ -103,6 +103,14 @@ spec: value: "true" - name: MUX_ALLOW_EXTERNAL value: "{{ openshift_logging_mux_allow_external | default('false') }}" + - name: "BUFFER_QUEUE_LIMIT" + value: "{{ openshift_logging_mux_buffer_queue_limit }}" + - name: "BUFFER_SIZE_LIMIT" + value: "{{ openshift_logging_mux_buffer_size_limit }}" + - name: "MUX_CPU_LIMIT" + value: "{{ openshift_logging_mux_cpu_limit }}" + - name: "MUX_MEMORY_LIMIT" + value: "{{ openshift_logging_mux_memory_limit }}" volumes: - name: config configMap: diff --git a/roles/openshift_metrics/README.md b/roles/openshift_metrics/README.md index 84503217b..1f10de4a2 100644 --- a/roles/openshift_metrics/README.md +++ b/roles/openshift_metrics/README.md @@ -68,6 +68,9 @@ For default values, see [`defaults/main.yaml`](defaults/main.yaml). - `openshift_metrics_resolution`: How often metrics should be gathered. +- `openshift_metrics_install_hawkular_agent`: Install the Hawkular OpenShift Agent (HOSA). HOSA can be used + to collect custom metrics from your pods. This component is currently in tech-preview and is not installed by default. + ## Additional variables to control resource limits Each metrics component (hawkular, cassandra, heapster) can specify a cpu and memory limits and requests by setting the corresponding role variable: diff --git a/roles/openshift_metrics/defaults/main.yaml b/roles/openshift_metrics/defaults/main.yaml index 467db34c8..ba50566e9 100644 --- a/roles/openshift_metrics/defaults/main.yaml +++ b/roles/openshift_metrics/defaults/main.yaml @@ -31,6 +31,14 @@ openshift_metrics_heapster_requests_memory: 0.9375G openshift_metrics_heapster_requests_cpu: null openshift_metrics_heapster_nodeselector: "" +openshift_metrics_install_hawkular_agent: False +openshift_metrics_hawkular_agent_limits_memory: null +openshift_metrics_hawkular_agent_limits_cpu: null +openshift_metrics_hawkular_agent_requests_memory: null +openshift_metrics_hawkular_agent_requests_cpu: null +openshift_metrics_hawkular_agent_nodeselector: "" +openshift_metrics_hawkular_agent_namespace: "default" + openshift_metrics_hawkular_hostname: "hawkular-metrics.{{openshift_master_default_subdomain}}" openshift_metrics_duration: 7 diff --git a/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml b/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml index fb4fe2f03..7b81b3c10 100644 --- a/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml +++ b/roles/openshift_metrics/tasks/generate_hawkular_certificates.yaml @@ -73,6 +73,8 @@ {{ hawkular_secrets['hawkular-metrics.key'] }} tls.truststore.crt: > {{ hawkular_secrets['hawkular-cassandra.crt'] }} + ca.crt: > + {{ hawkular_secrets['ca.crt'] }} when: name not in metrics_secrets.stdout_lines changed_when: no diff --git a/roles/openshift_metrics/tasks/install_hosa.yaml b/roles/openshift_metrics/tasks/install_hosa.yaml new file mode 100644 index 000000000..cc533a68b --- /dev/null +++ b/roles/openshift_metrics/tasks/install_hosa.yaml @@ -0,0 +1,44 @@ +--- +- name: Generate Hawkular Agent (HOSA) Cluster Role + template: + src: hawkular_openshift_agent_role.j2 + dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-role.yaml" + changed_when: no + +- name: Generate Hawkular Agent (HOSA) Service Account + template: + src: hawkular_openshift_agent_sa.j2 + dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-sa.yaml" + changed_when: no + +- name: Generate Hawkular Agent (HOSA) Daemon Set + template: + src: hawkular_openshift_agent_ds.j2 + dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-ds.yaml" + vars: + node_selector: "{{openshift_metrics_hawkular_agent_nodeselector | default('') }}" + changed_when: no + +- name: Generate the Hawkular Agent (HOSA) Configmap + template: + src: hawkular_openshift_agent_cm.j2 + dest: "{{mktemp.stdout}}/templates/metrics-hawkular-openshift-agent-cm.yaml" + changed_when: no + +- name: Generate role binding for the hawkular-openshift-agent service account + template: + src: rolebinding.j2 + dest: "{{ mktemp.stdout }}/templates/metrics-hawkular-agent-rolebinding.yaml" + vars: + cluster: True + obj_name: hawkular-openshift-agent-rb + labels: + metrics-infra: hawkular-agent + roleRef: + kind: ClusterRole + name: hawkular-openshift-agent + subjects: + - kind: ServiceAccount + name: hawkular-openshift-agent + namespace: "{{openshift_metrics_hawkular_agent_namespace}}" + changed_when: no diff --git a/roles/openshift_metrics/tasks/install_metrics.yaml b/roles/openshift_metrics/tasks/install_metrics.yaml index 74eb56713..fdf4ae57f 100644 --- a/roles/openshift_metrics/tasks/install_metrics.yaml +++ b/roles/openshift_metrics/tasks/install_metrics.yaml @@ -16,11 +16,19 @@ include: install_heapster.yaml when: openshift_metrics_heapster_standalone | bool -- find: paths={{ mktemp.stdout }}/templates patterns=*.yaml +- name: Install Hawkular OpenShift Agent (HOSA) + include: install_hosa.yaml + when: openshift_metrics_install_hawkular_agent | default(false) | bool + +- find: + paths: "{{ mktemp.stdout }}/templates" + patterns: "^(?!metrics-hawkular-openshift-agent).*.yaml" + use_regex: true register: object_def_files changed_when: no -- slurp: src={{item.path}} +- slurp: + src: "{{item.path}}" register: object_defs with_items: "{{object_def_files.files}}" changed_when: no @@ -34,6 +42,31 @@ file_content: "{{ item.content | b64decode | from_yaml }}" with_items: "{{ object_defs.results }}" +- find: + paths: "{{ mktemp.stdout }}/templates" + patterns: "^metrics-hawkular-openshift-agent.*.yaml" + use_regex: true + register: hawkular_agent_object_def_files + when: openshift_metrics_install_hawkular_agent | bool + changed_when: no + +- slurp: + src: "{{item.path}}" + register: hawkular_agent_object_defs + with_items: "{{ hawkular_agent_object_def_files.files }}" + when: openshift_metrics_install_hawkular_agent | bool + changed_when: no + +- name: Create Hawkular Agent objects + include: oc_apply.yaml + vars: + kubeconfig: "{{ mktemp.stdout }}/admin.kubeconfig" + namespace: "{{ openshift_metrics_hawkular_agent_namespace }}" + file_name: "{{ item.source }}" + file_content: "{{ item.content | b64decode | from_yaml }}" + with_items: "{{ hawkular_agent_object_defs.results }}" + when: openshift_metrics_install_hawkular_agent | bool + - include: update_master_config.yaml - command: > diff --git a/roles/openshift_metrics/tasks/main.yaml b/roles/openshift_metrics/tasks/main.yaml index 5d8506a73..0b5f23c24 100644 --- a/roles/openshift_metrics/tasks/main.yaml +++ b/roles/openshift_metrics/tasks/main.yaml @@ -44,6 +44,9 @@ - include: "{{ (openshift_metrics_install_metrics | bool) | ternary('install_metrics.yaml','uninstall_metrics.yaml') }}" +- include: uninstall_hosa.yaml + when: not openshift_metrics_install_hawkular_agent | bool + - name: Delete temp directory local_action: file path=local_tmp.stdout state=absent tags: metrics_cleanup diff --git a/roles/openshift_metrics/tasks/oc_apply.yaml b/roles/openshift_metrics/tasks/oc_apply.yaml index dd67703b4..1e1af40e8 100644 --- a/roles/openshift_metrics/tasks/oc_apply.yaml +++ b/roles/openshift_metrics/tasks/oc_apply.yaml @@ -14,7 +14,7 @@ command: > {{ openshift.common.client_binary }} --config={{ kubeconfig }} apply -f {{ file_name }} - -n {{ openshift_metrics_project }} + -n {{namespace}} register: generation_apply failed_when: "'error' in generation_apply.stderr" changed_when: no diff --git a/roles/openshift_metrics/tasks/uninstall_hosa.yaml b/roles/openshift_metrics/tasks/uninstall_hosa.yaml new file mode 100644 index 000000000..42ed02460 --- /dev/null +++ b/roles/openshift_metrics/tasks/uninstall_hosa.yaml @@ -0,0 +1,15 @@ +--- +- name: remove Hawkular Agent (HOSA) components + command: > + {{ openshift.common.client_binary }} -n {{ openshift_metrics_hawkular_agent_namespace }} --config={{ mktemp.stdout }}/admin.kubeconfig + delete --ignore-not-found --selector=metrics-infra=agent + all,sa,secrets,templates,routes,pvc,rolebindings,clusterrolebindings + register: delete_metrics + changed_when: delete_metrics.stdout != 'No resources found' + +- name: remove rolebindings + command: > + {{ openshift.common.client_binary }} -n {{ openshift_metrics_hawkular_agent_namespace }} --config={{ mktemp.stdout }}/admin.kubeconfig + delete --ignore-not-found + clusterrolebinding/hawkular-openshift-agent-rb + changed_when: delete_metrics.stdout != 'No resources found' diff --git a/roles/openshift_metrics/templates/hawkular_openshift_agent_cm.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_cm.j2 new file mode 100644 index 000000000..bf472c066 --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_cm.j2 @@ -0,0 +1,54 @@ +id: hawkular-openshift-agent +kind: ConfigMap +apiVersion: v1 +name: Hawkular OpenShift Agent Configuration +metadata: + name: hawkular-openshift-agent-configuration + labels: + metrics-infra: agent + namespace: {{openshift_metrics_hawkular_agent_namespace}} +data: + config.yaml: | + kubernetes: + tenant: ${POD:namespace_name} + hawkular_server: + url: https://hawkular-metrics.openshift-infra.svc.cluster.local + credentials: + username: secret:openshift-infra/hawkular-metrics-account/hawkular-metrics.username + password: secret:openshift-infra/hawkular-metrics-account/hawkular-metrics.password + ca_cert_file: secret:openshift-infra/hawkular-metrics-certs/ca.crt + emitter: + status_enabled: false + collector: + minimum_collection_interval: 10s + default_collection_interval: 30s + metric_id_prefix: pod/${POD:uid}/custom/ + tags: + metric_name: ${METRIC:name} + description: ${METRIC:description} + units: ${METRIC:units} + namespace_id: ${POD:namespace_uid} + namespace_name: ${POD:namespace_name} + node_name: ${POD:node_name} + pod_id: ${POD:uid} + pod_ip: ${POD:ip} + pod_name: ${POD:name} + pod_namespace: ${POD:namespace_name} + hostname: ${POD:hostname} + host_ip: ${POD:host_ip} + labels: ${POD:labels} + type: pod + collector: hawkular_openshift_agent + custom_metric: true + hawkular-openshift-agent: | + endpoints: + - type: prometheus + protocol: "http" + port: 8080 + path: /metrics + collection_interval: 30s + metrics: + - name: hawkular_openshift_agent_metric_data_points_collected_total + - name: hawkular_openshift_agent_monitored_endpoints + - name: hawkular_openshift_agent_monitored_pods + - name: hawkular_openshift_agent_monitored_metrics diff --git a/roles/openshift_metrics/templates/hawkular_openshift_agent_ds.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_ds.j2 new file mode 100644 index 000000000..d65eaf9ae --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_ds.j2 @@ -0,0 +1,91 @@ +apiVersion: extensions/v1beta1 +kind: DaemonSet +metadata: + name: hawkular-openshift-agent + labels: + name: hawkular-openshift-agent + metrics-infra: agent + namespace: {{openshift_metrics_hawkular_agent_namespace}} +spec: + selector: + matchLabels: + name: hawkular-openshift-agent + template: + metadata: + labels: + name: hawkular-openshift-agent + metrics-infra: agent + spec: + serviceAccount: hawkular-openshift-agent +{% if node_selector is iterable and node_selector | length > 0 %} + nodeSelector: +{% for key, value in node_selector.iteritems() %} + {{key}}: "{{value}}" +{% endfor %} +{% endif %} + containers: + - image: {{openshift_metrics_image_prefix}}metrics-hawkular-openshift-agent:{{openshift_metrics_image_version}} + imagePullPolicy: Always + name: hawkular-openshift-agent +{% if ((openshift_metrics_hawkular_agent_limits_cpu is defined and openshift_metrics_hawkular_agent_limits_cpu is not none) + or (openshift_metrics_hawkular_agent_limits_memory is defined and openshift_metrics_hawkular_agent_limits_memory is not none) + or (openshift_metrics_hawkular_agent_requests_cpu is defined and openshift_metrics_hawkular_agent_requests_cpu is not none) + or (openshift_metrics_hawkular_agent_requests_memory is defined and openshift_metrics_hawkular_agent_requests_memory is not none)) +%} + resources: +{% if (openshift_metrics_hawkular_agent_limits_cpu is not none + or openshift_metrics_hawkular_agent_limits_memory is not none) +%} + limits: +{% if openshift_metrics_hawkular_agent_limits_cpu is not none %} + cpu: "{{openshift_metrics_hawkular_agent_limits_cpu}}" +{% endif %} +{% if openshift_metrics_hawkular_agent_limits_memory is not none %} + memory: "{{openshift_metrics_hawkular_agent_limits_memory}}" +{% endif %} +{% endif %} +{% if (openshift_metrics_hawkular_agent_requests_cpu is not none + or openshift_metrics_hawkular_agent_requests_memory is not none) +%} + requests: +{% if openshift_metrics_hawkular_agent_requests_cpu is not none %} + cpu: "{{openshift_metrics_hawkular_agent_requests_cpu}}" +{% endif %} +{% if openshift_metrics_hawkular_agent_requests_memory is not none %} + memory: "{{openshift_metrics_hawkular_agent_requests_memory}}" +{% endif %} +{% endif %} +{% endif %} + + livenessProbe: + httpGet: + scheme: HTTP + path: /health + port: 8080 + initialDelaySeconds: 30 + periodSeconds: 30 + command: + - "hawkular-openshift-agent" + - "-config" + - "/hawkular-openshift-agent-configuration/config.yaml" + - "-v" + - "3" + env: + - name: K8S_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + volumeMounts: + - name: hawkular-openshift-agent-configuration + mountPath: "/hawkular-openshift-agent-configuration" + volumes: + - name: hawkular-openshift-agent-configuration + configMap: + name: hawkular-openshift-agent-configuration + - name: hawkular-openshift-agent + configMap: + name: hawkular-openshift-agent-configuration diff --git a/roles/openshift_metrics/templates/hawkular_openshift_agent_role.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_role.j2 new file mode 100644 index 000000000..24b8cd801 --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_role.j2 @@ -0,0 +1,25 @@ +apiVersion: v1 +kind: ClusterRole +metadata: + name: hawkular-openshift-agent + labels: + metrics-infra: agent +rules: +- apiGroups: + - "" + resources: + - configmaps + - namespaces + - nodes + - pods + - projects + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - secrets + verbs: + - get diff --git a/roles/openshift_metrics/templates/hawkular_openshift_agent_sa.j2 b/roles/openshift_metrics/templates/hawkular_openshift_agent_sa.j2 new file mode 100644 index 000000000..ec604d73c --- /dev/null +++ b/roles/openshift_metrics/templates/hawkular_openshift_agent_sa.j2 @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: hawkular-openshift-agent + labels: + metrics-infra: agent + namespace: {{openshift_metrics_hawkular_agent_namespace}} diff --git a/roles/openshift_repos/tasks/main.yaml b/roles/openshift_repos/tasks/main.yaml index 023b1a9b7..8f8550e2d 100644 --- a/roles/openshift_repos/tasks/main.yaml +++ b/roles/openshift_repos/tasks/main.yaml @@ -4,7 +4,8 @@ path: /run/ostree-booted register: ostree_booted -- block: +- when: not ostree_booted.stat.exists + block: - name: Ensure libselinux-python is installed package: name=libselinux-python state=present @@ -24,41 +25,40 @@ - openshift_additional_repos | length == 0 notify: refresh cache - # Note: OpenShift repositories under CentOS may be shipped through the - # "centos-release-openshift-origin" package which configures the repository. - # This task matches the file names provided by the package so that they are - # not installed twice in different files and remains idempotent. - - name: Configure origin gpg keys if needed - copy: - src: "{{ item.src }}" - dest: "{{ item.dest }}" - with_items: - - src: origin/gpg_keys/openshift-ansible-CentOS-SIG-PaaS - dest: /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-SIG-PaaS - - src: origin/repos/openshift-ansible-centos-paas-sig.repo - dest: /etc/yum.repos.d/CentOS-OpenShift-Origin.repo - notify: refresh cache - when: - - ansible_os_family == "RedHat" - - ansible_distribution != "Fedora" - - openshift_deployment_type == 'origin' - - openshift_enable_origin_repo | default(true) | bool - # Singleton block - - when: r_osr_first_run | default(true) + - when: r_openshift_repos_has_run is not defined block: + + # Note: OpenShift repositories under CentOS may be shipped through the + # "centos-release-openshift-origin" package which configures the repository. + # This task matches the file names provided by the package so that they are + # not installed twice in different files and remains idempotent. + - name: Configure origin gpg keys if needed + copy: + src: "{{ item.src }}" + dest: "{{ item.dest }}" + with_items: + - src: origin/gpg_keys/openshift-ansible-CentOS-SIG-PaaS + dest: /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-SIG-PaaS + - src: origin/repos/openshift-ansible-centos-paas-sig.repo + dest: /etc/yum.repos.d/CentOS-OpenShift-Origin.repo + notify: refresh cache + when: + - ansible_os_family == "RedHat" + - ansible_distribution != "Fedora" + - openshift_deployment_type == 'origin' + - openshift_enable_origin_repo | default(true) | bool + - name: Ensure clean repo cache in the event repos have been changed manually debug: msg: "First run of openshift_repos" changed_when: true notify: refresh cache - - name: Set fact r_osr_first_run false + - name: Record that openshift_repos already ran set_fact: - r_osr_first_run: false + r_openshift_repos_has_run: True # Force running ALL handlers now, because we expect repo cache to be cleared # if changes have been made. - meta: flush_handlers - - when: not ostree_booted.stat.exists |