diff options
Diffstat (limited to 'roles/openshift_health_checker')
4 files changed, 484 insertions, 0 deletions
# File: roles/openshift_health_checker/openshift_checks/docker_storage.py
# pylint: disable=missing-docstring
import json

from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var


class DockerStorage(OpenShiftCheck):
    """Check Docker storage sanity.

    Queries the ``docker-pool`` logical volume of the ``docker`` volume group
    with ``lvs`` and fails when thinpool data or metadata usage exceeds the
    configured maximum percentage.
    """

    name = "docker_storage"
    tags = ["preflight"]

    # Default usage ceilings, in percent. run() replaces them with the values
    # of the matching task variables when those are provided.
    max_thinpool_data_usage_percent = 90.0
    max_thinpool_meta_usage_percent = 90.0

    @classmethod
    def is_active(cls, task_vars):
        """Only run on hosts that depend on Docker.

        A host qualifies when it is containerized or belongs to the ``nodes``
        group. The base-class activation test must pass in either case.
        """
        is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
        is_node = "nodes" in get_var(task_vars, "group_names", default=[])
        # Parenthesised on purpose: without the grouping, `and` binds tighter
        # than `or`, so a node host would bypass the base-class gate entirely.
        return super(DockerStorage, cls).is_active(task_vars) and (is_containerized or is_node)

    def run(self, tmp, task_vars):
        """Run the check and return an Ansible-style result dict.

        Returns ``{}`` on success, or ``{"failed": True, "msg": ...}`` when a
        usage limit cannot be parsed or a threshold is exceeded.

        Raises:
            OpenShiftCheckException: when the host returns no usable thinpool
                data (propagated from the helper methods).
        """
        try:
            self.max_thinpool_data_usage_percent = float(get_var(
                task_vars, "max_thinpool_data_usage_percent",
                default=self.max_thinpool_data_usage_percent,
            ))
            self.max_thinpool_meta_usage_percent = float(get_var(
                task_vars, "max_thinpool_metadata_usage_percent",
                default=self.max_thinpool_meta_usage_percent,
            ))
        except (TypeError, ValueError) as err:
            # float() raises TypeError (not ValueError) for values such as
            # None or a list, so both must be reported as a bad limit.
            return {
                "failed": True,
                "msg": "Unable to convert thinpool data usage limit to float: {}".format(str(err))
            }

        err_msg = self.check_thinpool_usage(task_vars)
        if err_msg:
            return {"failed": True, "msg": err_msg}

        return {}

    def check_thinpool_usage(self, task_vars):
        """Return an error message if a usage threshold is exceeded, else ""."""
        lvs = self.get_lvs_data(task_vars)
        lv_data = self.extract_thinpool_obj(lvs)

        data_percent = self.get_thinpool_data_usage(lv_data)
        metadata_percent = self.get_thinpool_metadata_usage(lv_data)

        if data_percent > self.max_thinpool_data_usage_percent:
            msg = "thinpool data usage above maximum threshold of {threshold}%"
            return msg.format(threshold=self.max_thinpool_data_usage_percent)

        if metadata_percent > self.max_thinpool_meta_usage_percent:
            msg = "thinpool metadata usage above maximum threshold of {threshold}%"
            return msg.format(threshold=self.max_thinpool_meta_usage_percent)

        return ""

    def get_lvs_data(self, task_vars):
        """Run ``lvs`` on the host and return the "report" entry of its JSON output.

        Raises:
            OpenShiftCheckException: when the command failed, its stdout is not
                valid JSON, or the JSON carries no non-empty "report" entry.
        """
        lvs_cmd = "/sbin/lvs --select vg_name=docker --select lv_name=docker-pool --report-format json"
        result = self.exec_cmd(lvs_cmd, task_vars)

        if result.get("failed", False):
            msg = "no thinpool usage data returned by the host: {}"
            raise OpenShiftCheckException(msg.format(result.get("msg", "")))

        try:
            data_json = json.loads(result.get("stdout", ""))
        except (TypeError, ValueError) as err:
            # TypeError: stdout was present but not a string (e.g. None).
            raise OpenShiftCheckException("Invalid JSON value returned by lvs command: {}".format(str(err)))

        # Valid JSON that is not an object (e.g. a bare list) has no "report".
        data = data_json.get("report") if isinstance(data_json, dict) else None
        if not data:
            raise OpenShiftCheckException("no thinpool usage data returned by the host.")

        return data

    @staticmethod
    def _usage_percent(thinpool_lv_data, field):
        """Return ``thinpool_lv_data[field]`` as a float.

        Raises:
            OpenShiftCheckException: when the field is absent, empty, or not
                convertible to a number.
        """
        value = thinpool_lv_data.get(field)
        # Compare against the "missing" markers explicitly so that a numeric
        # zero is accepted as a legitimate usage value.
        if value is None or value == "":
            raise OpenShiftCheckException("no thinpool usage data returned by the host.")

        try:
            return float(value)
        except (TypeError, ValueError):
            msg = "Invalid thinpool usage value returned by lvs command for {}: {!r}"
            raise OpenShiftCheckException(msg.format(field, value))

    @staticmethod
    def get_thinpool_data_usage(thinpool_lv_data):
        """Return the thinpool data usage percentage as a float."""
        return DockerStorage._usage_percent(thinpool_lv_data, "data_percent")

    @staticmethod
    def get_thinpool_metadata_usage(thinpool_lv_data):
        """Return the thinpool metadata usage percentage as a float."""
        return DockerStorage._usage_percent(thinpool_lv_data, "metadata_percent")

    @staticmethod
    def extract_thinpool_obj(thinpool_data):
        """Return the first logical-volume entry of the first report section.

        Raises:
            OpenShiftCheckException: when the report or its "lv" list is
                missing or empty.
        """
        if not thinpool_data or not thinpool_data[0]:
            raise OpenShiftCheckException("no thinpool usage data returned by the host.")

        lv_data = thinpool_data[0].get("lv")
        if not lv_data or not lv_data[0]:
            raise OpenShiftCheckException("no thinpool usage data returned by the host.")

        return lv_data[0]

    def exec_cmd(self, cmd_str, task_vars):
        """Execute ``cmd_str`` on the host through the Ansible ``command`` module."""
        return self.execute_module("command", {
            "_raw_params": cmd_str,
        }, task_vars)
# ---------------------------------------------------------------------------
# File: roles/openshift_health_checker/openshift_checks/docker_storage_driver.py
# ---------------------------------------------------------------------------
# pylint: disable=missing-docstring
from openshift_checks import OpenShiftCheck, get_var


class DockerStorageDriver(OpenShiftCheck):
    """Check Docker storage driver compatibility.

    This check ensures that Docker is using a supported storage driver,
    and that Loopback is not being used (if using devicemapper).
    """

    name = "docker_storage_driver"
    tags = ["preflight"]

    # Drivers accepted by is_supported_storage_driver().
    storage_drivers = ["devicemapper", "overlay2"]

    @classmethod
    def is_active(cls, task_vars):
        """Skip non-containerized installations."""
        is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
        return super(DockerStorageDriver, cls).is_active(task_vars) and is_containerized

    def run(self, tmp, task_vars):
        """Inspect ``docker_info`` output and return an Ansible-style result dict.

        Returns ``{}`` on success, or ``{"failed": True, "msg": ...}`` when the
        driver is unsupported or devicemapper is backed by a loopback file.
        """
        # `or {}` also covers a module result whose "info" key is present but None.
        info = self.execute_module("docker_info", {}, task_vars).get("info") or {}

        if not self.is_supported_storage_driver(info):
            msg = "Unsupported Docker storage driver detected. Supported storage drivers: {drivers}"
            return {"failed": True, "msg": msg.format(drivers=', '.join(self.storage_drivers))}

        if self.is_using_loopback_device(info):
            msg = "Use of loopback devices is discouraged. Try running Docker with `--storage-opt dm.thinpooldev`"
            return {"failed": True, "msg": msg}

        return {}

    def is_supported_storage_driver(self, docker_info):
        """Return True when the reported "Driver" is in ``storage_drivers``."""
        return docker_info.get("Driver", "") in self.storage_drivers

    @staticmethod
    def is_using_loopback_device(docker_info):
        """Return True when devicemapper reports a non-empty "Data loop file"."""
        # Loopback device usage is only an issue if using devicemapper.
        # Skip this check if using any other storage driver.
        if docker_info.get("Driver", "") != "devicemapper":
            return False

        # `or []` tolerates a "DriverStatus" key that is present but None.
        for status in docker_info.get("DriverStatus") or []:
            if status[0] == "Data loop file":
                return bool(status[1])

        return False


# ---------------------------------------------------------------------------
# File: roles/openshift_health_checker/test/docker_storage_driver_test.py
# ---------------------------------------------------------------------------
import pytest


from openshift_checks.docker_storage_driver import DockerStorageDriver


@pytest.mark.parametrize('is_containerized,is_active', [
    (False, False),
    (True, True),
])
def test_is_active(is_containerized, is_active):
    task_vars = dict(
        openshift=dict(common=dict(is_containerized=is_containerized)),
    )
    assert DockerStorageDriver.is_active(task_vars=task_vars) == is_active


@pytest.mark.parametrize('info,failed,extra_words', [
    (
        {
            "Driver": "devicemapper",
            "DriverStatus": [("Pool Name", "docker-docker--pool")],
        },
        False,
        [],
    ),
    (
        {
            "Driver": "devicemapper",
            "DriverStatus": [("Data loop file", "true")],
        },
        True,
        ["Use of loopback devices is discouraged"],
    ),
    (
        # A null DriverStatus must be treated like an empty one.
        {
            "Driver": "devicemapper",
            "DriverStatus": None,
        },
        False,
        [],
    ),
    (
        {
            "Driver": "overlay2",
            "DriverStatus": []
        },
        False,
        [],
    ),
    (
        {
            "Driver": "overlay",
        },
        True,
        ["Unsupported Docker storage driver"],
    ),
    (
        {
            "Driver": "unsupported",
        },
        True,
        ["Unsupported Docker storage driver"],
    ),
])
def test_check_storage_driver(info, failed, extra_words):
    def execute_module(module_name, args, tmp=None, task_vars=None):
        if module_name != "docker_info":
            return {
                "changed": False,
            }

        return {
            "info": info
        }

    task_vars = dict(
        openshift=dict(common=dict(is_containerized=True))
    )

    check = DockerStorageDriver(execute_module=execute_module).run(tmp=None, task_vars=task_vars)

    if failed:
        assert check["failed"]
    else:
        assert not check.get("failed", False)

    for word in extra_words:
        assert word in check["msg"]


# ---------------------------------------------------------------------------
# File: roles/openshift_health_checker/test/docker_storage_test.py
# ---------------------------------------------------------------------------
import pytest
import json


from openshift_checks.docker_storage import DockerStorage, OpenShiftCheckException


# A complete `lvs` logical-volume entry for a healthy docker thinpool.
THINPOOL_LV = {
    "lv_name": "docker-pool", "vg_name": "docker", "lv_attr": "twi-aot---", "lv_size": "6.95g",
    "pool_lv": "", "origin": "", "data_percent": "58.96", "metadata_percent": "4.77",
    "move_pv": "", "mirror_log": "", "copy_percent": "", "convert_lv": "",
}


def lvs_report(*lvs):
    """Wrap logical-volume entries in the report envelope the check parses."""
    return {"report": [{"lv": list(lvs)}]}


def without(mapping, *keys):
    """Return a copy of ``mapping`` with ``keys`` removed."""
    return {key: value for key, value in mapping.items() if key not in keys}


def command_stub(response):
    """Build an execute_module fake that answers only the ``command`` module."""
    def execute_module(module_name, args, tmp=None, task_vars=None):
        if module_name != "command":
            return {
                "changed": False,
            }

        return response

    return execute_module


@pytest.mark.parametrize('is_containerized,group_names,is_active', [
    (False, [], False),
    (True, [], True),
    # Nodes depend on Docker even when the installation is not containerized.
    (False, ["nodes"], True),
])
def test_is_active(is_containerized, group_names, is_active):
    task_vars = dict(
        openshift=dict(common=dict(is_containerized=is_containerized)),
        group_names=group_names,
    )
    assert DockerStorage.is_active(task_vars=task_vars) == is_active


@pytest.mark.parametrize('stdout,message,failed,extra_words', [
    (None, "", True, ["no thinpool usage data"]),
    ("", "", False, ["Invalid JSON value returned by lvs command"]),
    (None, "invalid response", True, ["invalid response"]),
    ("invalid", "invalid response", False, ["Invalid JSON value"]),
])
def test_get_lvs_data_with_failed_response(stdout, message, failed, extra_words):
    response = {
        "msg": message,
        "failed": failed,
    }
    # A stdout of None models a module result that has no "stdout" key at all.
    if stdout is not None:
        response["stdout"] = stdout

    task_vars = dict(
        max_thinpool_data_usage_percent=90.0
    )

    check = DockerStorage(execute_module=command_stub(response))
    with pytest.raises(OpenShiftCheckException) as excinfo:
        check.run(tmp=None, task_vars=task_vars)

    for word in extra_words:
        assert word in str(excinfo.value)


@pytest.mark.parametrize('limit_percent,failed,extra_words', [
    ("90.0", False, []),
    (80.0, False, []),
    ("invalid percent", True, ["Unable to convert", "to float", "invalid percent"]),
    ("90%", True, ["Unable to convert", "to float", "90%"]),
])
def test_invalid_value_for_thinpool_usage_limit(limit_percent, failed, extra_words):
    execute_module = command_stub({
        "stdout": json.dumps(lvs_report(THINPOOL_LV)),
        "failed": False,
    })

    task_vars = dict(
        max_thinpool_data_usage_percent=limit_percent
    )

    check = DockerStorage(execute_module=execute_module).run(tmp=None, task_vars=task_vars)

    if failed:
        assert check["failed"]

        for word in extra_words:
            assert word in check["msg"]
    else:
        assert not check.get("failed", False)


def test_get_lvs_data_with_valid_response():
    execute_module = command_stub({
        "stdout": json.dumps(lvs_report(THINPOOL_LV)),
    })

    task_vars = dict(
        max_thinpool_data_usage_percent="90"
    )

    check = DockerStorage(execute_module=execute_module).run(tmp=None, task_vars=task_vars)
    assert not check.get("failed", False)


@pytest.mark.parametrize('response,extra_words', [
    (
        # Report section with no "lv" list at all.
        {
            "report": [{}],
        },
        ["no thinpool usage data"],
    ),
    (
        # LV entry lacking both usage fields.
        lvs_report(without(THINPOOL_LV, "lv_name", "pool_lv", "origin", "data_percent", "metadata_percent")),
        ["no thinpool usage data"],
    ),
    (
        # Empty "lv" list.
        lvs_report(),
        ["no thinpool usage data"],
    ),
    (
        # LV entry with data usage but no metadata usage.
        lvs_report(without(THINPOOL_LV, "metadata_percent")),
        ["no thinpool usage data"],
    ),
])
def test_get_lvs_data_with_incomplete_response(response, extra_words):
    execute_module = command_stub({
        "stdout": json.dumps(response)
    })

    task_vars = dict(
        max_thinpool_data_usage_percent=90.0
    )

    check = DockerStorage(execute_module=execute_module)
    with pytest.raises(OpenShiftCheckException) as excinfo:
        check.run(tmp=None, task_vars=task_vars)

    # Assert on the parametrized expectations rather than a hard-coded string.
    for word in extra_words:
        assert word in str(excinfo.value)


@pytest.mark.parametrize('response,extra_words', [
    (
        lvs_report(dict(THINPOOL_LV, data_percent="100.0", metadata_percent="90.0")),
        ["thinpool data usage above maximum threshold"],
    ),
    (
        lvs_report(dict(THINPOOL_LV, data_percent="10.0", metadata_percent="91.0")),
        ["thinpool metadata usage above maximum threshold"],
    ),
])
def test_get_lvs_data_with_high_thinpool_usage(response, extra_words):
    execute_module = command_stub({
        "stdout": json.dumps(response),
    })

    task_vars = dict(
        max_thinpool_data_usage_percent="90"
    )

    check = DockerStorage(execute_module=execute_module).run(tmp=None, task_vars=task_vars)

    assert check["failed"]
    for word in extra_words:
        assert word in check["msg"]