summary | refs | log | tree | commit | diff | stats
path: root/roles/openshift_health_checker
diff options
context:
space:
mode:
Diffstat (limited to 'roles/openshift_health_checker')
-rw-r--r-- roles/openshift_health_checker/openshift_checks/docker_storage.py 110
-rw-r--r-- roles/openshift_health_checker/openshift_checks/docker_storage_driver.py 50
-rw-r--r-- roles/openshift_health_checker/test/docker_storage_driver_test.py 81
-rw-r--r-- roles/openshift_health_checker/test/docker_storage_test.py 243
4 files changed, 484 insertions, 0 deletions
diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage.py b/roles/openshift_health_checker/openshift_checks/docker_storage.py
new file mode 100644
index 000000000..2dfe10a02
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/docker_storage.py
@@ -0,0 +1,110 @@
+# pylint: disable=missing-docstring
+import json
+
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
+
+
+class DockerStorage(OpenShiftCheck):
+    """Check Docker storage sanity.
+
+    Check for thinpool usage during a containerized installation.
+
+    Usage figures come from the JSON report of ``lvs`` for the ``docker-pool``
+    logical volume in the ``docker`` volume group, and are compared against
+    maximum percentages that can be overridden through task variables.
+    """
+
+    name = "docker_storage"
+    tags = ["preflight"]
+
+    # Default upper limits, in percent; run() replaces them with the values of
+    # the matching task variables when those are set.
+    max_thinpool_data_usage_percent = 90.0
+    max_thinpool_meta_usage_percent = 90.0
+
+    @classmethod
+    def is_active(cls, task_vars):
+        """Only run on hosts that depend on Docker.
+
+        The check applies to containerized hosts and to members of the
+        "nodes" group, provided the parent class considers it active.
+        """
+        is_containerized = get_var(task_vars, "openshift", "common", "is_containerized")
+        is_node = "nodes" in get_var(task_vars, "group_names", default=[])
+        # The host conditions are grouped so that being a node cannot
+        # re-enable a check the parent class has declared inactive.
+        return super(DockerStorage, cls).is_active(task_vars) and (is_containerized or is_node)
+
+    def run(self, tmp, task_vars):
+        """Compare thinpool usage on the host with the configured limits.
+
+        The limits are read from the ``max_thinpool_data_usage_percent`` and
+        ``max_thinpool_metadata_usage_percent`` task variables, falling back
+        to the class-level defaults.
+
+        Returns an empty dict on success, or a dict with ``failed`` and
+        ``msg`` keys when a limit cannot be converted to float or is exceeded.
+        OpenShiftCheckException raised while collecting the lvs data is not
+        caught here and propagates to the caller.
+        """
+        try:
+            self.max_thinpool_data_usage_percent = float(
+                get_var(task_vars, "max_thinpool_data_usage_percent",
+                        default=self.max_thinpool_data_usage_percent))
+            self.max_thinpool_meta_usage_percent = float(
+                get_var(task_vars, "max_thinpool_metadata_usage_percent",
+                        default=self.max_thinpool_meta_usage_percent))
+        except (TypeError, ValueError) as err:
+            # float() raises TypeError (not ValueError) for values such as
+            # None or a list; report those as a failed check as well.
+            return {
+                "failed": True,
+                "msg": "Unable to convert thinpool data usage limit to float: {}".format(str(err))
+            }
+
+        err_msg = self.check_thinpool_usage(task_vars)
+        if err_msg:
+            return {"failed": True, "msg": err_msg}
+
+        return {}
+
+    def check_thinpool_usage(self, task_vars):
+        """Return an error message if a usage limit is exceeded, else ""."""
+        lvs = self.get_lvs_data(task_vars)
+        lv_data = self.extract_thinpool_obj(lvs)
+
+        data_percent = self.get_thinpool_data_usage(lv_data)
+        metadata_percent = self.get_thinpool_metadata_usage(lv_data)
+
+        # Data usage is checked first, so it is the one reported when both
+        # limits are exceeded.
+        if data_percent > self.max_thinpool_data_usage_percent:
+            msg = "thinpool data usage above maximum threshold of {threshold}%"
+            return msg.format(threshold=self.max_thinpool_data_usage_percent)
+
+        if metadata_percent > self.max_thinpool_meta_usage_percent:
+            msg = "thinpool metadata usage above maximum threshold of {threshold}%"
+            return msg.format(threshold=self.max_thinpool_meta_usage_percent)
+
+        return ""
+
+    def get_lvs_data(self, task_vars):
+        """Run lvs on the host and return the "report" entry of its JSON output.
+
+        Raises OpenShiftCheckException when the command result is marked as
+        failed, when stdout is not valid JSON, or when the "report" entry is
+        missing or empty.
+        """
+        lvs_cmd = "/sbin/lvs --select vg_name=docker --select lv_name=docker-pool --report-format json"
+        result = self.exec_cmd(lvs_cmd, task_vars)
+
+        if result.get("failed", False):
+            msg = "no thinpool usage data returned by the host: {}"
+            raise OpenShiftCheckException(msg.format(result.get("msg", "")))
+
+        try:
+            data_json = json.loads(result.get("stdout", ""))
+        except ValueError as err:
+            raise OpenShiftCheckException("Invalid JSON value returned by lvs command: {}".format(str(err)))
+
+        data = data_json.get("report")
+        if not data:
+            raise OpenShiftCheckException("no thinpool usage data returned by the host.")
+
+        return data
+
+    @staticmethod
+    def get_thinpool_data_usage(thinpool_lv_data):
+        """Return the "data_percent" value of the thinpool entry as a float.
+
+        Raises OpenShiftCheckException when the value is missing or empty.
+        """
+        data = thinpool_lv_data.get("data_percent")
+        if not data:
+            raise OpenShiftCheckException("no thinpool usage data returned by the host.")
+
+        return float(data)
+
+    @staticmethod
+    def get_thinpool_metadata_usage(thinpool_lv_data):
+        """Return the "metadata_percent" value of the thinpool entry as a float.
+
+        Raises OpenShiftCheckException when the value is missing or empty.
+        """
+        data = thinpool_lv_data.get("metadata_percent")
+        if not data:
+            raise OpenShiftCheckException("no thinpool usage data returned by the host.")
+
+        return float(data)
+
+    @staticmethod
+    def extract_thinpool_obj(thinpool_data):
+        """Return the first "lv" entry of the first report element.
+
+        Raises OpenShiftCheckException when the report, its first element,
+        the "lv" list, or the first "lv" entry is missing or empty.
+        """
+        if not thinpool_data or not thinpool_data[0]:
+            raise OpenShiftCheckException("no thinpool usage data returned by the host.")
+
+        lv_data = thinpool_data[0].get("lv")
+        if not lv_data or not lv_data[0]:
+            raise OpenShiftCheckException("no thinpool usage data returned by the host.")
+
+        return lv_data[0]
+
+    def exec_cmd(self, cmd_str, task_vars):
+        """Run cmd_str through the "command" module and return its result."""
+        return self.execute_module("command", {
+            "_raw_params": cmd_str,
+        }, task_vars)
diff --git a/roles/openshift_health_checker/openshift_checks/docker_storage_driver.py b/roles/openshift_health_checker/openshift_checks/docker_storage_driver.py
new file mode 100644
index 000000000..94ea7ba9c
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/docker_storage_driver.py
@@ -0,0 +1,50 @@
+# pylint: disable=missing-docstring
+from openshift_checks import OpenShiftCheck, get_var
+
+
+class DockerStorageDriver(OpenShiftCheck):
+    """Check Docker storage driver compatibility.
+
+    Verifies that Docker runs with one of the supported storage drivers and,
+    when that driver is devicemapper, that no loopback data file is in use.
+    """
+
+    name = "docker_storage_driver"
+    tags = ["preflight"]
+
+    storage_drivers = ["devicemapper", "overlay2"]
+
+    @classmethod
+    def is_active(cls, task_vars):
+        """Skip non-containerized installations."""
+        containerized = get_var(task_vars, "openshift", "common", "is_containerized")
+        base_active = super(DockerStorageDriver, cls).is_active(task_vars)
+        return base_active and containerized
+
+    def run(self, tmp, task_vars):
+        """Query docker_info and fail on an unsupported driver or loopback use."""
+        module_result = self.execute_module("docker_info", {}, task_vars)
+        docker_info = module_result.get("info", {})
+
+        if not self.is_supported_storage_driver(docker_info):
+            supported = ', '.join(self.storage_drivers)
+            template = "Unsupported Docker storage driver detected. Supported storage drivers: {drivers}"
+            return {"failed": True, "msg": template.format(drivers=supported)}
+
+        if self.is_using_loopback_device(docker_info):
+            return {
+                "failed": True,
+                "msg": "Use of loopback devices is discouraged. Try running Docker with `--storage-opt dm.thinpooldev`",
+            }
+
+        return {}
+
+    def is_supported_storage_driver(self, docker_info):
+        """Tell whether the reported "Driver" is listed in storage_drivers."""
+        driver = docker_info.get("Driver", "")
+        return driver in self.storage_drivers
+
+    @staticmethod
+    def is_using_loopback_device(docker_info):
+        """Tell whether devicemapper reports a non-empty "Data loop file"."""
+        # Loopback devices only matter for devicemapper; any other driver
+        # is reported as not using one.
+        if docker_info.get("Driver", "") != "devicemapper":
+            return False
+
+        for entry in docker_info.get("DriverStatus", []):
+            if entry[0] != "Data loop file":
+                continue
+            # The first matching entry decides the outcome.
+            return bool(entry[1])
+
+        return False
diff --git a/roles/openshift_health_checker/test/docker_storage_driver_test.py b/roles/openshift_health_checker/test/docker_storage_driver_test.py
new file mode 100644
index 000000000..34a8f827a
--- /dev/null
+++ b/roles/openshift_health_checker/test/docker_storage_driver_test.py
@@ -0,0 +1,81 @@
+import pytest
+
+
+from openshift_checks.docker_storage_driver import DockerStorageDriver
+
+
+@pytest.mark.parametrize('is_containerized,is_active', [
+    (False, False),
+    (True, True),
+])
+def test_is_active(is_containerized, is_active):
+    """The check is active only when the host is containerized."""
+    task_vars = {
+        "openshift": {"common": {"is_containerized": is_containerized}},
+    }
+    outcome = DockerStorageDriver.is_active(task_vars=task_vars)
+    assert outcome == is_active
+
+
+@pytest.mark.parametrize('info,failed,extra_words', [
+    (
+        {"Driver": "devicemapper", "DriverStatus": [("Pool Name", "docker-docker--pool")]},
+        False,
+        [],
+    ),
+    (
+        {"Driver": "devicemapper", "DriverStatus": [("Data loop file", "true")]},
+        True,
+        ["Use of loopback devices is discouraged"],
+    ),
+    (
+        {"Driver": "overlay2", "DriverStatus": []},
+        False,
+        [],
+    ),
+    (
+        {"Driver": "overlay"},
+        True,
+        ["Unsupported Docker storage driver"],
+    ),
+    (
+        {"Driver": "unsupported"},
+        True,
+        ["Unsupported Docker storage driver"],
+    ),
+])
+def test_check_storage_driver(info, failed, extra_words):
+    """run() reports unsupported drivers and devicemapper loopback usage."""
+    def execute_module(module_name, args, tmp=None, task_vars=None):
+        # Only docker_info yields data; any other module is a no-op.
+        if module_name == "docker_info":
+            return {"info": info}
+        return {"changed": False}
+
+    task_vars = {"openshift": {"common": {"is_containerized": True}}}
+
+    driver_check = DockerStorageDriver(execute_module=execute_module)
+    result = driver_check.run(tmp=None, task_vars=task_vars)
+
+    if not failed:
+        assert not result.get("failed", False)
+    else:
+        assert result["failed"]
+
+    for expected in extra_words:
+        assert expected in result["msg"]
diff --git a/roles/openshift_health_checker/test/docker_storage_test.py b/roles/openshift_health_checker/test/docker_storage_test.py
new file mode 100644
index 000000000..73c433383
--- /dev/null
+++ b/roles/openshift_health_checker/test/docker_storage_test.py
@@ -0,0 +1,243 @@
+import pytest
+import json
+
+
+from openshift_checks.docker_storage import DockerStorage, OpenShiftCheckException
+
+
+@pytest.mark.parametrize('is_containerized,group_names,is_active', [
+    # group_names=None leaves the variable unset, exercising its default.
+    (False, None, False),
+    (True, None, True),
+    # Hosts in the "nodes" group depend on Docker even if not containerized.
+    (False, ["nodes"], True),
+    (False, ["masters"], False),
+    (True, ["masters"], True),
+])
+def test_is_active(is_containerized, group_names, is_active):
+    """The check is active for containerized hosts and for "nodes" members."""
+    task_vars = dict(
+        openshift=dict(common=dict(is_containerized=is_containerized)),
+    )
+    if group_names is not None:
+        task_vars["group_names"] = group_names
+
+    assert DockerStorage.is_active(task_vars=task_vars) == is_active
+
+
+@pytest.mark.parametrize('stdout,message,failed,extra_words', [
+    (None, "", True, ["no thinpool usage data"]),
+    ("", "", False, ["Invalid JSON value returned by lvs command"]),
+    (None, "invalid response", True, ["invalid response"]),
+    ("invalid", "invalid response", False, ["Invalid JSON value"]),
+])
+def test_get_lvs_data_with_failed_response(stdout, message, failed, extra_words):
+    """run() raises when the lvs command fails or prints unparsable output."""
+    def execute_module(module_name, args, tmp=None, task_vars=None):
+        if module_name != "command":
+            return {"changed": False}
+
+        # A stdout of None means the key is absent from the module result.
+        module_result = {"msg": message, "failed": failed}
+        if stdout is not None:
+            module_result["stdout"] = stdout
+        return module_result
+
+    task_vars = {"max_thinpool_data_usage_percent": 90.0}
+
+    storage_check = DockerStorage(execute_module=execute_module)
+    with pytest.raises(OpenShiftCheckException) as excinfo:
+        storage_check.run(tmp=None, task_vars=task_vars)
+
+    for expected in extra_words:
+        assert expected in str(excinfo.value)
+
+
+@pytest.mark.parametrize('limit_percent,failed,extra_words', [
+    ("90.0", False, []),
+    (80.0, False, []),
+    ("invalid percent", True, ["Unable to convert", "to float", "invalid percent"]),
+    ("90%", True, ["Unable to convert", "to float", "90%"]),
+])
+def test_invalid_value_for_thinpool_usage_limit(limit_percent, failed, extra_words):
+    """A data usage limit that is not numeric makes run() report a failure."""
+    lvs_report = {
+        "report": [
+            {
+                "lv": [
+                    {
+                        "lv_name": "docker-pool", "vg_name": "docker",
+                        "lv_attr": "twi-aot---", "lv_size": "6.95g",
+                        "pool_lv": "", "origin": "",
+                        "data_percent": "58.96", "metadata_percent": "4.77",
+                        "move_pv": "", "mirror_log": "",
+                        "copy_percent": "", "convert_lv": "",
+                    },
+                ]
+            }
+        ]
+    }
+
+    def execute_module(module_name, args, tmp=None, task_vars=None):
+        if module_name == "command":
+            return {"stdout": json.dumps(lvs_report), "failed": False}
+        return {"changed": False}
+
+    task_vars = {"max_thinpool_data_usage_percent": limit_percent}
+
+    result = DockerStorage(execute_module=execute_module).run(tmp=None, task_vars=task_vars)
+
+    if not failed:
+        assert not result.get("failed", False)
+        return
+
+    assert result["failed"]
+    for expected in extra_words:
+        assert expected in result["msg"]
+
+
+def test_get_lvs_data_with_valid_response():
+    """A complete lvs report with usage below the limits does not fail."""
+    thinpool_entry = {
+        "lv_name": "docker-pool", "vg_name": "docker",
+        "lv_attr": "twi-aot---", "lv_size": "6.95g",
+        "pool_lv": "", "origin": "",
+        "data_percent": "58.96", "metadata_percent": "4.77",
+        "move_pv": "", "mirror_log": "",
+        "copy_percent": "", "convert_lv": "",
+    }
+
+    def execute_module(module_name, args, tmp=None, task_vars=None):
+        if module_name == "command":
+            return {"stdout": json.dumps({"report": [{"lv": [thinpool_entry]}]})}
+        return {"changed": False}
+
+    task_vars = {"max_thinpool_data_usage_percent": "90"}
+
+    storage_check = DockerStorage(execute_module=execute_module)
+    result = storage_check.run(tmp=None, task_vars=task_vars)
+    assert not result.get("failed", False)
+
+
+@pytest.mark.parametrize('response,extra_words', [
+    # Report element without any "lv" list.
+    (
+        {
+            "report": [{}],
+        },
+        ["no thinpool usage data"],
+    ),
+    # "lv" entry lacking both data_percent and metadata_percent.
+    (
+        {
+            "report": [
+                {
+                    "lv": [
+                        {"vg_name": "docker", "lv_attr": "twi-aot---", "lv_size": "6.95g",
+                         "move_pv": "", "mirror_log": "", "copy_percent": "", "convert_lv": ""}
+                    ]
+                }
+            ],
+        },
+        ["no thinpool usage data"],
+    ),
+    # Empty "lv" list.
+    (
+        {
+            "report": [
+                {
+                    "lv": [],
+                }
+            ],
+        },
+        ["no thinpool usage data"],
+    ),
+    # "lv" entry lacking only metadata_percent.
+    (
+        {
+            "report": [
+                {
+                    "lv": [
+                        {"lv_name": "docker-pool", "vg_name": "docker", "lv_attr": "twi-aot---", "lv_size": "6.95g",
+                         "pool_lv": "", "origin": "", "data_percent": "58.96",
+                         "move_pv": "", "mirror_log": "", "copy_percent": "", "convert_lv": ""}
+                    ]
+                }
+            ],
+        },
+        ["no thinpool usage data"],
+    ),
+])
+def test_get_lvs_data_with_incomplete_response(response, extra_words):
+    """run() raises when the lvs report lacks the thinpool usage fields."""
+    def execute_module(module_name, args, tmp=None, task_vars=None):
+        if module_name != "command":
+            return {
+                "changed": False,
+            }
+
+        return {
+            "stdout": json.dumps(response)
+        }
+
+    task_vars = dict(
+        max_thinpool_data_usage_percent=90.0
+    )
+
+    check = DockerStorage(execute_module=execute_module)
+    with pytest.raises(OpenShiftCheckException) as excinfo:
+        check.run(tmp=None, task_vars=task_vars)
+
+    # Check the parametrized words rather than a hard-coded copy of them.
+    for word in extra_words:
+        assert word in str(excinfo.value)
+
+
+@pytest.mark.parametrize('response,extra_words', [
+    (
+        {
+            "report": [
+                {
+                    "lv": [
+                        {
+                            "lv_name": "docker-pool", "vg_name": "docker",
+                            "lv_attr": "twi-aot---", "lv_size": "6.95g",
+                            "pool_lv": "", "origin": "",
+                            "data_percent": "100.0", "metadata_percent": "90.0",
+                            "move_pv": "", "mirror_log": "",
+                            "copy_percent": "", "convert_lv": "",
+                        }
+                    ]
+                }
+            ],
+        },
+        ["thinpool data usage above maximum threshold"],
+    ),
+    (
+        {
+            "report": [
+                {
+                    "lv": [
+                        {
+                            "lv_name": "docker-pool", "vg_name": "docker",
+                            "lv_attr": "twi-aot---", "lv_size": "6.95g",
+                            "pool_lv": "", "origin": "",
+                            "data_percent": "10.0", "metadata_percent": "91.0",
+                            "move_pv": "", "mirror_log": "",
+                            "copy_percent": "", "convert_lv": "",
+                        }
+                    ]
+                }
+            ],
+        },
+        ["thinpool metadata usage above maximum threshold"],
+    ),
+])
+def test_get_lvs_data_with_high_thinpool_usage(response, extra_words):
+    """Usage above the configured maximum is reported as a failed check."""
+    def execute_module(module_name, args, tmp=None, task_vars=None):
+        if module_name == "command":
+            return {"stdout": json.dumps(response)}
+        return {"changed": False}
+
+    task_vars = {"max_thinpool_data_usage_percent": "90"}
+
+    storage_check = DockerStorage(execute_module=execute_module)
+    result = storage_check.run(tmp=None, task_vars=task_vars)
+
+    assert result["failed"]
+    for expected in extra_words:
+        assert expected in result["msg"]