summaryrefslogtreecommitdiffstats
path: root/roles/openshift_health_checker
diff options
context:
space:
mode:
Diffstat (limited to 'roles/openshift_health_checker')
-rw-r--r--roles/openshift_health_checker/openshift_checks/etcd_volume.py58
-rw-r--r--roles/openshift_health_checker/test/etcd_volume_test.py149
2 files changed, 207 insertions, 0 deletions
diff --git a/roles/openshift_health_checker/openshift_checks/etcd_volume.py b/roles/openshift_health_checker/openshift_checks/etcd_volume.py
new file mode 100644
index 000000000..cbdf70092
--- /dev/null
+++ b/roles/openshift_health_checker/openshift_checks/etcd_volume.py
@@ -0,0 +1,58 @@
+# vim: expandtab:tabstop=4:shiftwidth=4
+"""
+Ansible module for warning about etcd volume size past a defined threshold.
+"""
+
+from openshift_checks import OpenShiftCheck, OpenShiftCheckException, get_var
+
+
+class EtcdVolume(OpenShiftCheck):
+ """Ensure disk size for an etcd host does not exceed a defined limit"""
+
+ name = "etcd_volume"
+ tags = ["etcd", "health"]
+
+ etcd_default_size_limit_percent = 0.9
+
+ def run(self, tmp, task_vars):
+ ansible_mounts = get_var(task_vars, "ansible_mounts")
+
+ etcd_mount_path = self._get_etcd_mount_path(ansible_mounts)
+ etcd_disk_size_available = int(etcd_mount_path["size_available"])
+ etcd_disk_size_total = int(etcd_mount_path["size_total"])
+ etcd_disk_size_used = etcd_disk_size_total - etcd_disk_size_available
+
+ size_limit_percent = get_var(
+ task_vars,
+ "etcd_disk_size_limit_percent",
+ default=self.etcd_default_size_limit_percent
+ )
+
+ if float(etcd_disk_size_used) / float(etcd_disk_size_total) > size_limit_percent:
+ msg = ("Current etcd volume usage ({actual:.2f} GB) for the volume \"{volume}\" "
+ "is greater than the storage limit ({limit:.2f} GB).")
+ msg = msg.format(
+ actual=self._to_gigabytes(etcd_disk_size_used),
+ volume=etcd_mount_path["mount"],
+ limit=self._to_gigabytes(size_limit_percent * etcd_disk_size_total),
+ )
+ return {"failed": True, "msg": msg}
+
+ return {"changed": False}
+
+ @staticmethod
+ def _get_etcd_mount_path(ansible_mounts):
+ supported_mnt_paths = ["/var/lib/etcd", "/var/lib", "/var", "/"]
+ available_mnts = {mnt.get("mount"): mnt for mnt in ansible_mounts}
+
+ for path in supported_mnt_paths:
+ if path in available_mnts:
+ return available_mnts[path]
+
+ paths = ', '.join(sorted(available_mnts)) or 'none'
+ msg = "Unable to determine available disk space. Paths mounted: {}.".format(paths)
+ raise OpenShiftCheckException(msg)
+
+ @staticmethod
+ def _to_gigabytes(byte_size):
+ return float(byte_size) / 10.0**9
diff --git a/roles/openshift_health_checker/test/etcd_volume_test.py b/roles/openshift_health_checker/test/etcd_volume_test.py
new file mode 100644
index 000000000..ff8d0d8d7
--- /dev/null
+++ b/roles/openshift_health_checker/test/etcd_volume_test.py
@@ -0,0 +1,149 @@
+import pytest
+
+from openshift_checks.etcd_volume import EtcdVolume, OpenShiftCheckException
+
+
+@pytest.mark.parametrize('ansible_mounts,extra_words', [
+ ([], ['none']), # empty ansible_mounts
+ ([{'mount': '/mnt'}], ['/mnt']), # missing relevant mount paths
+])
+def test_cannot_determine_available_disk(ansible_mounts, extra_words):
+ task_vars = dict(
+ ansible_mounts=ansible_mounts,
+ )
+ check = EtcdVolume(execute_module=fake_execute_module)
+
+ with pytest.raises(OpenShiftCheckException) as excinfo:
+ check.run(tmp=None, task_vars=task_vars)
+
+ for word in 'determine available disk'.split() + extra_words:
+ assert word in str(excinfo.value)
+
+
+@pytest.mark.parametrize('size_limit,ansible_mounts', [
+ (
+ # if no size limit is specified, expect max usage
+ # limit to default to 90% of size_total
+ None,
+ [{
+ 'mount': '/',
+ 'size_available': 40 * 10**9,
+ 'size_total': 80 * 10**9
+ }],
+ ),
+ (
+ 1,
+ [{
+ 'mount': '/',
+ 'size_available': 30 * 10**9,
+ 'size_total': 30 * 10**9,
+ }],
+ ),
+ (
+ 20000000000,
+ [{
+ 'mount': '/',
+ 'size_available': 20 * 10**9,
+ 'size_total': 40 * 10**9,
+ }],
+ ),
+ (
+ 5000000000,
+ [{
+ # not enough space on / ...
+ 'mount': '/',
+ 'size_available': 0,
+ 'size_total': 0,
+ }, {
+ # not enough space on /var/lib ...
+ 'mount': '/var/lib',
+ 'size_available': 2 * 10**9,
+ 'size_total': 21 * 10**9,
+ }, {
+ # ... but enough on /var/lib/etcd
+ 'mount': '/var/lib/etcd',
+ 'size_available': 36 * 10**9,
+ 'size_total': 40 * 10**9
+ }],
+ )
+])
+def test_succeeds_with_recommended_disk_space(size_limit, ansible_mounts):
+ task_vars = dict(
+ etcd_disk_size_limit_percent=size_limit,
+ ansible_mounts=ansible_mounts,
+ )
+
+ if task_vars["etcd_disk_size_limit_percent"] is None:
+ task_vars.pop("etcd_disk_size_limit_percent")
+
+ check = EtcdVolume(execute_module=fake_execute_module)
+ result = check.run(tmp=None, task_vars=task_vars)
+
+ assert not result.get('failed', False)
+
+
+@pytest.mark.parametrize('size_limit_percent,ansible_mounts,extra_words', [
+ (
+ # if no size limit is specified, expect max usage
+ # limit to default to 90% of size_total
+ None,
+ [{
+ 'mount': '/',
+ 'size_available': 1 * 10**9,
+ 'size_total': 100 * 10**9,
+ }],
+ ['90.00 GB'],
+ ),
+ (
+ 0.7,
+ [{
+ 'mount': '/',
+ 'size_available': 1 * 10**6,
+ 'size_total': 5 * 10**9,
+ }],
+ ['3.50 GB'],
+ ),
+ (
+ 0.4,
+ [{
+ 'mount': '/',
+ 'size_available': 2 * 10**9,
+ 'size_total': 6 * 10**9,
+ }],
+ ['2.40 GB'],
+ ),
+ (
+ None,
+ [{
+ # enough space on /var ...
+ 'mount': '/var',
+ 'size_available': 20 * 10**9,
+ 'size_total': 20 * 10**9,
+ }, {
+ # .. but not enough on /var/lib
+ 'mount': '/var/lib',
+ 'size_available': 1 * 10**9,
+ 'size_total': 20 * 10**9,
+ }],
+ ['18.00 GB'],
+ ),
+])
+def test_fails_with_insufficient_disk_space(size_limit_percent, ansible_mounts, extra_words):
+ task_vars = dict(
+ etcd_disk_size_limit_percent=size_limit_percent,
+ ansible_mounts=ansible_mounts,
+ )
+
+ if task_vars["etcd_disk_size_limit_percent"] is None:
+ task_vars.pop("etcd_disk_size_limit_percent")
+
+ check = EtcdVolume(execute_module=fake_execute_module)
+ result = check.run(tmp=None, task_vars=task_vars)
+
+ assert result['failed']
+ for word in extra_words:
+ assert word in result['msg']
+
+
+def fake_execute_module(*args):
+ raise AssertionError('this function should not be called')