From a1228a7c4acdb088fbf43c9a67b7eccf7ee67d07 Mon Sep 17 00:00:00 2001 From: Luke Meyer Date: Thu, 27 Apr 2017 13:09:08 -0400 Subject: health check playbooks: relocate and expand We are moving toward having adhoc post-install checks and so the "preflight" designation needs to be widened. Updated location to playbooks/byo/openshift-checks, added health check playbook, and updated README. Also included the certificate_expiry playbooks. Left behind symlinks and wrappers for existing checks. To conform with the direction of the rest of the repo, the openshift-checks playbooks are split into two directories, one under playbooks/common with the actual invocation and one under playbooks/byo for entrypoints that are just wrappers for the ones in common. Because the certificate_expiry playbooks are intended not just to be functional but to be examples that users modify, I did not split them similarly. That could happen later after discussion but for now I just left them whole under byo/openshift-checks/certificate_expiry. 
--- playbooks/byo/openshift-checks/README.md | 65 ++++++++++++++++++++++ .../certificate_expiry/default.yaml | 10 ++++ .../certificate_expiry/easy-mode-upload.yaml | 40 +++++++++++++ .../certificate_expiry/easy-mode.yaml | 18 ++++++ .../html_and_json_default_paths.yaml | 12 ++++ .../html_and_json_timestamp.yaml | 16 ++++++ .../longer-warning-period-json-results.yaml | 13 +++++ .../certificate_expiry/longer_warning_period.yaml | 12 ++++ .../byo/openshift-checks/certificate_expiry/roles | 1 + playbooks/byo/openshift-checks/health.yml | 3 + playbooks/byo/openshift-checks/pre-install.yml | 3 + playbooks/byo/openshift-preflight/README.md | 43 -------------- playbooks/byo/openshift-preflight/check.yml | 16 +----- playbooks/byo/openshift-preflight/roles | 1 - 14 files changed, 195 insertions(+), 58 deletions(-) create mode 100644 playbooks/byo/openshift-checks/README.md create mode 100644 playbooks/byo/openshift-checks/certificate_expiry/default.yaml create mode 100644 playbooks/byo/openshift-checks/certificate_expiry/easy-mode-upload.yaml create mode 100644 playbooks/byo/openshift-checks/certificate_expiry/easy-mode.yaml create mode 100644 playbooks/byo/openshift-checks/certificate_expiry/html_and_json_default_paths.yaml create mode 100644 playbooks/byo/openshift-checks/certificate_expiry/html_and_json_timestamp.yaml create mode 100644 playbooks/byo/openshift-checks/certificate_expiry/longer-warning-period-json-results.yaml create mode 100644 playbooks/byo/openshift-checks/certificate_expiry/longer_warning_period.yaml create mode 120000 playbooks/byo/openshift-checks/certificate_expiry/roles create mode 100644 playbooks/byo/openshift-checks/health.yml create mode 100644 playbooks/byo/openshift-checks/pre-install.yml delete mode 100644 playbooks/byo/openshift-preflight/README.md delete mode 120000 playbooks/byo/openshift-preflight/roles (limited to 'playbooks/byo') diff --git a/playbooks/byo/openshift-checks/README.md b/playbooks/byo/openshift-checks/README.md new file 
mode 100644 index 000000000..4b2ff1f94 --- /dev/null +++ b/playbooks/byo/openshift-checks/README.md @@ -0,0 +1,65 @@ +# OpenShift health checks + +This directory contains Ansible playbooks for detecting potential problems prior +to an install, as well as health checks to run on existing OpenShift clusters. + +Ansible's default operation mode is to fail fast, on the first error. However, +when performing checks, it is useful to gather as much information about +problems as possible in a single run. + +Thus, the playbooks run a battery of checks against the inventory hosts and have +Ansible gather intermediate errors, giving a more complete diagnostic of the +state of each host. If any check failed, the playbook run will be marked as +failed. + +To facilitate understanding the problems that were encountered, a custom +callback plugin summarizes execution errors at the end of a playbook run. + +# Available playbooks + +1. Pre-install playbook ([pre-install.yml](pre-install.yml)) - verifies system + requirements and looks for common problems that can prevent a successful + installation of a production cluster. + +2. Diagnostic playbook ([health.yml](health.yml)) - checks an existing cluster + for known signs of problems. + +3. Certificate expiry playbooks ([certificate_expiry](certificate_expiry)) - + check that certificates in use are valid and not expiring soon. + +## Running + +With a [recent installation of Ansible](../../../README.md#setup), run the playbook +against your inventory file. Here is the step-by-step: + +1. If you haven't done it yet, clone this repository: + + ```console + $ git clone https://github.com/openshift/openshift-ansible + $ cd openshift-ansible + ``` + +2. 
Run the appropriate playbook: + + ```console + $ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/pre-install.yml + ``` + + or + + ```console + $ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/health.yml + ``` + + or + + ```console + $ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/certificate_expiry/default.yaml -v + ``` + +## Running via Docker image + +This repository is built into a Docker image including Ansible so that it can +be run anywhere Docker is available. Instructions for doing so may be found +[in the README](../../../README_CONTAINER_IMAGE.md). + diff --git a/playbooks/byo/openshift-checks/certificate_expiry/default.yaml b/playbooks/byo/openshift-checks/certificate_expiry/default.yaml new file mode 100644 index 000000000..630135cae --- /dev/null +++ b/playbooks/byo/openshift-checks/certificate_expiry/default.yaml @@ -0,0 +1,10 @@ +--- +# Default behavior, you will need to ensure you run ansible with the +# -v option to see report results: + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/byo/openshift-checks/certificate_expiry/easy-mode-upload.yaml b/playbooks/byo/openshift-checks/certificate_expiry/easy-mode-upload.yaml new file mode 100644 index 000000000..378d1f154 --- /dev/null +++ b/playbooks/byo/openshift-checks/certificate_expiry/easy-mode-upload.yaml @@ -0,0 +1,40 @@ +# This example generates HTML and JSON reports. +# +# Copies of the generated HTML and JSON reports are uploaded to the masters, +# which is particularly useful when this playbook is run from a container. 
+# +# All certificates (healthy or not) are included in the results +# +# Optional environment variables to alter the behaviour of the playbook: +# CERT_EXPIRY_WARN_DAYS: Length of the warning window in days (45) +# COPY_TO_PATH: path to copy reports to in the masters (/etc/origin/certificate_expiration_report) +--- +- name: Generate certificate expiration reports + hosts: nodes:masters:etcd + gather_facts: no + vars: + openshift_certificate_expiry_save_json_results: yes + openshift_certificate_expiry_generate_html_report: yes + openshift_certificate_expiry_show_all: yes + openshift_certificate_expiry_warning_days: "{{ lookup('env', 'CERT_EXPIRY_WARN_DAYS') | default('45', true) }}" + roles: + - role: openshift_certificate_expiry + +- name: Upload reports to master + hosts: masters + gather_facts: no + vars: + destination_path: "{{ lookup('env', 'COPY_TO_PATH') | default('/etc/origin/certificate_expiration_report', true) }}" + timestamp: "{{ lookup('pipe', 'date +%Y%m%d') }}" + tasks: + - name: Ensure that the target directory exists + file: + path: "{{ destination_path }}" + state: directory + - name: Copy the reports + copy: + dest: "{{ destination_path }}/{{ timestamp }}-{{ item }}" + src: "/tmp/{{ item }}" + with_items: + - "cert-expiry-report.html" + - "cert-expiry-report.json" diff --git a/playbooks/byo/openshift-checks/certificate_expiry/easy-mode.yaml b/playbooks/byo/openshift-checks/certificate_expiry/easy-mode.yaml new file mode 100644 index 000000000..ae41c7c14 --- /dev/null +++ b/playbooks/byo/openshift-checks/certificate_expiry/easy-mode.yaml @@ -0,0 +1,18 @@ +--- +# This example playbook is great if you're just wanting to try the +# role out. 
+# +# This example enables HTML and JSON reports +# +# All certificates (healthy or not) are included in the results + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + vars: + openshift_certificate_expiry_save_json_results: yes + openshift_certificate_expiry_generate_html_report: yes + openshift_certificate_expiry_show_all: yes + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/byo/openshift-checks/certificate_expiry/html_and_json_default_paths.yaml b/playbooks/byo/openshift-checks/certificate_expiry/html_and_json_default_paths.yaml new file mode 100644 index 000000000..d80cb6ff4 --- /dev/null +++ b/playbooks/byo/openshift-checks/certificate_expiry/html_and_json_default_paths.yaml @@ -0,0 +1,12 @@ +--- +# Generate HTML and JSON artifacts in their default paths: + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + vars: + openshift_certificate_expiry_generate_html_report: yes + openshift_certificate_expiry_save_json_results: yes + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/byo/openshift-checks/certificate_expiry/html_and_json_timestamp.yaml b/playbooks/byo/openshift-checks/certificate_expiry/html_and_json_timestamp.yaml new file mode 100644 index 000000000..2189455b7 --- /dev/null +++ b/playbooks/byo/openshift-checks/certificate_expiry/html_and_json_timestamp.yaml @@ -0,0 +1,16 @@ +--- +# Generate timestamped HTML and JSON reports in /var/lib/certcheck + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + vars: + openshift_certificate_expiry_generate_html_report: yes + openshift_certificate_expiry_save_json_results: yes + openshift_certificate_expiry_show_all: yes + timestamp: "{{ lookup('pipe', 'date +%Y%m%d') }}" + openshift_certificate_expiry_html_report_path: "/var/lib/certcheck/{{ timestamp }}-cert-expiry-report.html" + openshift_certificate_expiry_json_results_path: "/var/lib/certcheck/{{ 
timestamp }}-cert-expiry-report.json" + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/byo/openshift-checks/certificate_expiry/longer-warning-period-json-results.yaml b/playbooks/byo/openshift-checks/certificate_expiry/longer-warning-period-json-results.yaml new file mode 100644 index 000000000..87a0f3be4 --- /dev/null +++ b/playbooks/byo/openshift-checks/certificate_expiry/longer-warning-period-json-results.yaml @@ -0,0 +1,13 @@ +--- +# Change the expiration warning window to 1500 days (good for testing +# the module out) and save the results as a JSON file: + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + vars: + openshift_certificate_expiry_warning_days: 1500 + openshift_certificate_expiry_save_json_results: yes + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/byo/openshift-checks/certificate_expiry/longer_warning_period.yaml b/playbooks/byo/openshift-checks/certificate_expiry/longer_warning_period.yaml new file mode 100644 index 000000000..960457c4b --- /dev/null +++ b/playbooks/byo/openshift-checks/certificate_expiry/longer_warning_period.yaml @@ -0,0 +1,12 @@ +--- +# Change the expiration warning window to 1500 days (good for testing +# the module out): + +- name: Check cert expirys + hosts: nodes:masters:etcd + become: yes + gather_facts: no + vars: + openshift_certificate_expiry_warning_days: 1500 + roles: + - role: openshift_certificate_expiry diff --git a/playbooks/byo/openshift-checks/certificate_expiry/roles b/playbooks/byo/openshift-checks/certificate_expiry/roles new file mode 120000 index 000000000..4bdbcbad3 --- /dev/null +++ b/playbooks/byo/openshift-checks/certificate_expiry/roles @@ -0,0 +1 @@ +../../../../roles \ No newline at end of file diff --git a/playbooks/byo/openshift-checks/health.yml b/playbooks/byo/openshift-checks/health.yml new file mode 100644 index 000000000..dfc1a7db0 --- /dev/null +++ b/playbooks/byo/openshift-checks/health.yml @@ -0,0 +1,3 @@ 
+--- +- include: ../openshift-cluster/initialize_groups.yml +- include: ../../common/openshift-checks/health.yml diff --git a/playbooks/byo/openshift-checks/pre-install.yml b/playbooks/byo/openshift-checks/pre-install.yml new file mode 100644 index 000000000..5e8c3ab9b --- /dev/null +++ b/playbooks/byo/openshift-checks/pre-install.yml @@ -0,0 +1,3 @@ +--- +- include: ../openshift-cluster/initialize_groups.yml +- include: ../../common/openshift-checks/pre-install.yml diff --git a/playbooks/byo/openshift-preflight/README.md b/playbooks/byo/openshift-preflight/README.md deleted file mode 100644 index b50292eac..000000000 --- a/playbooks/byo/openshift-preflight/README.md +++ /dev/null @@ -1,43 +0,0 @@ -# OpenShift preflight checks - -Here we provide an Ansible playbook for detecting potential roadblocks prior to -an install or upgrade. - -Ansible's default operation mode is to fail fast, on the first error. However, -when performing checks, it is useful to gather as much information about -problems as possible in a single run. - -The `check.yml` playbook runs a battery of checks against the inventory hosts -and tells Ansible to ignore intermediate errors, thus giving a more complete -diagnostic of the state of each host. Still, if any check failed, the playbook -run will be marked as having failed. - -To facilitate understanding the problems that were encountered, we provide a -custom callback plugin to summarize execution errors at the end of a playbook -run. - ---- - -*Note that currently the `check.yml` playbook is only useful for RPM-based -installations. Containerized installs are excluded from checks for now, but -might be included in the future if there is demand for that.* - ---- - -## Running - -With an installation of Ansible 2.2 or greater, run the playbook directly -against your inventory file. Here is the step-by-step: - -1. 
If you haven't done it yet, clone this repository: - - ```console - $ git clone https://github.com/openshift/openshift-ansible - $ cd openshift-ansible - ``` - -2. Run the playbook: - - ```console - $ ansible-playbook -i <inventory file> playbooks/byo/openshift-preflight/check.yml - ``` diff --git a/playbooks/byo/openshift-preflight/check.yml b/playbooks/byo/openshift-preflight/check.yml index eb763221f..2e53452a6 100644 --- a/playbooks/byo/openshift-preflight/check.yml +++ b/playbooks/byo/openshift-preflight/check.yml @@ -1,15 +1,3 @@ --- -- include: ../openshift-cluster/initialize_groups.yml - -- name: Run OpenShift health checks - # Temporarily reverting to OSEv3 until group standardization is complete - hosts: OSEv3 - roles: - - openshift_health_checker - post_tasks: - # NOTE: we need to use the old "action: name" syntax until - # https://github.com/ansible/ansible/issues/20513 is fixed. - - action: openshift_health_check - args: - checks: - - '@preflight' +# location is moved; this file remains so existing instructions keep working +- include: ../openshift-checks/pre-install.yml diff --git a/playbooks/byo/openshift-preflight/roles b/playbooks/byo/openshift-preflight/roles deleted file mode 120000 index 20c4c58cf..000000000 --- a/playbooks/byo/openshift-preflight/roles +++ /dev/null @@ -1 +0,0 @@ -../../../roles \ No newline at end of file -- cgit v1.2.3