Merge pull request #5495 from abutcher/ca-redeploy-expired-etcd

Automatic merge from submit-queue. Improve CA redeploy restart logic. Expired etcd certificates require special casing around restarts in the certificate redeploy playbooks. When etcd certificates are expired we can't restart masters or nodes. We also can't simply restart etcd because its peers also have expired certificates, so we must stop/start etcd when we detect expired etcd certificates. `openshift-ca.yml`: * No longer restart master services when etcd certificates were previously expired. * No longer restart node services when master or etcd certificates were previously expired. `etcd-ca.yml`: * No longer restart master services when etcd certificates were previously expired. Tested using [gen_expired_tls.sh](https://gist.github.com/abutcher/bdd20b9d582675d89fb22658689c49e4) on one of my master/etcd hosts to ensure that the restart logic changes caused us to skip the right restarts and do a full stop/start of etcd in the `redeploy-certificates.yml` and `redeploy-etcd-certificates.yml` playbooks. Note: When this happens with a cluster and you want to replace all certificates, you can run `redeploy-etcd-ca.yml` and `redeploy-openshift-ca.yml` (which will both skip restarts), and then run `redeploy-certificates.yml`, which will now be able to do a full stop/start of etcd.
author: OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> 2017-09-24 11:46:04 -0700
committer: GitHub <noreply@github.com> 2017-09-24 11:46:04 -0700
commit: 02485be8fe554e03bf07f0650f9ff36398679722 (patch)
tree: f4394f8c81eef0d588ad70c723babdf6059a730f /playbooks/common/openshift-cluster/redeploy-certificates
parent: 5632b266c6d44f371fe7d36a0e367c462cf7a701 (diff)
parent: 775128cc52d8295fe4cf08211ab53730d101692c (diff)
3 files changed, 70 insertions, 31 deletions
diff --git a/playbooks/common/openshift-cluster/redeploy-certificates/check-expiry.yml b/playbooks/common/openshift-cluster/redeploy-certificates/check-expiry.yml
new file mode 100644
index 000000000..4a9fbf7eb
--- /dev/null
+++ b/playbooks/common/openshift-cluster/redeploy-certificates/check-expiry.yml
@@ -0,0 +1,12 @@
+---
+- name: Check cert expirys
+  hosts: "{{ g_check_expiry_hosts }}"
+  vars:
+    openshift_certificate_expiry_show_all: yes
+  roles:
+  # Sets 'check_results' per host which contains health status for
+  # etcd, master and node certificates.  We will use 'check_results'
+  # to determine if any certificates were expired prior to running
+  # this playbook. Service restarts will be skipped if any
+  # certificates were previously expired.
+  - role: openshift_certificate_expiry
diff --git a/playbooks/common/openshift-cluster/redeploy-certificates/etcd-ca.yml b/playbooks/common/openshift-cluster/redeploy-certificates/etcd-ca.yml
index 58bbcc658..3a8e32ed1 100644
--- a/playbooks/common/openshift-cluster/redeploy-certificates/etcd-ca.yml
+++ b/playbooks/common/openshift-cluster/redeploy-certificates/etcd-ca.yml
@@ -153,13 +153,19 @@
     changed_when: false
 
 - include: ../../openshift-master/restart.yml
-  # Do not restart masters when master certificates were previously expired.
-  when: ('expired' not in hostvars
-                       | oo_select_keys(groups['oo_masters_to_config'])
-                       | oo_collect('check_results.check_results.ocp_certs')
-                       | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/master.server.crt"}))
-        and
-        ('expired' not in hostvars
-                          | oo_select_keys(groups['oo_masters_to_config'])
-                          | oo_collect('check_results.check_results.ocp_certs')
-                          | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/ca-bundle.crt"}))
+  # Do not restart masters when master or etcd certificates were previously expired.
+  when:
+  # masters
+  - ('expired' not in hostvars
+      | oo_select_keys(groups['oo_masters_to_config'])
+      | oo_collect('check_results.check_results.ocp_certs')
+      | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/master.server.crt"}))
+  - ('expired' not in hostvars
+      | oo_select_keys(groups['oo_masters_to_config'])
+      | oo_collect('check_results.check_results.ocp_certs')
+      | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/ca-bundle.crt"}))
+  # etcd
+  - ('expired' not in (hostvars
+      | oo_select_keys(groups['etcd'])
+      | oo_collect('check_results.check_results.etcd')
+      | oo_collect('health')))
diff --git a/playbooks/common/openshift-cluster/redeploy-certificates/openshift-ca.yml b/playbooks/common/openshift-cluster/redeploy-certificates/openshift-ca.yml
index 089ae6bbc..b54acae6c 100644
--- a/playbooks/common/openshift-cluster/redeploy-certificates/openshift-ca.yml
+++ b/playbooks/common/openshift-cluster/redeploy-certificates/openshift-ca.yml
@@ -7,7 +7,7 @@
     when: not openshift.common.version_gte_3_2_or_1_2 | bool
 
 - name: Check cert expirys
-  hosts: oo_nodes_to_config:oo_masters_to_config
+  hosts: oo_nodes_to_config:oo_masters_to_config:oo_etcd_to_config
   vars:
     openshift_certificate_expiry_show_all: yes
   roles:
@@ -209,16 +209,22 @@
     with_items: "{{ client_users }}"
 
 - include: ../../openshift-master/restart.yml
-  # Do not restart masters when master certificates were previously expired.
-  when: ('expired' not in hostvars
-                       | oo_select_keys(groups['oo_masters_to_config'])
-                       | oo_collect('check_results.check_results.ocp_certs')
-                       | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/master.server.crt"}))
-        and
-        ('expired' not in hostvars
-                          | oo_select_keys(groups['oo_masters_to_config'])
-                          | oo_collect('check_results.check_results.ocp_certs')
-                          | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/ca-bundle.crt"}))
+  # Do not restart masters when master or etcd certificates were previously expired.
+  when:
+  # masters
+  - ('expired' not in hostvars
+      | oo_select_keys(groups['oo_masters_to_config'])
+      | oo_collect('check_results.check_results.ocp_certs')
+      | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/master.server.crt"}))
+  - ('expired' not in hostvars
+      | oo_select_keys(groups['oo_masters_to_config'])
+      | oo_collect('check_results.check_results.ocp_certs')
+      | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/ca-bundle.crt"}))
+  # etcd
+  - ('expired' not in (hostvars
+      | oo_select_keys(groups['etcd'])
+      | oo_collect('check_results.check_results.etcd')
+      | oo_collect('health')))
 
 - name: Distribute OpenShift CA certificate to nodes
   hosts: oo_nodes_to_config
@@ -268,13 +274,28 @@
     changed_when: false
 
 - include: ../../openshift-node/restart.yml
-  # Do not restart nodes when node certificates were previously expired.
-  when: ('expired' not in hostvars
-                       | oo_select_keys(groups['oo_nodes_to_config'])
-                       | oo_collect('check_results.check_results.ocp_certs')
-                       | oo_collect('health', {'path':hostvars[groups.oo_nodes_to_config.0].openshift.common.config_base ~ "/node/server.crt"}))
-        and
-        ('expired' not in hostvars
-                          | oo_select_keys(groups['oo_nodes_to_config'])
-                          | oo_collect('check_results.check_results.ocp_certs')
-                          | oo_collect('health', {'path':hostvars[groups.oo_nodes_to_config.0].openshift.common.config_base ~ "/node/ca.crt"}))
+  # Do not restart nodes when node, master or etcd certificates were previously expired.
+  when:
+  # nodes
+  - ('expired' not in hostvars
+      | oo_select_keys(groups['oo_nodes_to_config'])
+      | oo_collect('check_results.check_results.ocp_certs')
+      | oo_collect('health', {'path':hostvars[groups.oo_nodes_to_config.0].openshift.common.config_base ~ "/node/server.crt"}))
+  - ('expired' not in hostvars
+      | oo_select_keys(groups['oo_nodes_to_config'])
+      | oo_collect('check_results.check_results.ocp_certs')
+      | oo_collect('health', {'path':hostvars[groups.oo_nodes_to_config.0].openshift.common.config_base ~ "/node/ca.crt"}))
+  # masters
+  - ('expired' not in hostvars
+      | oo_select_keys(groups['oo_masters_to_config'])
+      | oo_collect('check_results.check_results.ocp_certs')
+      | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/master.server.crt"}))
+  - ('expired' not in hostvars
+      | oo_select_keys(groups['oo_masters_to_config'])
+      | oo_collect('check_results.check_results.ocp_certs')
+      | oo_collect('health', {'path':hostvars[groups.oo_first_master.0].openshift.common.config_base ~ "/master/ca-bundle.crt"}))
+  # etcd
+  - ('expired' not in (hostvars
+      | oo_select_keys(groups['etcd'])
+      | oo_collect('check_results.check_results.etcd')
+      | oo_collect('health')))
author	OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com>	2017-09-24 11:46:04 -0700
committer	GitHub <noreply@github.com>	2017-09-24 11:46:04 -0700
commit	02485be8fe554e03bf07f0650f9ff36398679722 (patch)
tree	f4394f8c81eef0d588ad70c723babdf6059a730f /playbooks/common/openshift-cluster/redeploy-certificates
parent	5632b266c6d44f371fe7d36a0e367c462cf7a701 (diff)
parent	775128cc52d8295fe4cf08211ab53730d101692c (diff)