diff options
author | Scott Dodson <sdodson@redhat.com> | 2017-05-19 16:01:03 -0400 |
---|---|---|
committer | Scott Dodson <sdodson@redhat.com> | 2017-05-19 17:22:42 -0400 |
commit | 1a868e61fbab8f1e2095c0952031656c47926220 (patch) | |
tree | 04ac7a4554c56f2d8d87c7ee5cec115b15e2661f | |
parent | c3f96b1f9f77cbd105d082a216b4761ca040bcf7 (diff) | |
download | openshift-1a868e61fbab8f1e2095c0952031656c47926220.tar.gz openshift-1a868e61fbab8f1e2095c0952031656c47926220.tar.bz2 openshift-1a868e61fbab8f1e2095c0952031656c47926220.tar.xz openshift-1a868e61fbab8f1e2095c0952031656c47926220.zip |
Tolerate failures in the node upgrade playbook
-rw-r--r-- | inventory/byo/hosts.origin.example | 25 | ||||
-rw-r--r-- | inventory/byo/hosts.ose.example | 25 | ||||
-rw-r--r-- | playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml | 2 | ||||
-rw-r--r-- | playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml | 2 |
4 files changed, 52 insertions, 2 deletions
diff --git a/inventory/byo/hosts.origin.example b/inventory/byo/hosts.origin.example index 20f342023..310b8ab44 100644 --- a/inventory/byo/hosts.origin.example +++ b/inventory/byo/hosts.origin.example @@ -788,6 +788,31 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # #etcd_ca_default_days=1825 +# Upgrade Control +# +# By default nodes are upgraded in a serial manner one at a time and all failures +# are fatal +#openshift_upgrade_nodes_serial=1 +#openshift_upgrade_nodes_max_fail_percentage=0 +# +# You can specify the number of nodes to upgrade at once. We do not currently +# attempt to verify that you have capacity to drain this many nodes at once +# so please be careful when specifying these values. You should also verify that +# the expected number of nodes are all schedulable and ready before starting an +# upgrade. If it's not possible to drain the requested nodes the upgrade will +# stall indefinitely until the drain is successful. +# +# If you're upgrading more than one node at a time you can specify the maximum +# percentage of failure within the batch before the upgrade is aborted. Any +# nodes that do fail are ignored for the rest of the playbook run and you should +# take care to investigate the failure and return the node to service so that +# your cluster. +# +# The percentage must exceed the value, this would fail on two failures +# openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=49 +# where as this would not +# openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=50 + # host group for masters [masters] ose3-master[1:3]-ansible.test.example.com diff --git a/inventory/byo/hosts.ose.example b/inventory/byo/hosts.ose.example index f75a47bb8..e126bbcab 100644 --- a/inventory/byo/hosts.ose.example +++ b/inventory/byo/hosts.ose.example @@ -785,6 +785,31 @@ openshift_master_identity_providers=[{'name': 'htpasswd_auth', 'login': 'true', # #etcd_ca_default_days=1825 +# Upgrade Control +# +# By default nodes are upgraded in a serial manner one at a time and all failures +# are fatal +#openshift_upgrade_nodes_serial=1 +#openshift_upgrade_nodes_max_fail_percentage=0 +# +# You can specify the number of nodes to upgrade at once. We do not currently +# attempt to verify that you have capacity to drain this many nodes at once +# so please be careful when specifying these values. You should also verify that +# the expected number of nodes are all schedulable and ready before starting an +# upgrade. If it's not possible to drain the requested nodes the upgrade will +# stall indefinitely until the drain is successful. +# +# If you're upgrading more than one node at a time you can specify the maximum +# percentage of failure within the batch before the upgrade is aborted. Any +# nodes that do fail are ignored for the rest of the playbook run and you should +# take care to investigate the failure and return the node to service so that +# your cluster. +# +# The percentage must exceed the value, this would fail on two failures +# openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=49 +# where as this would not +# openshift_upgrade_nodes_serial=4 openshift_upgrade_nodes_max_fail_percentage=50 + # host group for masters [masters] ose3-master[1:3]-ansible.test.example.com diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml index 0ad934d2d..a335c1072 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml @@ -248,7 +248,7 @@ # This var must be set with -e on invocation, as it is not a per-host inventory var # and is evaluated early. Values such as "20%" can also be used. serial: "{{ openshift_upgrade_nodes_serial | default(1) }}" - any_errors_fatal: true + max_fail_percentage: "{{ openshift_upgrade_nodes_max_fail_percentage | default(0) }}" pre_tasks: - name: Load lib_openshift modules diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml index 4d455fe0a..91dbc2cd4 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml @@ -4,7 +4,7 @@ # This var must be set with -e on invocation, as it is not a per-host inventory var # and is evaluated early. Values such as "20%" can also be used. serial: "{{ openshift_upgrade_nodes_serial | default(1) }}" - any_errors_fatal: true + max_fail_percentage: "{{ openshift_upgrade_nodes_max_fail_percentage | default(0) }}" pre_tasks: - name: Load lib_openshift modules |