From ad47ff1df5147d35572ee3ae08f0fd74fade5e33 Mon Sep 17 00:00:00 2001
From: Devan Goodwin <dgoodwin@redhat.com>
Date: Thu, 22 Sep 2016 14:31:00 -0300
Subject: Allow a couple of retries when unscheduling/rescheduling nodes in
 upgrade.

The manage-node calls can fail with a transient "object has been modified"
error asking you to retry your changes on the latest version of the object.

Allow up to three retries to see if we can get the change to take
effect.
---
 .../common/openshift-cluster/upgrades/upgrade_nodes.yml      | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'playbooks')

diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
index 917c95e29..9b572dcdf 100644
--- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
+++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml
@@ -32,6 +32,12 @@
       {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --schedulable=false
     delegate_to: "{{ groups.oo_first_master.0 }}"
     when: inventory_hostname in groups.oo_nodes_to_upgrade
+    # NOTE: This command can fail with a transient "object has been modified"
+    # error; allow a few retries for a more reliable upgrade.
+    register: node_unsched
+    until: node_unsched.rc == 0
+    retries: 3
+    delay: 1
 
   - name: Evacuate Node for Kubelet upgrade
     command: >
@@ -61,3 +67,9 @@
       {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --schedulable=true
     delegate_to: "{{ groups.oo_first_master.0 }}"
     when: inventory_hostname in groups.oo_nodes_to_upgrade and was_schedulable | bool
+    register: node_sched
+    until: node_sched.rc == 0
+    retries: 3
+    delay: 1
+
+
-- 
cgit v1.2.3