From f67fcebdf7dde847c39b0ed450ce7267f2945ebe Mon Sep 17 00:00:00 2001 From: Devan Goodwin Date: Wed, 10 Aug 2016 10:40:20 -0300 Subject: Shutdown Docker before upgrading the rpm. This avoids the automatic image migration in 1.10, which can take a very long time and potentially cause rpm db corruption. --- playbooks/common/openshift-cluster/upgrades/docker/upgrade.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/playbooks/common/openshift-cluster/upgrades/docker/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/docker/upgrade.yml index 20d66522f..5fa4b7355 100644 --- a/playbooks/common/openshift-cluster/upgrades/docker/upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/docker/upgrade.yml @@ -18,9 +18,13 @@ register: nuke_images_result when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool +- service: name=docker state=stopped + - name: Upgrade Docker action: "{{ ansible_pkg_mgr }} name=docker{{ '-' + docker_version }} state=present" +- service: name=docker state=started + - name: Restart containerized services service: name={{ item }} state=started with_items: -- cgit v1.2.3 From b89c835e3235f2628b37de15713c311d1b5a4bad Mon Sep 17 00:00:00 2001 From: Devan Goodwin Date: Thu, 11 Aug 2016 11:30:19 -0300 Subject: Improvements for Docker 1.10+ upgrade image nuking. In a parallel step prior to real upgrade tasks, clear out all unused Docker images on all hosts. This should be relatively safe to interrupt as no real upgrade steps have taken place. Once into actual upgrade, we again clear all images only this time with force, and after stopping and removing all containers. Both rmi commands use a new and hopefully less error prone command to do the removal, this should avoid missed orphans as we were hitting before. Added some logging around the current image count before and after this step, most of them are only printed if we're crossing the 1.10 boundary but one does not, just for additional information in your ansible log. --- .../openshift-cluster/upgrades/docker/upgrade.yml | 14 +++++++++ .../upgrades/files/nuke_images.sh | 8 ++++-- .../common/openshift-cluster/upgrades/pre.yml | 20 ++++++------- .../common/openshift-cluster/upgrades/upgrade.yml | 33 ++++++++++++++++++---- 4 files changed, 55 insertions(+), 20 deletions(-) diff --git a/playbooks/common/openshift-cluster/upgrades/docker/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/docker/upgrade.yml index 5fa4b7355..03e7b844c 100644 --- a/playbooks/common/openshift-cluster/upgrades/docker/upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/docker/upgrade.yml @@ -13,11 +13,25 @@ failed_when: false when: openshift.common.is_containerized | bool +- name: Check Docker image count + shell: "docker images -aq | wc -l" + register: docker_image_count + +- debug: var=docker_image_count.stdout + - name: Remove all containers and images script: nuke_images.sh docker register: nuke_images_result when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool +- name: Check Docker image count + shell: "docker images -aq | wc -l" + register: docker_image_count + when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + +- debug: var=docker_image_count.stdout + when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + - service: name=docker state=stopped - name: Upgrade Docker diff --git a/playbooks/common/openshift-cluster/upgrades/files/nuke_images.sh b/playbooks/common/openshift-cluster/upgrades/files/nuke_images.sh index 6b155f7fa..8635eab0d 100644 --- a/playbooks/common/openshift-cluster/upgrades/files/nuke_images.sh +++ b/playbooks/common/openshift-cluster/upgrades/files/nuke_images.sh @@ -15,9 +15,11 @@ then fi # Delete all images (forcefully) -image_ids=`docker images -q` +image_ids=`docker images -aq` if test -n "$image_ids" then - # Taken from: https://gist.github.com/brianclements/f72b2de8e307c7b56689#gistcomment-1443144 - docker rmi $(docker images | grep "$2/\|/$2 \| $2 \|$2 \|$2-\|$2_" | awk '{print $1 ":" $2}') 2>/dev/null || echo "No images matching \"$2\" left to purge." + # Some layers are deleted recursively and are no longer present + # when docker goes to remove them: + docker rmi -f `docker images -aq` || true fi + diff --git a/playbooks/common/openshift-cluster/upgrades/pre.yml b/playbooks/common/openshift-cluster/upgrades/pre.yml index b5fbc4af6..42a24eaf8 100644 --- a/playbooks/common/openshift-cluster/upgrades/pre.yml +++ b/playbooks/common/openshift-cluster/upgrades/pre.yml @@ -185,10 +185,12 @@ - name: Verify docker upgrade targets hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config tasks: - - name: Determine available Docker - script: ../files/rpm_versions.sh docker - register: g_docker_version_result - when: not openshift.common.is_atomic | bool + # Only check if docker upgrade is required if docker_upgrade is not + # already set to False. + - include: docker/upgrade_check.yml + when: docker_upgrade is not defined or docker_upgrade | bool and not openshift.common.is_atomic | bool + + # Additional checks for Atomic hosts: - name: Determine available Docker shell: "rpm -q --queryformat '---\ncurr_version: %{VERSION}\navail_version: \n' docker" @@ -196,18 +198,12 @@ when: openshift.common.is_atomic | bool - set_fact: - g_docker_version: "{{ g_docker_version_result.stdout | from_yaml }}" - when: not openshift.common.is_atomic | bool - - - set_fact: - g_docker_version: "{{ g_atomic_docker_version_result.stdout | from_yaml }}" + l_docker_version: "{{ g_atomic_docker_version_result.stdout | from_yaml }}" when: openshift.common.is_atomic | bool - fail: msg: This playbook requires access to Docker 1.10 or later - when: g_docker_version.avail_version | default(g_docker_version.curr_version, true) | version_compare('1.10','<') - - # TODO: add check to upgrade ostree to get latest Docker + when: openshift.common.is_atomic | bool and l_docker_version.avail_version | default(l_docker_version.curr_version, true) | version_compare('1.10','<') - set_fact: pre_upgrade_complete: True diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/upgrade.yml index dee086cf5..3ec47d6f3 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade.yml @@ -3,6 +3,34 @@ # The restart playbook should be run after this playbook completes. ############################################################################### +# Separate step so we can execute in parallel and clear out anything unused +# before we get into the serialized upgrade process which will then remove +# remaining images if possible. +- name: Cleanup unused Docker images + hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config + tasks: + - name: Check Docker image count + shell: "docker images -aq | wc -l" + register: docker_image_count + when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + + - debug: var=docker_image_count.stdout + when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + + - name: Remove unused Docker images for Docker 1.10+ migration + shell: "docker rmi `docker images -aq`" + # Will fail on images still in use: + failed_when: false + when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + + - name: Check Docker image count + shell: "docker images -aq | wc -l" + register: docker_image_count + when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + + - debug: var=docker_image_count.stdout + when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + ############################################################################### # Upgrade Masters ############################################################################### @@ -111,11 +139,6 @@ delegate_to: "{{ groups.oo_first_master.0 }}" when: inventory_hostname in groups.oo_nodes_to_config - # Only check if docker upgrade is required if docker_upgrade is not - # already set to False. - - include: docker/upgrade_check.yml - when: docker_upgrade is not defined or docker_upgrade | bool and not openshift.common.is_atomic | bool - - include: docker/upgrade.yml when: l_docker_upgrade is defined and l_docker_upgrade | bool and not openshift.common.is_atomic | bool - include: "{{ node_config_hook }}" -- cgit v1.2.3