diff options
author | Andrew Butcher <abutcher@afrolegs.com> | 2016-10-04 09:45:48 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-10-04 09:45:48 -0400 |
commit | 28f853477a3551b7518b7734e8ac80ca9c27da81 (patch) | |
tree | 86cecbe676e4939f83c9e0f7efb920ede112f3a7 /playbooks/common | |
parent | a025229edb987afb69d4799c84956821a2c56ecc (diff) | |
parent | 9461cbf44d75c657ed400324b1cc2c39a2d6b9ff (diff) | |
download | openshift-28f853477a3551b7518b7734e8ac80ca9c27da81.tar.gz openshift-28f853477a3551b7518b7734e8ac80ca9c27da81.tar.bz2 openshift-28f853477a3551b7518b7734e8ac80ca9c27da81.tar.xz openshift-28f853477a3551b7518b7734e8ac80ca9c27da81.zip |
Merge pull request #2441 from dgoodwin/34-upgrade-improvements
3.4 Upgrade Improvements
Diffstat (limited to 'playbooks/common')
17 files changed, 447 insertions, 408 deletions
diff --git a/playbooks/common/openshift-cluster/initialize_facts.yml b/playbooks/common/openshift-cluster/initialize_facts.yml index 04dde632b..6d83d2527 100644 --- a/playbooks/common/openshift-cluster/initialize_facts.yml +++ b/playbooks/common/openshift-cluster/initialize_facts.yml @@ -11,3 +11,5 @@ hostname: "{{ openshift_hostname | default(None) }}" - set_fact: openshift_docker_hosted_registry_network: "{{ hostvars[groups.oo_first_master.0].openshift.common.portal_net }}" + - set_fact: + openshift_deployment_type: "{{ deployment_type }}" diff --git a/playbooks/common/openshift-cluster/upgrades/cleanup_unused_images.yml b/playbooks/common/openshift-cluster/upgrades/cleanup_unused_images.yml new file mode 100644 index 000000000..6e953be69 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/cleanup_unused_images.yml @@ -0,0 +1,22 @@ +--- +- name: Check Docker image count + shell: "docker images -aq | wc -l" + register: docker_image_count + when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + +- debug: var=docker_image_count.stdout + when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + +- name: Remove unused Docker images for Docker 1.10+ migration + shell: "docker rmi `docker images -aq`" + # Will fail on images still in use: + failed_when: false + when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + +- name: Check Docker image count + shell: "docker images -aq | wc -l" + register: docker_image_count + when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + +- debug: var=docker_image_count.stdout + when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool diff --git a/playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml b/playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml index e8a20aa2b..78f6c46f3 100644 --- a/playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml +++ b/playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml @@ -9,6 +9,7 @@ local_action: command mktemp -d /tmp/openshift-ansible-XXXXXXX register: local_cert_sync_tmpdir changed_when: false + when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool) - name: Create service signer certificate hosts: oo_first_master @@ -17,6 +18,7 @@ command: mktemp -d /tmp/openshift-ansible-XXXXXXX register: remote_cert_create_tmpdir changed_when: false + when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool) - name: Create service signer certificate command: > @@ -27,6 +29,7 @@ --serial=service-signer.serial.txt args: chdir: "{{ remote_cert_create_tmpdir.stdout }}/" + when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool) - name: Retrieve service signer certificate fetch: @@ -38,12 +41,14 @@ with_items: - "service-signer.crt" - "service-signer.key" + when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool) - name: Delete remote temp directory file: name: "{{ remote_cert_create_tmpdir.stdout }}" state: absent changed_when: false + when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool) - name: Deploy service signer certificate hosts: oo_masters_to_config @@ -55,6 +60,7 @@ with_items: - "service-signer.crt" - "service-signer.key" + when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool) - name: Delete local temp directory hosts: localhost @@ -67,3 +73,4 @@ name: "{{ local_cert_sync_tmpdir.stdout }}" state: absent changed_when: false + when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool) diff --git a/playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml b/playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml index 8002af4fc..fc26d029e 100644 --- a/playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml +++ b/playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml @@ -1,7 +1,7 @@ --- # This snippet determines if a Docker upgrade is required by checking the inventory -# variables, the available packages, and sets l_docker_version to True if so. +# variables, the available packages, and sets l_docker_upgrade to True if so. - set_fact: docker_upgrade: True diff --git a/playbooks/common/openshift-cluster/upgrades/init.yml b/playbooks/common/openshift-cluster/upgrades/init.yml new file mode 100644 index 000000000..f3b3abe0d --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/init.yml @@ -0,0 +1,50 @@ +--- +- include: ../verify_ansible_version.yml + +- hosts: localhost + connection: local + become: no + gather_facts: no + tasks: + - include_vars: ../../../byo/openshift-cluster/cluster_hosts.yml + - add_host: + name: "{{ item }}" + groups: l_oo_all_hosts + with_items: g_all_hosts | default([]) + +- hosts: l_oo_all_hosts + gather_facts: no + tasks: + - include_vars: ../../../byo/openshift-cluster/cluster_hosts.yml + +- include: ../evaluate_groups.yml + vars: + # Do not allow adding hosts during upgrade. + g_new_master_hosts: [] + g_new_node_hosts: [] + openshift_cluster_id: "{{ cluster_id | default('default') }}" + openshift_deployment_type: "{{ deployment_type }}" + +- name: Set oo_options + hosts: oo_all_hosts + tasks: + - set_fact: + openshift_docker_additional_registries: "{{ lookup('oo_option', 'docker_additional_registries') }}" + when: openshift_docker_additional_registries is not defined + - set_fact: + openshift_docker_insecure_registries: "{{ lookup('oo_option', 'docker_insecure_registries') }}" + when: openshift_docker_insecure_registries is not defined + - set_fact: + openshift_docker_blocked_registries: "{{ lookup('oo_option', 'docker_blocked_registries') }}" + when: openshift_docker_blocked_registries is not defined + - set_fact: + openshift_docker_options: "{{ lookup('oo_option', 'docker_options') }}" + when: openshift_docker_options is not defined + - set_fact: + openshift_docker_log_driver: "{{ lookup('oo_option', 'docker_log_driver') }}" + when: openshift_docker_log_driver is not defined + - set_fact: + openshift_docker_log_options: "{{ lookup('oo_option', 'docker_log_options') }}" + when: openshift_docker_log_options is not defined + +- include: ../initialize_facts.yml diff --git a/playbooks/common/openshift-cluster/upgrades/initialize_nodes_to_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/initialize_nodes_to_upgrade.yml new file mode 100644 index 000000000..4e375ac26 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/initialize_nodes_to_upgrade.yml @@ -0,0 +1,40 @@ +--- +- name: Filter list of nodes to be upgraded if necessary + hosts: oo_first_master + tasks: + - name: Retrieve list of openshift nodes matching upgrade label + command: > + {{ openshift.common.client_binary }} + get nodes + --config={{ openshift.common.config_base }}/master/admin.kubeconfig + --selector={{ openshift_upgrade_nodes_label }} + -o jsonpath='{.items[*].metadata.name}' + register: matching_nodes + changed_when: false + when: openshift_upgrade_nodes_label is defined + + - set_fact: + nodes_to_upgrade: "{{ matching_nodes.stdout.split(' ') }}" + when: openshift_upgrade_nodes_label is defined + + # We got a list of nodes with the label, now we need to match these with inventory hosts + # using their openshift.common.hostname fact. + - name: Map labelled nodes to inventory hosts + add_host: + name: "{{ item }}" + groups: temp_nodes_to_upgrade + ansible_ssh_user: "{{ g_ssh_user | default(omit) }}" + ansible_become: "{{ g_sudo | default(omit) }}" + with_items: " {{ groups['oo_nodes_to_config'] }}" + when: openshift_upgrade_nodes_label is defined and hostvars[item].openshift.common.hostname in nodes_to_upgrade + changed_when: false + + # Build up the oo_nodes_to_upgrade group, use the list filtered by label if + # present, otherwise hit all nodes: + - name: Evaluate oo_nodes_to_upgrade + add_host: + name: "{{ item }}" + groups: oo_nodes_to_upgrade + ansible_ssh_user: "{{ g_ssh_user | default(omit) }}" + ansible_become: "{{ g_sudo | default(omit) }}" + with_items: "{{ groups['temp_nodes_to_upgrade'] | default(groups['oo_nodes_to_config']) }}" diff --git a/playbooks/common/openshift-cluster/upgrades/post.yml b/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml index e43954453..e43954453 100644 --- a/playbooks/common/openshift-cluster/upgrades/post.yml +++ b/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml diff --git a/playbooks/common/openshift-cluster/upgrades/pre.yml b/playbooks/common/openshift-cluster/upgrades/pre.yml deleted file mode 100644 index 42a24eaf8..000000000 --- a/playbooks/common/openshift-cluster/upgrades/pre.yml +++ /dev/null @@ -1,311 +0,0 @@ ---- -############################################################################### -# Evaluate host groups and gather facts -############################################################################### - -- include: ../initialize_facts.yml - -- name: Update repos and initialize facts on all hosts - hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config:oo_lb_to_config - roles: - - openshift_repos - -- name: Set openshift_no_proxy_internal_hostnames - hosts: oo_masters_to_config:oo_nodes_to_config - tasks: - - set_fact: - openshift_no_proxy_internal_hostnames: "{{ hostvars | oo_select_keys(groups['oo_nodes_to_config'] - | union(groups['oo_masters_to_config']) - | union(groups['oo_etcd_to_config'] | default([]))) - | oo_collect('openshift.common.hostname') | default([]) | join (',') - }}" - when: "{{ (openshift_http_proxy is defined or openshift_https_proxy is defined) and - openshift_generate_no_proxy_hosts | default(True) | bool }}" - -- name: Evaluate additional groups for upgrade - hosts: localhost - connection: local - become: no - tasks: - - name: Evaluate etcd_hosts_to_backup - add_host: - name: "{{ item }}" - groups: etcd_hosts_to_backup - with_items: groups.oo_etcd_to_config if groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config | length > 0 else groups.oo_first_master - -############################################################################### -# Pre-upgrade checks -############################################################################### -- name: Verify upgrade can proceed on first master - hosts: oo_first_master - vars: - g_pacemaker_upgrade_url_segment: "{{ 'org/latest' if deployment_type =='origin' else '.com/enterprise/3.1' }}" - gather_facts: no - tasks: - - fail: - msg: > - This upgrade is only supported for atomic-enterprise, origin, openshift-enterprise, and online - deployment types - when: deployment_type not in ['atomic-enterprise', 'origin','openshift-enterprise', 'online'] - - - fail: - msg: > - This upgrade does not support Pacemaker: - https://docs.openshift.{{ g_pacemaker_upgrade_url_segment }}/install_config/upgrading/pacemaker_to_native_ha.html - when: openshift.master.cluster_method is defined and openshift.master.cluster_method == 'pacemaker' - - # Error out in situations where the user has older versions specified in their - # inventory in any of the openshift_release, openshift_image_tag, and - # openshift_pkg_version variables. These must be removed or updated to proceed - # with upgrade. - # TODO: Should we block if you're *over* the next major release version as well? - - fail: - msg: > - openshift_pkg_version is {{ openshift_pkg_version }} which is not a - valid version for a {{ openshift_upgrade_target }} upgrade - when: openshift_pkg_version is defined and openshift_pkg_version.split('-',1).1 | version_compare(openshift_upgrade_target ,'<') - - - fail: - msg: > - openshift_image_tag is {{ openshift_image_tag }} which is not a - valid version for a {{ openshift_upgrade_target }} upgrade - when: openshift_image_tag is defined and openshift_image_tag.split('v',1).1 | version_compare(openshift_upgrade_target ,'<') - - - set_fact: - openshift_release: "{{ openshift_release[1:] }}" - when: openshift_release is defined and openshift_release[0] == 'v' - - - fail: - msg: > - openshift_release is {{ openshift_release }} which is not a - valid release for a {{ openshift_upgrade_target }} upgrade - when: openshift_release is defined and not openshift_release | version_compare(openshift_upgrade_target ,'=') - -- include: ../../../common/openshift-cluster/initialize_openshift_version.yml - vars: - # Request specific openshift_release and let the openshift_version role handle converting this - # to a more specific version, respecting openshift_image_tag and openshift_pkg_version if - # defined, and overriding the normal behavior of protecting the installed version - openshift_release: "{{ openshift_upgrade_target }}" - openshift_protect_installed_version: False - # Docker role (a dependency) should be told not to do anything to installed version - # of docker, we handle this separately during upgrade. (the inventory may have a - # docker_version defined, we don't want to actually do it until later) - docker_protect_installed_version: True - -- name: Verify master processes - hosts: oo_masters_to_config - roles: - - openshift_facts - tasks: - - openshift_facts: - role: master - local_facts: - ha: "{{ groups.oo_masters_to_config | length > 1 }}" - - - name: Ensure Master is running - service: - name: "{{ openshift.common.service_type }}-master" - state: started - enabled: yes - when: openshift.master.ha is defined and not openshift.master.ha | bool and openshift.common.is_containerized | bool - - - name: Ensure HA Master is running - service: - name: "{{ openshift.common.service_type }}-master-api" - state: started - enabled: yes - when: openshift.master.ha is defined and openshift.master.ha | bool and openshift.common.is_containerized | bool - - - name: Ensure HA Master is running - service: - name: "{{ openshift.common.service_type }}-master-controllers" - state: started - enabled: yes - when: openshift.master.ha is defined and openshift.master.ha | bool and openshift.common.is_containerized | bool - -- name: Verify node processes - hosts: oo_nodes_to_config - roles: - - openshift_facts - - openshift_docker_facts - tasks: - - name: Ensure Node is running - service: - name: "{{ openshift.common.service_type }}-node" - state: started - enabled: yes - when: openshift.common.is_containerized | bool - -- name: Verify upgrade targets - hosts: oo_masters_to_config:oo_nodes_to_config - vars: - openshift_docker_hosted_registry_network: "{{ hostvars[groups.oo_first_master.0].openshift.common.portal_net }}" - pre_tasks: - - fail: - msg: Verify OpenShift is already installed - when: openshift.common.version is not defined - - - fail: - msg: Verify the correct version was found - when: verify_upgrade_version is defined and openshift_version != verify_upgrade_version - - - name: Clean package cache - command: "{{ ansible_pkg_mgr }} clean all" - when: not openshift.common.is_atomic | bool - - - set_fact: - g_new_service_name: "{{ 'origin' if deployment_type =='origin' else 'atomic-openshift' }}" - when: not openshift.common.is_containerized | bool - - - name: Verify containers are available for upgrade - command: > - docker pull {{ openshift.common.cli_image }}:{{ openshift_image_tag }} - register: pull_result - changed_when: "'Downloaded newer image' in pull_result.stdout" - when: openshift.common.is_containerized | bool - - - name: Check latest available OpenShift RPM version - command: > - {{ repoquery_cmd }} --qf '%{version}' "{{ openshift.common.service_type }}" - failed_when: false - changed_when: false - register: avail_openshift_version - when: not openshift.common.is_containerized | bool - - - name: Verify OpenShift RPMs are available for upgrade - fail: - msg: "OpenShift {{ avail_openshift_version.stdout }} is available, but {{ openshift_upgrade_target }} or greater is required" - when: not openshift.common.is_containerized | bool and not avail_openshift_version | skipped and avail_openshift_version.stdout | default('0.0', True) | version_compare(openshift_release, '<') - - - fail: - msg: "This upgrade playbook must be run against OpenShift {{ openshift_upgrade_min }} or later" - when: deployment_type == 'origin' and openshift.common.version | version_compare(openshift_upgrade_min,'<') - -- name: Verify docker upgrade targets - hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config - tasks: - # Only check if docker upgrade is required if docker_upgrade is not - # already set to False. - - include: docker/upgrade_check.yml - when: docker_upgrade is not defined or docker_upgrade | bool and not openshift.common.is_atomic | bool - - # Additional checks for Atomic hosts: - - - name: Determine available Docker - shell: "rpm -q --queryformat '---\ncurr_version: %{VERSION}\navail_version: \n' docker" - register: g_atomic_docker_version_result - when: openshift.common.is_atomic | bool - - - set_fact: - l_docker_version: "{{ g_atomic_docker_version_result.stdout | from_yaml }}" - when: openshift.common.is_atomic | bool - - - fail: - msg: This playbook requires access to Docker 1.10 or later - when: openshift.common.is_atomic | bool and l_docker_version.avail_version | default(l_docker_version.curr_version, true) | version_compare('1.10','<') - - - set_fact: - pre_upgrade_complete: True - - -############################################################################## -# Gate on pre-upgrade checks -############################################################################## -- name: Gate on pre-upgrade checks - hosts: localhost - connection: local - become: no - vars: - pre_upgrade_hosts: "{{ groups.oo_masters_to_config | union(groups.oo_nodes_to_config) }}" - tasks: - - set_fact: - pre_upgrade_completed: "{{ hostvars - | oo_select_keys(pre_upgrade_hosts) - | oo_collect('inventory_hostname', {'pre_upgrade_complete': true}) }}" - - set_fact: - pre_upgrade_failed: "{{ pre_upgrade_hosts | difference(pre_upgrade_completed) }}" - - fail: - msg: "Upgrade cannot continue. The following hosts did not complete pre-upgrade checks: {{ pre_upgrade_failed | join(',') }}" - when: pre_upgrade_failed | length > 0 - -############################################################################### -# Backup etcd -############################################################################### -- name: Backup etcd - hosts: etcd_hosts_to_backup - vars: - embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}" - timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" - roles: - - openshift_facts - tasks: - # Ensure we persist the etcd role for this host in openshift_facts - - openshift_facts: - role: etcd - local_facts: {} - when: "'etcd' not in openshift" - - - stat: path=/var/lib/openshift - register: var_lib_openshift - - - stat: path=/var/lib/origin - register: var_lib_origin - - - name: Create origin symlink if necessary - file: src=/var/lib/openshift/ dest=/var/lib/origin state=link - when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False - - # TODO: replace shell module with command and update later checks - # We assume to be using the data dir for all backups. - - name: Check available disk space for etcd backup - shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1 - register: avail_disk - - # TODO: replace shell module with command and update later checks - - name: Check current embedded etcd disk usage - shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1 - register: etcd_disk_usage - when: embedded_etcd | bool - - - name: Abort if insufficient disk space for etcd backup - fail: - msg: > - {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup, - {{ avail_disk.stdout }} Kb available. - when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int) - - - name: Install etcd (for etcdctl) - action: "{{ ansible_pkg_mgr }} name=etcd state=latest" - when: not openshift.common.is_atomic | bool - - - name: Generate etcd backup - command: > - etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }} - --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }} - - - set_fact: - etcd_backup_complete: True - - - name: Display location of etcd backup - debug: - msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}" - - -############################################################################## -# Gate on etcd backup -############################################################################## -- name: Gate on etcd backup - hosts: localhost - connection: local - become: no - tasks: - - set_fact: - etcd_backup_completed: "{{ hostvars - | oo_select_keys(groups.etcd_hosts_to_backup) - | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}" - - set_fact: - etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}" - - fail: - msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}" - when: etcd_backup_failed | length > 0 diff --git a/playbooks/common/openshift-cluster/upgrades/pre/gate_checks.yml b/playbooks/common/openshift-cluster/upgrades/pre/gate_checks.yml new file mode 100644 index 000000000..8ecae4539 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/pre/gate_checks.yml @@ -0,0 +1,6 @@ +--- +- name: Flag pre-upgrade checks complete for hosts without errors + hosts: oo_masters_to_config:oo_nodes_to_upgrade:oo_etcd_to_config + tasks: + - set_fact: + pre_upgrade_complete: True diff --git a/playbooks/common/openshift-cluster/upgrades/pre/roles b/playbooks/common/openshift-cluster/upgrades/pre/roles new file mode 120000 index 000000000..415645be6 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/pre/roles @@ -0,0 +1 @@ +../../../../../roles/
\ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/pre/verify_control_plane_running.yml b/playbooks/common/openshift-cluster/upgrades/pre/verify_control_plane_running.yml new file mode 100644 index 000000000..06eb5f936 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/pre/verify_control_plane_running.yml @@ -0,0 +1,31 @@ +--- +- name: Verify master processes + hosts: oo_masters_to_config + roles: + - openshift_facts + tasks: + - openshift_facts: + role: master + local_facts: + ha: "{{ groups.oo_masters_to_config | length > 1 }}" + + - name: Ensure Master is running + service: + name: "{{ openshift.common.service_type }}-master" + state: started + enabled: yes + when: openshift.master.ha is defined and not openshift.master.ha | bool and openshift.common.is_containerized | bool + + - name: Ensure HA Master is running + service: + name: "{{ openshift.common.service_type }}-master-api" + state: started + enabled: yes + when: openshift.master.ha is defined and openshift.master.ha | bool and openshift.common.is_containerized | bool + + - name: Ensure HA Master is running + service: + name: "{{ openshift.common.service_type }}-master-controllers" + state: started + enabled: yes + when: openshift.master.ha is defined and openshift.master.ha | bool and openshift.common.is_containerized | bool diff --git a/playbooks/common/openshift-cluster/upgrades/pre/verify_docker_upgrade_targets.yml b/playbooks/common/openshift-cluster/upgrades/pre/verify_docker_upgrade_targets.yml new file mode 100644 index 000000000..ba4d77617 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/pre/verify_docker_upgrade_targets.yml @@ -0,0 +1,23 @@ +--- +- name: Verify docker upgrade targets + hosts: oo_masters_to_config:oo_nodes_to_upgrade:oo_etcd_to_config + tasks: + # Only check if docker upgrade is required if docker_upgrade is not + # already set to False. + - include: ../docker/upgrade_check.yml + when: docker_upgrade is not defined or docker_upgrade | bool and not openshift.common.is_atomic | bool + + # Additional checks for Atomic hosts: + + - name: Determine available Docker + shell: "rpm -q --queryformat '---\ncurr_version: %{VERSION}\navail_version: \n' docker" + register: g_atomic_docker_version_result + when: openshift.common.is_atomic | bool + + - set_fact: + l_docker_version: "{{ g_atomic_docker_version_result.stdout | from_yaml }}" + when: openshift.common.is_atomic | bool + + - fail: + msg: This playbook requires access to Docker 1.10 or later + when: openshift.common.is_atomic | bool and l_docker_version.avail_version | default(l_docker_version.curr_version, true) | version_compare('1.10','<') diff --git a/playbooks/common/openshift-cluster/upgrades/pre/verify_inventory_vars.yml b/playbooks/common/openshift-cluster/upgrades/pre/verify_inventory_vars.yml new file mode 100644 index 000000000..9a959a959 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/pre/verify_inventory_vars.yml @@ -0,0 +1,37 @@ +--- +- name: Verify upgrade can proceed on first master + hosts: oo_first_master + gather_facts: no + tasks: + - fail: + msg: > + This upgrade is only supported for origin, openshift-enterprise, and online + deployment types + when: deployment_type not in ['origin','openshift-enterprise', 'online'] + + # Error out in situations where the user has older versions specified in their + # inventory in any of the openshift_release, openshift_image_tag, and + # openshift_pkg_version variables. These must be removed or updated to proceed + # with upgrade. + # TODO: Should we block if you're *over* the next major release version as well? + - fail: + msg: > + openshift_pkg_version is {{ openshift_pkg_version }} which is not a + valid version for a {{ openshift_upgrade_target }} upgrade + when: openshift_pkg_version is defined and openshift_pkg_version.split('-',1).1 | version_compare(openshift_upgrade_target ,'<') + + - fail: + msg: > + openshift_image_tag is {{ openshift_image_tag }} which is not a + valid version for a {{ openshift_upgrade_target }} upgrade + when: openshift_image_tag is defined and openshift_image_tag.split('v',1).1 | version_compare(openshift_upgrade_target ,'<') + + - set_fact: + openshift_release: "{{ openshift_release[1:] }}" + when: openshift_release is defined and openshift_release[0] == 'v' + + - fail: + msg: > + openshift_release is {{ openshift_release }} which is not a + valid release for a {{ openshift_upgrade_target }} upgrade + when: openshift_release is defined and not openshift_release | version_compare(openshift_upgrade_target ,'=') diff --git a/playbooks/common/openshift-cluster/upgrades/pre/verify_nodes_running.yml b/playbooks/common/openshift-cluster/upgrades/pre/verify_nodes_running.yml new file mode 100644 index 000000000..354af3cde --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/pre/verify_nodes_running.yml @@ -0,0 +1,13 @@ +--- +- name: Verify node processes + hosts: oo_nodes_to_config + roles: + - openshift_facts + - openshift_docker_facts + tasks: + - name: Ensure Node is running + service: + name: "{{ openshift.common.service_type }}-node" + state: started + enabled: yes + when: openshift.common.is_containerized | bool diff --git a/playbooks/common/openshift-cluster/upgrades/pre/verify_upgrade_targets.yml b/playbooks/common/openshift-cluster/upgrades/pre/verify_upgrade_targets.yml new file mode 100644 index 000000000..9632626a4 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/pre/verify_upgrade_targets.yml @@ -0,0 +1,45 @@ +--- +- name: Verify upgrade targets + hosts: oo_masters_to_config:oo_nodes_to_upgrade + vars: + openshift_docker_hosted_registry_network: "{{ hostvars[groups.oo_first_master.0].openshift.common.portal_net }}" + pre_tasks: + - fail: + msg: Verify OpenShift is already installed + when: openshift.common.version is not defined + + - fail: + msg: Verify the correct version was found + when: verify_upgrade_version is defined and openshift_version != verify_upgrade_version + + - name: Clean package cache + command: "{{ ansible_pkg_mgr }} clean all" + when: not openshift.common.is_atomic | bool + + - set_fact: + g_new_service_name: "{{ 'origin' if deployment_type =='origin' else 'atomic-openshift' }}" + when: not openshift.common.is_containerized | bool + + - name: Verify containers are available for upgrade + command: > + docker pull {{ openshift.common.cli_image }}:{{ openshift_image_tag }} + register: pull_result + changed_when: "'Downloaded newer image' in pull_result.stdout" + when: openshift.common.is_containerized | bool + + - name: Check latest available OpenShift RPM version + command: > + {{ repoquery_cmd }} --qf '%{version}' "{{ openshift.common.service_type }}" + failed_when: false + changed_when: false + register: avail_openshift_version + when: not openshift.common.is_containerized | bool + + - name: Verify OpenShift RPMs are available for upgrade + fail: + msg: "OpenShift {{ avail_openshift_version.stdout }} is available, but {{ openshift_upgrade_target }} or greater is required" + when: not openshift.common.is_containerized | bool and not avail_openshift_version | skipped and avail_openshift_version.stdout | default('0.0', True) | version_compare(openshift_release, '<') + + - fail: + msg: "This upgrade playbook must be run against OpenShift {{ openshift_upgrade_min }} or later" + when: deployment_type == 'origin' and openshift.common.version | version_compare(openshift_upgrade_min,'<') diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml index 8a2784fb4..2c641e21e 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml @@ -1,39 +1,93 @@ --- ############################################################################### -# The restart playbook should be run after this playbook completes. +# Upgrade Masters ############################################################################### - -# Separate step so we can execute in parallel and clear out anything unused -# before we get into the serialized upgrade process which will then remove -# remaining images if possible. -- name: Cleanup unused Docker images - hosts: oo_masters_to_config:oo_nodes_to_config:oo_etcd_to_config +- name: Evaluate additional groups for upgrade + hosts: localhost + connection: local + become: no tasks: - - name: Check Docker image count - shell: "docker images -aq | wc -l" - register: docker_image_count - when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + - name: Evaluate etcd_hosts_to_backup + add_host: + name: "{{ item }}" + groups: etcd_hosts_to_backup + with_items: groups.oo_etcd_to_config if groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config | length > 0 else groups.oo_first_master + +- name: Backup etcd + hosts: etcd_hosts_to_backup + vars: + embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}" + timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" + roles: + - openshift_facts + tasks: + # Ensure we persist the etcd role for this host in openshift_facts + - openshift_facts: + role: etcd + local_facts: {} + when: "'etcd' not in openshift" + + - stat: path=/var/lib/openshift + register: var_lib_openshift + + - stat: path=/var/lib/origin + register: var_lib_origin + + - name: Create origin symlink if necessary + file: src=/var/lib/openshift/ dest=/var/lib/origin state=link + when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False + + # TODO: replace shell module with command and update later checks + # We assume to be using the data dir for all backups. + - name: Check available disk space for etcd backup + shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1 + register: avail_disk + + # TODO: replace shell module with command and update later checks + - name: Check current embedded etcd disk usage + shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1 + register: etcd_disk_usage + when: embedded_etcd | bool + + - name: Abort if insufficient disk space for etcd backup + fail: + msg: > + {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup, + {{ avail_disk.stdout }} Kb available. + when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int) + + - name: Install etcd (for etcdctl) + action: "{{ ansible_pkg_mgr }} name=etcd state=latest" + when: not openshift.common.is_atomic | bool + + - name: Generate etcd backup + command: > + etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }} + --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }} - - debug: var=docker_image_count.stdout - when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + - set_fact: + etcd_backup_complete: True - - name: Remove unused Docker images for Docker 1.10+ migration - shell: "docker rmi `docker images -aq`" - # Will fail on images still in use: - failed_when: false - when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool + - name: Display location of etcd backup + debug: + msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}" - - name: Check Docker image count - shell: "docker images -aq | wc -l" - register: docker_image_count - when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool - - debug: var=docker_image_count.stdout - when: docker_upgrade_nuke_images is defined and docker_upgrade_nuke_images | bool +- name: Gate on etcd backup + hosts: localhost + connection: local + become: no + tasks: + - set_fact: + etcd_backup_completed: "{{ hostvars + | oo_select_keys(groups.etcd_hosts_to_backup) + | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}" + - set_fact: + etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}" + - fail: + msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}" + when: etcd_backup_failed | length > 0 -############################################################################### -# Upgrade Masters -############################################################################### - name: Upgrade master packages hosts: oo_masters_to_config handlers: @@ -57,7 +111,6 @@ # Create service signer cert when missing. Service signer certificate # is added to master config in the master config hook for v3_3. - include: create_service_signer_cert.yml - when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool) - name: Upgrade master config and systemd units hosts: oo_masters_to_config @@ -143,9 +196,9 @@ origin_reconcile_bindings: "{{ deployment_type == 'origin' and openshift_version | version_compare('1.0.6', '>') }}" ent_reconcile_bindings: true openshift_docker_hosted_registry_network: "{{ hostvars[groups.oo_first_master.0].openshift.common.portal_net }}" - # Similar to pre.yml, we don't want to upgrade docker during the openshift_cli role, - # it will be updated when we perform node upgrade. - docker_protect_installed_version: True + # Another spot where we assume docker is running and do not want to accidentally trigger an unsafe + # restart. + skip_docker_role: True tasks: - name: Verifying the correct commandline tools are available shell: grep {{ verify_upgrade_version }} {{ openshift.common.admin_binary}} @@ -177,71 +230,6 @@ - set_fact: reconcile_complete: True -############################################################################### -# Upgrade Nodes -############################################################################### - -# Here we handle all tasks that might require a node evac. (upgrading docker, and the node service) -- name: Perform upgrades that may require node evacuation - hosts: oo_masters_to_config:oo_etcd_to_config:oo_nodes_to_config - serial: 1 - any_errors_fatal: true - roles: - - openshift_facts - handlers: - - include: ../../../../roles/openshift_node/handlers/main.yml - static: yes - tasks: - # TODO: To better handle re-trying failed upgrades, it would be nice to check if the node - # or docker actually needs an upgrade before proceeding. Perhaps best to save this until - # we merge upgrade functionality into the base roles and a normal config.yml playbook run. - - name: Determine if node is currently scheduleable - command: > - {{ openshift.common.client_binary }} get node {{ openshift.node.nodename }} -o json - register: node_output - delegate_to: "{{ groups.oo_first_master.0 }}" - changed_when: false - when: inventory_hostname in groups.oo_nodes_to_config - - - set_fact: - was_schedulable: "{{ 'unschedulable' not in (node_output.stdout | from_json).spec }}" - when: inventory_hostname in groups.oo_nodes_to_config - - - name: Mark unschedulable if host is a node - command: > - {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename }} --schedulable=false - delegate_to: "{{ groups.oo_first_master.0 }}" - when: inventory_hostname in groups.oo_nodes_to_config - - - name: Evacuate Node for Kubelet upgrade - command: > - {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename }} --evacuate --force - delegate_to: "{{ groups.oo_first_master.0 }}" - when: inventory_hostname in groups.oo_nodes_to_config - - - include: docker/upgrade.yml - when: l_docker_upgrade is defined and l_docker_upgrade | bool and not openshift.common.is_atomic | bool - - include: "{{ node_config_hook }}" - when: node_config_hook is defined and inventory_hostname in groups.oo_nodes_to_config - - - include: rpm_upgrade.yml - vars: - component: "node" - openshift_version: "{{ openshift_pkg_version | default('') }}" - when: inventory_hostname in groups.oo_nodes_to_config and not openshift.common.is_containerized | bool - - - include: containerized_node_upgrade.yml - when: inventory_hostname in groups.oo_nodes_to_config and openshift.common.is_containerized | bool - - - meta: flush_handlers - - - name: Set node schedulability - command: > - {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename }} --schedulable=true - delegate_to: "{{ groups.oo_first_master.0 }}" - when: inventory_hostname in groups.oo_nodes_to_config and was_schedulable | bool - - ############################################################################## # Gate on reconcile ############################################################################## @@ -259,3 +247,13 @@ - fail: msg: "Upgrade cannot continue. The following masters did not finish reconciling: {{ reconcile_failed | join(',') }}" when: reconcile_failed | length > 0 + +- name: Upgrade Docker on dedicated containerized etcd hosts + hosts: oo_etcd_to_config:!oo_nodes_to_upgrade + serial: 1 + any_errors_fatal: true + roles: + - openshift_facts + tasks: + - include: docker/upgrade.yml + when: l_docker_upgrade is defined and l_docker_upgrade | bool and not openshift.common.is_atomic | bool diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml new file mode 100644 index 000000000..9b572dcdf --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml @@ -0,0 +1,75 @@ +--- +- name: Evacuate and upgrade nodes + hosts: oo_nodes_to_upgrade + # This var must be set with -e on invocation, as it is not a per-host inventory var + # and is evaluated early. Values such as "20%" can also be used. + serial: "{{ openshift_upgrade_nodes_serial | default(1) }}" + any_errors_fatal: true + roles: + - openshift_facts + - docker + handlers: + - include: ../../../../roles/openshift_node/handlers/main.yml + static: yes + pre_tasks: + # TODO: To better handle re-trying failed upgrades, it would be nice to check if the node + # or docker actually needs an upgrade before proceeding. Perhaps best to save this until + # we merge upgrade functionality into the base roles and a normal config.yml playbook run. + - name: Determine if node is currently scheduleable + command: > + {{ openshift.common.client_binary }} get node {{ openshift.node.nodename | lower }} -o json + register: node_output + delegate_to: "{{ groups.oo_first_master.0 }}" + changed_when: false + when: inventory_hostname in groups.oo_nodes_to_upgrade + + - set_fact: + was_schedulable: "{{ 'unschedulable' not in (node_output.stdout | from_json).spec }}" + when: inventory_hostname in groups.oo_nodes_to_upgrade + + - name: Mark unschedulable if host is a node + command: > + {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --schedulable=false + delegate_to: "{{ groups.oo_first_master.0 }}" + when: inventory_hostname in groups.oo_nodes_to_upgrade + # NOTE: There is a transient "object has been modified" error here, allow a couple + # retries for a more reliable upgrade. + register: node_unsched + until: node_unsched.rc == 0 + retries: 3 + delay: 1 + + - name: Evacuate Node for Kubelet upgrade + command: > + {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --evacuate --force + delegate_to: "{{ groups.oo_first_master.0 }}" + when: inventory_hostname in groups.oo_nodes_to_upgrade + tasks: + - include: docker/upgrade.yml + when: l_docker_upgrade is defined and l_docker_upgrade | bool and not openshift.common.is_atomic | bool + + - include: "{{ node_config_hook }}" + when: node_config_hook is defined and inventory_hostname in groups.oo_nodes_to_upgrade + + - include: rpm_upgrade.yml + vars: + component: "node" + openshift_version: "{{ openshift_pkg_version | default('') }}" + when: inventory_hostname in groups.oo_nodes_to_upgrade and not openshift.common.is_containerized | bool + + - include: containerized_node_upgrade.yml + when: inventory_hostname in groups.oo_nodes_to_upgrade and openshift.common.is_containerized | bool + + - meta: flush_handlers + + - name: Set node schedulability + command: > + {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --schedulable=true + delegate_to: "{{ groups.oo_first_master.0 }}" + when: inventory_hostname in groups.oo_nodes_to_upgrade and was_schedulable | bool + register: node_sched + until: node_sched.rc == 0 + retries: 3 + delay: 1 + + |