Diffstat (limited to 'playbooks/common'): 34 files changed, 382 insertions, 251 deletions
diff --git a/playbooks/common/openshift-cluster/additional_config.yml b/playbooks/common/openshift-cluster/additional_config.yml index 26b31d313..825f46415 100644 --- a/playbooks/common/openshift-cluster/additional_config.yml +++ b/playbooks/common/openshift-cluster/additional_config.yml @@ -11,6 +11,8 @@ - role: openshift_examples registry_url: "{{ openshift.master.registry_url }}" when: openshift.common.install_examples | bool + - role: openshift_hosted_templates + registry_url: "{{ openshift.master.registry_url }}" - role: openshift_manageiq when: openshift.common.use_manageiq | bool - role: cockpit diff --git a/playbooks/common/openshift-cluster/initialize_openshift_version.yml b/playbooks/common/openshift-cluster/initialize_openshift_version.yml index 7112a6084..2f384ddea 100644 --- a/playbooks/common/openshift-cluster/initialize_openshift_version.yml +++ b/playbooks/common/openshift-cluster/initialize_openshift_version.yml @@ -1,5 +1,21 @@ --- # NOTE: requires openshift_facts be run +- hosts: l_oo_all_hosts + gather_facts: no + tasks: + # See: + # https://bugzilla.redhat.com/show_bug.cgi?id=1395047 + # https://bugzilla.redhat.com/show_bug.cgi?id=1282961 + # https://github.com/openshift/openshift-ansible/issues/1138 + - name: Check for bad combinations of yum and subscription-manager + command: > + {{ repoquery_cmd }} --installed --qf '%{version}' "yum" + register: yum_ver_test + changed_when: false + - fail: + msg: Incompatible versions of yum and subscription-manager found. You may need to update yum and yum-utils. + when: "'Plugin \"search-disabled-repos\" requires API 2.7. Supported API is 2.6.' in yum_ver_test.stdout" + - name: Determine openshift_version to configure on first master hosts: oo_first_master roles: diff --git a/playbooks/common/openshift-cluster/openshift_hosted.yml b/playbooks/common/openshift-cluster/openshift_hosted.yml index 2ba7fded5..ccbba54b4 100644 --- a/playbooks/common/openshift-cluster/openshift_hosted.yml +++ b/playbooks/common/openshift-cluster/openshift_hosted.yml @@ -56,13 +56,13 @@ openshift_hosted_logging_ops_hostname: "{{ logging_ops_hostname }}" openshift_hosted_logging_master_public_url: "{{ logging_master_public_url }}" openshift_hosted_logging_elasticsearch_cluster_size: "{{ logging_elasticsearch_cluster_size }}" - openshift_hosted_logging_elasticsearch_pvc_dynamic: "{{ 'true' if openshift.hosted.logging.storage_kind | default(none) == 'dynamic' else 'false' }}" - openshift_hosted_logging_elasticsearch_pvc_size: "{{ openshift.hosted.logging.storage.volume.size if openshift.hosted.logging.storage_kind | default(none) == 'dynamic' else '' }}" - openshift_hosted_logging_elasticsearch_pvc_prefix: "{{ 'logging-es' if openshift.hosted.logging.storage_kind | default(none) is not none else '' }}" + openshift_hosted_logging_elasticsearch_pvc_dynamic: "{{ 'true' if openshift_hosted_logging_storage_kind | default(none) == 'dynamic' else '' }}" + openshift_hosted_logging_elasticsearch_pvc_size: "{{ openshift.hosted.logging.storage.volume.size if openshift_hosted_logging_storage_kind | default(none) in ['dynamic','nfs'] else '' }}" + openshift_hosted_logging_elasticsearch_pvc_prefix: "{{ 'logging-es' if openshift_hosted_logging_storage_kind | default(none) == 'dynamic' else '' }}" openshift_hosted_logging_elasticsearch_ops_cluster_size: "{{ logging_elasticsearch_ops_cluster_size }}" - openshift_hosted_logging_elasticsearch_ops_pvc_dynamic: "{{ 'true' if openshift.hosted.logging.storage_kind | default(none) == 'dynamic' else 'false' }}" - 
openshift_hosted_logging_elasticsearch_ops_pvc_size: "{{ openshift.hosted.logging.storage.volume.size if openshift.hosted.logging.storage_kind | default(none) == 'dynamic' else '' }}" - openshift_hosted_logging_elasticsearch_ops_pvc_prefix: "{{ 'logging-es' if openshift.hosted.logging.storage_kind | default(none) is not none else '' }}" + openshift_hosted_logging_elasticsearch_ops_pvc_dynamic: "{{ 'true' if openshift_hosted_logging_storage_kind | default(none) == 'dynamic' else '' }}" + openshift_hosted_logging_elasticsearch_ops_pvc_size: "{{ openshift.hosted.logging.storage.volume.size if openshift_hosted_logging_storage_kind | default(none) in ['dynamic','nfs' ] else '' }}" + openshift_hosted_logging_elasticsearch_ops_pvc_prefix: "{{ 'logging-es' if openshift_hosted_logging_storage_kind | default(none) =='dynamic' else '' }}" - role: cockpit-ui - when: ( openshift.common.version_gte_3_3_or_1_3 | bool ) and ( openshift_hosted_manage_registry | default(true) | bool ) + when: ( openshift.common.version_gte_3_3_or_1_3 | bool ) and ( openshift_hosted_manage_registry | default(true) | bool ) and not (openshift.docker.hosted_registry_insecure | default(false) | bool) diff --git a/playbooks/common/openshift-cluster/redeploy-certificates.yml b/playbooks/common/openshift-cluster/redeploy-certificates.yml index 4996c56a7..5f008a045 100644 --- a/playbooks/common/openshift-cluster/redeploy-certificates.yml +++ b/playbooks/common/openshift-cluster/redeploy-certificates.yml @@ -224,7 +224,7 @@ - name: Prepare for node evacuation command: > - {{ openshift.common.admin_binary }} --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig + {{ openshift.common.client_binary }} adm --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig manage-node {{ openshift.node.nodename }} --schedulable=false delegate_to: "{{ groups.oo_first_master.0 }}" @@ -232,7 +232,7 @@ - name: Evacuate node command: > - {{ openshift.common.admin_binary }} --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig + {{ openshift.common.client_binary }} adm --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig manage-node {{ openshift.node.nodename }} --evacuate --force delegate_to: "{{ groups.oo_first_master.0 }}" @@ -240,7 +240,7 @@ - name: Set node schedulability command: > - {{ openshift.common.admin_binary }} --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig + {{ openshift.common.client_binary }} adm --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig manage-node {{ openshift.node.nodename }} --schedulable=true delegate_to: "{{ groups.oo_first_master.0 }}" when: openshift_certificates_redeploy_ca | default(false) | bool and was_schedulable | bool diff --git a/playbooks/common/openshift-cluster/upgrades/containerized_node_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/containerized_node_upgrade.yml index 32a3636aa..439df5ffd 100644 --- a/playbooks/common/openshift-cluster/upgrades/containerized_node_upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/containerized_node_upgrade.yml @@ -1,5 +1,3 @@ -- include_vars: ../../../../roles/openshift_node/vars/main.yml - - name: Update systemd units include: ../../../../roles/openshift_node/tasks/systemd_units.yml openshift_version={{ openshift_image_tag }} diff --git a/playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml 
b/playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml index 78f6c46f3..23cf8cf76 100644 --- a/playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml +++ b/playbooks/common/openshift-cluster/upgrades/create_service_signer_cert.yml @@ -22,11 +22,11 @@ - name: Create service signer certificate command: > - {{ openshift.common.admin_binary }} ca create-signer-cert - --cert=service-signer.crt - --key=service-signer.key - --name=openshift-service-serving-signer - --serial=service-signer.serial.txt + {{ openshift.common.client_binary }} adm ca create-signer-cert + --cert="{{ remote_cert_create_tmpdir.stdout }}/"service-signer.crt + --key="{{ remote_cert_create_tmpdir.stdout }}/"service-signer.key + --name="{{ remote_cert_create_tmpdir.stdout }}/"openshift-service-serving-signer + --serial="{{ remote_cert_create_tmpdir.stdout }}/"service-signer.serial.txt args: chdir: "{{ remote_cert_create_tmpdir.stdout }}/" when: not (hostvars[groups.oo_first_master.0].service_signer_cert_stat.stat.exists | bool) diff --git a/playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml b/playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml index fc26d029e..ee75aa853 100644 --- a/playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml +++ b/playbooks/common/openshift-cluster/upgrades/docker/upgrade_check.yml @@ -22,22 +22,24 @@ command: > {{ repoquery_cmd }} --qf '%{version}' "docker" register: avail_docker_version + # Don't expect docker rpm to be available on hosts that don't already have it installed: + when: pkg_check.rc == 0 failed_when: false changed_when: false - fail: msg: This playbook requires access to Docker 1.10 or later # Disable the 1.10 requirement if the user set a specific Docker version - when: docker_version is not defined and (docker_upgrade is not defined or docker_upgrade | bool == True) and (avail_docker_version.stdout == "" or avail_docker_version.stdout | version_compare('1.10','<')) + when: docker_version is not defined and (docker_upgrade is not defined or docker_upgrade | bool == True) and (pkg_check.rc == 0 and (avail_docker_version.stdout == "" or avail_docker_version.stdout | version_compare('1.10','<'))) # Default l_docker_upgrade to False, we'll set to True if an upgrade is required: - set_fact: l_docker_upgrade: False -# Make sure a docker_verison is set if none was requested: +# Make sure a docker_version is set if none was requested: - set_fact: docker_version: "{{ avail_docker_version.stdout }}" - when: docker_version is not defined + when: pkg_check.rc == 0 and docker_version is not defined - name: Flag for Docker upgrade if necessary set_fact: diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml new file mode 100644 index 000000000..57b156b1c --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/backup.yml @@ -0,0 +1,73 @@ +- name: Backup etcd + hosts: etcd_hosts_to_backup + vars: + embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}" + timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" + roles: + - openshift_facts + tasks: + # Ensure we persist the etcd role for this host in openshift_facts + - openshift_facts: + role: etcd + local_facts: {} + when: "'etcd' not in openshift" + + - stat: path=/var/lib/openshift + register: var_lib_openshift + + - stat: path=/var/lib/origin + register: var_lib_origin + + - name: Create origin symlink if necessary + file: 
src=/var/lib/openshift/ dest=/var/lib/origin state=link + when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False + + # TODO: replace shell module with command and update later checks + # We assume to be using the data dir for all backups. + - name: Check available disk space for etcd backup + shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1 + register: avail_disk + + # TODO: replace shell module with command and update later checks + - name: Check current embedded etcd disk usage + shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1 + register: etcd_disk_usage + when: embedded_etcd | bool + + - name: Abort if insufficient disk space for etcd backup + fail: + msg: > + {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup, + {{ avail_disk.stdout }} Kb available. + when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int) + + - name: Install etcd (for etcdctl) + action: "{{ ansible_pkg_mgr }} name=etcd state=present" + when: not openshift.common.is_atomic | bool + + - name: Generate etcd backup + command: > + etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }} + --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }} + + - set_fact: + etcd_backup_complete: True + + - name: Display location of etcd backup + debug: + msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ backup_tag | default('') }}{{ timestamp }}" + +- name: Gate on etcd backup + hosts: localhost + connection: local + become: no + tasks: + - set_fact: + etcd_backup_completed: "{{ hostvars + | oo_select_keys(groups.etcd_hosts_to_backup) + | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}" + - set_fact: + etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}" + - fail: + msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}" + when: etcd_backup_failed | length > 0 diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml new file mode 100644 index 000000000..35f391f8c --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/containerized_tasks.yml @@ -0,0 +1,47 @@ +--- +- name: Verify cluster is healthy pre-upgrade + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + +- name: Get current image + shell: grep 'ExecStart=' /etc/systemd/system/etcd_container.service | awk '{print $NF}' + register: current_image + +- name: Set new_etcd_image + set_fact: + new_etcd_image: "{{ current_image.stdout | regex_replace('/etcd.*$','/etcd3:' ~ upgrade_version ) if upgrade_version | version_compare('3.0','>=') + else current_image.stdout.split(':')[0] ~ ':' ~ upgrade_version }}" + +- name: Pull new etcd image + command: "docker pull {{ new_etcd_image }}" + +- name: Update to latest etcd image + replace: + dest: /etc/systemd/system/etcd_container.service + regexp: "{{ current_image.stdout }}$" + replace: "{{ new_etcd_image }}" + +- name: Restart etcd_container + systemd: + name: etcd_container + daemon_reload: yes + state: restarted + +## TODO: probably should just move this into the backup playbooks, also this +## will fail on atomic host. 
We need to revisit how to do etcd backups there as +## the container may be newer than etcdctl on the host. Assumes etcd3 obsoletes etcd (7.3.1) +- name: Upgrade etcd for etcdctl when not atomic + action: "{{ ansible_pkg_mgr }} name=etcd ensure=latest" + when: not openshift.common.is_atomic | bool + +- name: Verify cluster is healthy + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + register: etcdctl + until: etcdctl.rc == 0 + retries: 3 + delay: 10 + +- name: Store new etcd_image + openshift_facts: + role: etcd + local_facts: + etcd_image: "{{ new_etcd_image }}" diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml new file mode 100644 index 000000000..30232110e --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/fedora_tasks.yml @@ -0,0 +1,23 @@ +--- +# F23 GA'd with etcd 2.0, currently has 2.2 in updates +# F24 GA'd with etcd-2.2, currently has 2.2 in updates +# F25 Beta currently has etcd 3.0 +- name: Verify cluster is healthy pre-upgrade + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + +- name: Update etcd + package: + name: "etcd" + state: "latest" + +- name: Restart etcd + service: + name: etcd + state: restarted + +- name: Verify cluster is healthy + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + register: etcdctl + until: etcdctl.rc == 0 + retries: 3 + delay: 10 diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/files/etcdctl.sh b/playbooks/common/openshift-cluster/upgrades/etcd/files/etcdctl.sh new file mode 120000 index 000000000..641e04e44 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/files/etcdctl.sh @@ -0,0 +1 @@ +../roles/etcd/files/etcdctl.sh
\ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/filter_plugins b/playbooks/common/openshift-cluster/upgrades/etcd/filter_plugins new file mode 120000 index 000000000..27ddaa18b --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/filter_plugins @@ -0,0 +1 @@ +../../../../../filter_plugins
\ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/lookup_plugins b/playbooks/common/openshift-cluster/upgrades/etcd/lookup_plugins new file mode 120000 index 000000000..cf407f69b --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/lookup_plugins @@ -0,0 +1 @@ +../../../../../lookup_plugins
\ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/main.yml b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml new file mode 100644 index 000000000..cce844403 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/main.yml @@ -0,0 +1,122 @@ +--- +# For 1.4/3.4 we want to upgrade everyone to etcd-3.0. etcd docs say to +# upgrade from 2.0.x to 2.1.x to 2.2.x to 2.3.x to 3.0.x. While this is a tedius +# task for RHEL and CENTOS it's simply not possible in Fedora unless you've +# mirrored packages on your own because only the GA and latest versions are +# available in the repos. So for Fedora we'll simply skip this, sorry. + +- include: ../../evaluate_groups.yml + tags: + - always + +- name: Evaluate additional groups for upgrade + hosts: localhost + connection: local + become: no + tasks: + - name: Evaluate etcd_hosts_to_upgrade + add_host: + name: "{{ item }}" + groups: etcd_hosts_to_upgrade, etcd_hosts_to_backup + with_items: "{{ groups.oo_etcd_to_config if groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config | length > 0 else groups.oo_first_master }}" + +- name: Backup etcd before upgrading anything + include: backup.yml + vars: + backup_tag: "pre-upgrade-" + +- name: Drop etcdctl profiles + hosts: etcd_hosts_to_upgrade + tasks: + - include: roles/etcd/tasks/etcdctl.yml + +- name: Determine etcd version + hosts: etcd_hosts_to_upgrade + tasks: + - name: Record RPM based etcd version + command: rpm -qa --qf '%{version}' etcd\* + register: etcd_installed_version + failed_when: false + when: not openshift.common.is_containerized | bool + - name: Record containerized etcd version + command: docker exec etcd_container rpm -qa --qf '%{version}' etcd\* + register: etcd_installed_version + failed_when: false + when: openshift.common.is_containerized | bool + +# I really dislike this copy/pasta but I wasn't able to find a way to get it to loop +# through hosts, then loop through tasks only when appropriate +- name: Upgrade to 2.1 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: '2.1' + tasks: + - include: rhel_tasks.yml + when: etcd_installed_version.stdout | default('99') | version_compare('2.1','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool + +- name: Upgrade RPM hosts to 2.2 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: '2.2' + tasks: + - include: rhel_tasks.yml + when: etcd_installed_version.stdout | default('99') | version_compare('2.2','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool + +- name: Upgrade containerized hosts to 2.2.5 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: 2.2.5 + tasks: + - include: containerized_tasks.yml + when: etcd_installed_version.stdout | default('99') | version_compare('2.2','<') and openshift.common.is_containerized | bool + +- name: Upgrade RPM hosts to 2.3 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: '2.3' + tasks: + - include: rhel_tasks.yml + when: etcd_installed_version.stdout | default('99') | version_compare('2.3','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool + +- name: Upgrade containerized hosts to 2.3.7 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: 2.3.7 + tasks: + - include: containerized_tasks.yml + when: etcd_installed_version.stdout | default('99') | version_compare('2.3','<') and openshift.common.is_containerized | bool + +- name: 
Upgrade RPM hosts to 3.0 + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: '3.0' + tasks: + - include: rhel_tasks.yml + when: etcd_installed_version.stdout | default('99') | version_compare('3.0','<') and ansible_distribution == 'RedHat' and not openshift.common.is_containerized | bool + +- name: Upgrade containerized hosts to etcd3 image + hosts: etcd_hosts_to_upgrade + serial: 1 + vars: + upgrade_version: 3.0.3 + tasks: + - include: containerized_tasks.yml + when: etcd_installed_version.stdout | default('99') | version_compare('3.0','<') and openshift.common.is_containerized | bool + +- name: Upgrade fedora to latest + hosts: etcd_hosts_to_upgrade + serial: 1 + tasks: + - include: fedora_tasks.yml + when: ansible_distribution == 'Fedora' and not openshift.common.is_containerized | bool + +- name: Backup etcd + include: backup.yml + vars: + backup_tag: "post-3.0-" diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml b/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml new file mode 100644 index 000000000..8e7dc9d9b --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/rhel_tasks.yml @@ -0,0 +1,23 @@ +--- +- name: Verify cluster is healthy pre-upgrade + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + +- name: Update etcd package but exclude etcd3 + command: "{{ ansible_pkg_mgr }} install -y etcd-{{ upgrade_version }}\\* --exclude etcd3" + when: upgrade_version | version_compare('3.0','<') + +- name: Update etcd package not excluding etcd3 + command: "{{ ansible_pkg_mgr }} install -y etcd3-{{ upgrade_version }}\\*" + when: not upgrade_version | version_compare('3.0','<') + +- name: Restart etcd + service: + name: etcd + state: restarted + +- name: Verify cluster is healthy + command: "etcdctl --cert-file /etc/etcd/peer.crt --key-file /etc/etcd/peer.key --ca-file /etc/etcd/ca.crt -C https://{{ openshift.common.hostname }}:2379 cluster-health" + register: etcdctl + until: etcdctl.rc == 0 + retries: 3 + delay: 10 diff --git a/playbooks/common/openshift-cluster/upgrades/etcd/roles b/playbooks/common/openshift-cluster/upgrades/etcd/roles new file mode 120000 index 000000000..6bc1a7aef --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/etcd/roles @@ -0,0 +1 @@ +../../../../../roles
\ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/init.yml b/playbooks/common/openshift-cluster/upgrades/init.yml index f3b3abe0d..fbdb7900a 100644 --- a/playbooks/common/openshift-cluster/upgrades/init.yml +++ b/playbooks/common/openshift-cluster/upgrades/init.yml @@ -10,7 +10,7 @@ - add_host: name: "{{ item }}" groups: l_oo_all_hosts - with_items: g_all_hosts | default([]) + with_items: "{{ g_all_hosts | default([]) }}" - hosts: l_oo_all_hosts gather_facts: no diff --git a/playbooks/common/openshift-cluster/upgrades/openvswitch-avoid-oom.conf b/playbooks/common/openshift-cluster/upgrades/openvswitch-avoid-oom.conf new file mode 120000 index 000000000..514526fe2 --- /dev/null +++ b/playbooks/common/openshift-cluster/upgrades/openvswitch-avoid-oom.conf @@ -0,0 +1 @@ +../../../../roles/openshift_node/templates/openvswitch-avoid-oom.conf
\ No newline at end of file diff --git a/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml index e43954453..2bbcbe1f8 100644 --- a/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/post_control_plane.yml @@ -17,10 +17,14 @@ # not already exist. We could have potentially done a replace --force to # create and update in one step. - openshift_examples + - openshift_hosted_templates # Update the existing templates - role: openshift_examples registry_url: "{{ openshift.master.registry_url }}" openshift_examples_import_command: replace + - role: openshift_hosted_templates + registry_url: "{{ openshift.master.registry_url }}" + openshift_hosted_templates_import_command: replace pre_tasks: - name: Collect all routers command: > @@ -41,7 +45,7 @@ {{ oc_cmd }} patch dc/{{ item['labels']['deploymentconfig'] }} -n {{ item['namespace'] }} -p '{"spec":{"template":{"spec":{"containers":[{"name":"router","image":"{{ router_image }}","livenessProbe":{"tcpSocket":null,"httpGet":{"path": "/healthz", "port": 1936, "host": "localhost", "scheme": "HTTP"},"initialDelaySeconds":10,"timeoutSeconds":1}}]}}}}' --api-version=v1 - with_items: haproxy_routers + with_items: "{{ haproxy_routers }}" - name: Check for default registry command: > diff --git a/playbooks/common/openshift-cluster/upgrades/rpm_upgrade.yml b/playbooks/common/openshift-cluster/upgrades/rpm_upgrade.yml index af77f140f..cd1139b29 100644 --- a/playbooks/common/openshift-cluster/upgrades/rpm_upgrade.yml +++ b/playbooks/common/openshift-cluster/upgrades/rpm_upgrade.yml @@ -5,3 +5,7 @@ - name: Ensure python-yaml present for config upgrade action: "{{ ansible_pkg_mgr }} name=PyYAML state=present" when: not openshift.common.is_atomic | bool + +- name: Restart node service + service: name="{{ openshift.common.service_type }}-node" state=restarted + when: component == "node" diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml index 2c641e21e..57c25aa41 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_control_plane.yml @@ -11,82 +11,24 @@ add_host: name: "{{ item }}" groups: etcd_hosts_to_backup - with_items: groups.oo_etcd_to_config if groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config | length > 0 else groups.oo_first_master + with_items: "{{ groups.oo_etcd_to_config if groups.oo_etcd_to_config is defined and groups.oo_etcd_to_config | length > 0 else groups.oo_first_master }}" -- name: Backup etcd - hosts: etcd_hosts_to_backup - vars: - embedded_etcd: "{{ hostvars[groups.oo_first_master.0].openshift.master.embedded_etcd }}" - timestamp: "{{ lookup('pipe', 'date +%Y%m%d%H%M%S') }}" +# If facts cache were for some reason deleted, this fact may not be set, and if not set +# it will always default to true. This causes problems for the etcd data dir fact detection +# so we must first make sure this is set correctly before attempting the backup. 
+- name: Set master embedded_etcd fact + hosts: oo_masters_to_config roles: - openshift_facts tasks: - # Ensure we persist the etcd role for this host in openshift_facts - openshift_facts: - role: etcd - local_facts: {} - when: "'etcd' not in openshift" - - - stat: path=/var/lib/openshift - register: var_lib_openshift - - - stat: path=/var/lib/origin - register: var_lib_origin - - - name: Create origin symlink if necessary - file: src=/var/lib/openshift/ dest=/var/lib/origin state=link - when: var_lib_openshift.stat.exists == True and var_lib_origin.stat.exists == False - - # TODO: replace shell module with command and update later checks - # We assume to be using the data dir for all backups. - - name: Check available disk space for etcd backup - shell: df --output=avail -k {{ openshift.common.data_dir }} | tail -n 1 - register: avail_disk - - # TODO: replace shell module with command and update later checks - - name: Check current embedded etcd disk usage - shell: du -k {{ openshift.etcd.etcd_data_dir }} | tail -n 1 | cut -f1 - register: etcd_disk_usage - when: embedded_etcd | bool - - - name: Abort if insufficient disk space for etcd backup - fail: - msg: > - {{ etcd_disk_usage.stdout }} Kb disk space required for etcd backup, - {{ avail_disk.stdout }} Kb available. - when: (embedded_etcd | bool) and (etcd_disk_usage.stdout|int > avail_disk.stdout|int) + role: master + local_facts: + embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" + debug_level: "{{ openshift_master_debug_level | default(openshift.common.debug_level | default(2)) }}" - - name: Install etcd (for etcdctl) - action: "{{ ansible_pkg_mgr }} name=etcd state=latest" - when: not openshift.common.is_atomic | bool - - - name: Generate etcd backup - command: > - etcdctl backup --data-dir={{ openshift.etcd.etcd_data_dir }} - --backup-dir={{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }} - - - set_fact: - etcd_backup_complete: True - - - name: Display location of etcd backup - debug: - msg: "Etcd backup created in {{ openshift.common.data_dir }}/etcd-backup-{{ timestamp }}" - - -- name: Gate on etcd backup - hosts: localhost - connection: local - become: no - tasks: - - set_fact: - etcd_backup_completed: "{{ hostvars - | oo_select_keys(groups.etcd_hosts_to_backup) - | oo_collect('inventory_hostname', {'etcd_backup_complete': true}) }}" - - set_fact: - etcd_backup_failed: "{{ groups.etcd_hosts_to_backup | difference(etcd_backup_completed) }}" - - fail: - msg: "Upgrade cannot continue. The following hosts did not complete etcd backup: {{ etcd_backup_failed | join(',') }}" - when: etcd_backup_failed | length > 0 +- name: Backup etcd + include: ./etcd/backup.yml - name: Upgrade master packages hosts: oo_masters_to_config @@ -99,6 +41,8 @@ - include: rpm_upgrade.yml component=master when: not openshift.common.is_containerized | bool +# Create service signer cert when missing. Service signer certificate +# is added to master config in the master config hook for v3_3. - name: Determine if service signer cert must be created hosts: oo_first_master tasks: @@ -108,8 +52,6 @@ register: service_signer_cert_stat changed_when: false -# Create service signer cert when missing. Service signer certificate -# is added to master config in the master config hook for v3_3. 
- include: create_service_signer_cert.yml - name: Upgrade master config and systemd units @@ -128,13 +70,6 @@ - name: Update systemd units include: ../../../../roles/openshift_master/tasks/systemd_units.yml -# - name: Upgrade master configuration -# openshift_upgrade_config: -# from_version: '3.1' -# to_version: '3.2' -# role: master -# config_base: "{{ hostvars[inventory_hostname].openshift.common.config_base }}" - - name: Check for ca-bundle.crt stat: path: "{{ openshift.common.config_base }}/master/ca-bundle.crt" @@ -184,6 +119,10 @@ msg: "Upgrade cannot continue. The following masters did not finish updating: {{ master_update_failed | join(',') }}" when: master_update_failed | length > 0 +# We are now ready to restart master services (or entire system +# depending on openshift_rolling_restart_mode): +- include: ../../openshift-master/restart.yml + ############################################################################### # Reconcile Cluster Roles, Cluster Role Bindings and Security Context Constraints ############################################################################### @@ -200,19 +139,15 @@ # restart. skip_docker_role: True tasks: - - name: Verifying the correct commandline tools are available - shell: grep {{ verify_upgrade_version }} {{ openshift.common.admin_binary}} - when: openshift.common.is_containerized | bool and verify_upgrade_version is defined - - name: Reconcile Cluster Roles command: > - {{ openshift.common.admin_binary}} --config={{ openshift.common.config_base }}/master/admin.kubeconfig + {{ openshift.common.client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig policy reconcile-cluster-roles --additive-only=true --confirm run_once: true - name: Reconcile Cluster Role Bindings command: > - {{ openshift.common.admin_binary}} --config={{ openshift.common.config_base }}/master/admin.kubeconfig + {{ openshift.common.client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig policy reconcile-cluster-role-bindings --exclude-groups=system:authenticated --exclude-groups=system:authenticated:oauth @@ -222,9 +157,15 @@ when: origin_reconcile_bindings | bool or ent_reconcile_bindings | bool run_once: true + - name: Reconcile Jenkins Pipeline Role Bindings + command: > + {{ openshift.common.client_binary }} adm --config={{ openshift.common.config_base }}/master/admin.kubeconfig policy reconcile-cluster-role-bindings system:build-strategy-jenkinspipeline --confirm + run_once: true + when: openshift.common.version_gte_3_4_or_1_4 | bool + - name: Reconcile Security Context Constraints command: > - {{ openshift.common.admin_binary}} policy reconcile-sccs --confirm --additive-only=true + {{ openshift.common.client_binary }} adm policy reconcile-sccs --confirm --additive-only=true run_once: true - set_fact: diff --git a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml index 9b572dcdf..1f314c854 100644 --- a/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml +++ b/playbooks/common/openshift-cluster/upgrades/upgrade_nodes.yml @@ -29,7 +29,7 @@ - name: Mark unschedulable if host is a node command: > - {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --schedulable=false + {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=false delegate_to: "{{ groups.oo_first_master.0 }}" when: inventory_hostname in groups.oo_nodes_to_upgrade # NOTE: 
There is a transient "object has been modified" error here, allow a couple @@ -41,7 +41,7 @@ - name: Evacuate Node for Kubelet upgrade command: > - {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --evacuate --force + {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --evacuate --force delegate_to: "{{ groups.oo_first_master.0 }}" when: inventory_hostname in groups.oo_nodes_to_upgrade tasks: @@ -64,7 +64,7 @@ - name: Set node schedulability command: > - {{ openshift.common.admin_binary }} manage-node {{ openshift.node.nodename | lower }} --schedulable=true + {{ openshift.common.client_binary }} adm manage-node {{ openshift.node.nodename | lower }} --schedulable=true delegate_to: "{{ groups.oo_first_master.0 }}" when: inventory_hostname in groups.oo_nodes_to_upgrade and was_schedulable | bool register: node_sched diff --git a/playbooks/common/openshift-etcd/service.yml b/playbooks/common/openshift-etcd/service.yml index fd2bc24ae..f460612ba 100644 --- a/playbooks/common/openshift-etcd/service.yml +++ b/playbooks/common/openshift-etcd/service.yml @@ -10,7 +10,7 @@ - name: Evaluate g_service_etcd add_host: name={{ item }} groups=g_service_etcd - with_items: oo_host_group_exp | default([]) + with_items: "{{ oo_host_group_exp | default([]) }}" - name: Change etcd state on etcd instance(s) hosts: g_service_etcd diff --git a/playbooks/common/openshift-loadbalancer/service.yml b/playbooks/common/openshift-loadbalancer/service.yml index e06a14c89..efc80edf9 100644 --- a/playbooks/common/openshift-loadbalancer/service.yml +++ b/playbooks/common/openshift-loadbalancer/service.yml @@ -10,7 +10,7 @@ - name: Evaluate g_service_lb add_host: name={{ item }} groups=g_service_lb - with_items: oo_host_group_exp | default([]) + with_items: "{{ oo_host_group_exp | default([]) }}" - name: Change state on lb instance(s) hosts: g_service_lb diff --git a/playbooks/common/openshift-master/config.yml b/playbooks/common/openshift-master/config.yml index a53c55c14..5fcb850a2 100644 --- a/playbooks/common/openshift-master/config.yml +++ b/playbooks/common/openshift-master/config.yml @@ -53,7 +53,7 @@ when: openshift_hosted_metrics_deployer_prefix is not defined - set_fact: openshift_hosted_metrics_deployer_version: "{{ lookup('oo_option', 'openshift_hosted_metrics_deployer_version') | default('latest') }}" - when: openshift_hosted_metrics_deployer_prefix is not defined + when: openshift_hosted_metrics_deployer_version is not defined roles: - openshift_facts post_tasks: diff --git a/playbooks/common/openshift-master/restart.yml b/playbooks/common/openshift-master/restart.yml index 57a63cfee..5769ef5cd 100644 --- a/playbooks/common/openshift-master/restart.yml +++ b/playbooks/common/openshift-master/restart.yml @@ -66,63 +66,8 @@ current_host: "{{ exists.stat.exists }}" when: openshift.common.rolling_restart_mode == 'system' -- name: Determine which masters are currently active - hosts: oo_masters_to_config - any_errors_fatal: true - tasks: - - name: Check master service status - command: > - systemctl is-active {{ openshift.common.service_type }}-master - register: active_check_output - when: openshift.master.cluster_method | default(None) == 'pacemaker' - failed_when: false - changed_when: false - - set_fact: - is_active: "{{ active_check_output.stdout == 'active' }}" - when: openshift.master.cluster_method | default(None) == 'pacemaker' - -- name: Evaluate master groups - hosts: localhost - become: no - tasks: - - fail: - msg: > - Did not 
receive active status from any masters. Please verify pacemaker cluster. - when: "{{ hostvars[groups.oo_first_master.0].openshift.master.cluster_method | default(None) == 'pacemaker' and 'True' not in (hostvars - | oo_select_keys(groups['oo_masters_to_config']) - | oo_collect('is_active') - | list) }}" - - name: Evaluate oo_active_masters - add_host: - name: "{{ item }}" - groups: oo_active_masters - ansible_ssh_user: "{{ g_ssh_user | default(omit) }}" - ansible_become: "{{ g_sudo | default(omit) }}" - with_items: "{{ groups.oo_masters_to_config | default([]) }}" - when: (hostvars[item]['is_active'] | default(false)) | bool - - name: Evaluate oo_current_masters - add_host: - name: "{{ item }}" - groups: oo_current_masters - ansible_ssh_user: "{{ g_ssh_user | default(omit) }}" - ansible_become: "{{ g_sudo | default(omit) }}" - with_items: "{{ groups.oo_masters_to_config | default([]) }}" - when: (hostvars[item]['current_host'] | default(false)) | bool - -- name: Validate pacemaker cluster - hosts: oo_active_masters - tasks: - - name: Retrieve pcs status - command: pcs status - register: pcs_status_output - changed_when: false - - fail: - msg: > - Pacemaker cluster validation failed. One or more nodes are not online. - when: not (pcs_status_output.stdout | validate_pcs_cluster(groups.oo_masters_to_config)) | bool - - name: Restart masters - hosts: oo_masters_to_config:!oo_active_masters:!oo_current_masters + hosts: oo_masters_to_config vars: openshift_master_ha: "{{ groups.oo_masters_to_config | length > 1 }}" serial: 1 @@ -132,20 +77,3 @@ - include: restart_services.yml when: openshift.common.rolling_restart_mode == 'services' -- name: Restart active masters - hosts: oo_active_masters - serial: 1 - tasks: - - include: restart_hosts_pacemaker.yml - when: openshift.common.rolling_restart_mode == 'system' - - include: restart_services_pacemaker.yml - when: openshift.common.rolling_restart_mode == 'services' - -- name: Restart current masters - hosts: oo_current_masters - serial: 1 - tasks: - - include: restart_hosts.yml - when: openshift.common.rolling_restart_mode == 'system' - - include: restart_services.yml - when: openshift.common.rolling_restart_mode == 'services' diff --git a/playbooks/common/openshift-master/restart_hosts.yml b/playbooks/common/openshift-master/restart_hosts.yml index ff206f5a2..b1c36718c 100644 --- a/playbooks/common/openshift-master/restart_hosts.yml +++ b/playbooks/common/openshift-master/restart_hosts.yml @@ -5,8 +5,8 @@ poll: 0 ignore_errors: true become: yes -# When cluster_method != pacemaker we can ensure the api_port is -# available. + +# Ensure the api_port is available. - name: Wait for master API to come back online become: no local_action: @@ -15,25 +15,3 @@ state=started delay=10 port="{{ openshift.master.api_port }}" - when: openshift.master.cluster_method != 'pacemaker' -- name: Wait for master to start - become: no - local_action: - module: wait_for - host="{{ inventory_hostname }}" - state=started - delay=10 - port=22 - when: openshift.master.cluster_method == 'pacemaker' -- name: Wait for master to become available - command: pcs status - register: pcs_status_output - until: pcs_status_output.stdout | validate_pcs_cluster([inventory_hostname]) | bool - retries: 15 - delay: 2 - changed_when: false - when: openshift.master.cluster_method == 'pacemaker' -- fail: - msg: > - Pacemaker cluster validation failed {{ inventory hostname }} is not online. 
- when: openshift.master.cluster_method == 'pacemaker' and not (pcs_status_output.stdout | validate_pcs_cluster([inventory_hostname])) | bool diff --git a/playbooks/common/openshift-master/restart_hosts_pacemaker.yml b/playbooks/common/openshift-master/restart_hosts_pacemaker.yml deleted file mode 100644 index c9219e8de..000000000 --- a/playbooks/common/openshift-master/restart_hosts_pacemaker.yml +++ /dev/null @@ -1,25 +0,0 @@ -- name: Fail over master resource - command: > - pcs resource move master {{ hostvars | oo_select_keys(groups['oo_masters_to_config']) | oo_collect('openshift.common.hostname', {'is_active': 'False'}) | list | first }} -- name: Wait for master API to come back online - become: no - local_action: - module: wait_for - host="{{ openshift.master.cluster_hostname }}" - state=started - delay=10 - port="{{ openshift.master.api_port }}" -- name: Restart master system - # https://github.com/ansible/ansible/issues/10616 - shell: sleep 2 && shutdown -r now "OpenShift Ansible master rolling restart" - async: 1 - poll: 0 - ignore_errors: true - become: yes -- name: Wait for master to start - become: no - local_action: - module: wait_for - host="{{ inventory_hostname }}" - state=started - delay=10 diff --git a/playbooks/common/openshift-master/restart_services_pacemaker.yml b/playbooks/common/openshift-master/restart_services_pacemaker.yml deleted file mode 100644 index e738f3fb6..000000000 --- a/playbooks/common/openshift-master/restart_services_pacemaker.yml +++ /dev/null @@ -1,10 +0,0 @@ -- name: Restart master services - command: pcs resource restart master -- name: Wait for master API to come back online - become: no - local_action: - module: wait_for - host="{{ openshift.master.cluster_hostname }}" - state=started - delay=10 - port="{{ openshift.master.api_port }}" diff --git a/playbooks/common/openshift-master/scaleup.yml b/playbooks/common/openshift-master/scaleup.yml index 56ed09e1b..18e5c665f 100644 --- a/playbooks/common/openshift-master/scaleup.yml +++ b/playbooks/common/openshift-master/scaleup.yml @@ -33,7 +33,7 @@ service: name={{ openshift.common.service_type }}-master-controllers state=restarted - name: verify api server command: > - curl --silent + curl --silent --tlsv1.2 {% if openshift.common.version_gte_3_2_or_1_2 | bool %} --cacert {{ openshift.common.config_base }}/master/ca-bundle.crt {% else %} diff --git a/playbooks/common/openshift-master/service.yml b/playbooks/common/openshift-master/service.yml index f60c5a2b5..5e5198335 100644 --- a/playbooks/common/openshift-master/service.yml +++ b/playbooks/common/openshift-master/service.yml @@ -10,7 +10,7 @@ - name: Evaluate g_service_masters add_host: name={{ item }} groups=g_service_masters - with_items: oo_host_group_exp | default([]) + with_items: "{{ oo_host_group_exp | default([]) }}" - name: Change state on master instance(s) hosts: g_service_masters diff --git a/playbooks/common/openshift-nfs/service.yml b/playbooks/common/openshift-nfs/service.yml index 20c8ca248..8468014da 100644 --- a/playbooks/common/openshift-nfs/service.yml +++ b/playbooks/common/openshift-nfs/service.yml @@ -8,7 +8,7 @@ - name: Evaluate g_service_nfs add_host: name={{ item }} groups=g_service_nfs - with_items: oo_host_group_exp | default([]) + with_items: "{{ oo_host_group_exp | default([]) }}" - name: Change state on nfs instance(s) hosts: g_service_nfs diff --git a/playbooks/common/openshift-node/config.yml b/playbooks/common/openshift-node/config.yml index 5191662f7..4824eeef3 100644 --- 
a/playbooks/common/openshift-node/config.yml +++ b/playbooks/common/openshift-node/config.yml @@ -165,7 +165,7 @@ # Using curl here since the uri module requires python-httplib2 and # wait_for port doesn't provide health information. command: > - curl --silent + curl --silent --tlsv1.2 {% if openshift.common.version_gte_3_2_or_1_2 | bool %} --cacert {{ openshift.common.config_base }}/master/ca-bundle.crt {% else %} diff --git a/playbooks/common/openshift-node/service.yml b/playbooks/common/openshift-node/service.yml index 0f07add2a..33095c9fb 100644 --- a/playbooks/common/openshift-node/service.yml +++ b/playbooks/common/openshift-node/service.yml @@ -10,7 +10,7 @@ - name: Evaluate g_service_nodes add_host: name={{ item }} groups=g_service_nodes - with_items: oo_host_group_exp | default([]) + with_items: "{{ oo_host_group_exp | default([]) }}" - name: Change state on node instance(s) hosts: g_service_nodes |
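The recurring substitution above, `{{ openshift.common.admin_binary }}` becoming `{{ openshift.common.client_binary }} adm`, works because the `oc` client exposes the former `oadm` commands under its `adm` subcommand, so the upgrade and certificate-redeploy plays can drive node scheduling through the client binary alone. The sketch below simply combines the cordon/evacuate/restore tasks from the hunks above into one standalone sequence for reference; it is illustrative only, and the `mktemp`-based kubeconfig path and the `oo_first_master` group come straight from those hunks rather than from any new variable.

# Illustrative only: cordon, evacuate, then restore a node via `oc adm manage-node`,
# always delegating to the first master and using its temporary admin kubeconfig.
- name: Mark node unschedulable
  command: >
    {{ openshift.common.client_binary }} adm
    --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig
    manage-node {{ openshift.node.nodename | lower }} --schedulable=false
  delegate_to: "{{ groups.oo_first_master.0 }}"

- name: Evacuate node
  command: >
    {{ openshift.common.client_binary }} adm
    --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig
    manage-node {{ openshift.node.nodename | lower }} --evacuate --force
  delegate_to: "{{ groups.oo_first_master.0 }}"

- name: Restore node schedulability
  command: >
    {{ openshift.common.client_binary }} adm
    --config={{ hostvars[groups.oo_first_master.0].mktemp.stdout }}/admin.kubeconfig
    manage-node {{ openshift.node.nodename | lower }} --schedulable=true
  delegate_to: "{{ groups.oo_first_master.0 }}"

The quoted `with_items: "{{ oo_host_group_exp | default([]) }}"` changes in the service playbooks follow the same spirit: newer Ansible releases deprecate bare Jinja2 expressions in `with_items`, so wrapping them in `"{{ }}"` keeps the loops working without changing their result.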