diff options
Diffstat (limited to 'playbooks')
21 files changed, 1014 insertions, 345 deletions
diff --git a/playbooks/aws/README.md b/playbooks/aws/README.md index 99698b4d0..410d98a9c 100644 --- a/playbooks/aws/README.md +++ b/playbooks/aws/README.md @@ -1,4 +1,269 @@ # AWS playbooks -This playbook directory is meant to be driven by [`bin/cluster`](../../bin), -which is community supported and most use is considered deprecated. +Parts of this playbook directory are meant to be driven by [`bin/cluster`](../../bin), +which is community supported and whose use is considered **deprecated**. + + +## Provisioning + +With recent desire for provisioning from customers and developers alike, the AWS + playbook directory now supports a limited set of ansible playbooks to achieve a + complete cluster setup. These playbooks bring into alignment our desire to + deploy highly scalable Openshift clusters utilizing AWS auto scale groups and + custom AMIs. + +### Where do I start? + +Before any provisioning may occur, AWS account credentials must be present in the environment. This can be done in two ways: + +- Create the following file `~/.aws/credentials` with the contents (substitute your access key and secret key): + ``` + [myaccount] + aws_access_key_id = <Your access_key here> + aws_secret_access_key = <Your secret access key here> + ``` + From the shell: + ``` + $ export AWS_PROFILE=myaccount + ``` + --- +- As an alternative to using a profile, you can export your AWS credentials as environment variables. + ``` + $ export AWS_ACCESS_KEY_ID=AKIXXXXXX + $ export AWS_SECRET_ACCESS_KEY=XXXXXX + ``` + +### Let's Provision! + +The newly added playbooks are the following: +- build_ami.yml +- provision.yml + +The current expected workflow is to provide the `vars.yml` file with the +desired settings for cluster instances. These settings are AWS specific and should +be tailored to the consumer's AWS custom account settings. 
+ +```yaml +clusterid: mycluster +region: us-east-1 + +provision: + clusterid: "{{ clusterid }}" + region: "{{ region }}" + + build: + base_image: ami-bdd5d6ab # base image for AMI to build from + # when creating an encrypted AMI please specify use_encryption + use_encryption: False + + yum_repositories: # this is an example repository but it requires sslclient info. Use a valid yum repository for openshift rpms + - name: openshift-repo + file: openshift-repo + description: OpenShift Builds + baseurl: https://mirror.openshift.com/enterprise/online-int/latest/x86_64/os/ + enabled: yes + gpgcheck: no + sslverify: no + # client cert and key required for this repository + sslclientcert: "/var/lib/yum/client-cert.pem" + sslclientkey: "/var/lib/yum/client-key.pem" + gpgkey: "https://mirror.ops.rhcloud.com/libra/keys/RPM-GPG-KEY-redhat-release https://mirror.ops.rhcloud.com/libra/keys/RPM-GPG-KEY-redhat-beta https://mirror.ops.rhcloud.com/libra/keys/RPM-GPG-KEY-redhat-openshifthosted" + + # for s3 registry backend + openshift_registry_s3: True + + # if using custom certificates these are required for the ELB + iam_cert_ca: + name: test_openshift + cert_path: '/path/to/wildcard.<clusterid>.example.com.crt' + key_path: '/path/to/wildcard.<clusterid>.example.com.key' + chain_path: '/path/to/cert.ca.crt' + + instance_users: + - key_name: myuser_key + username: myuser + pub_key: | + ssh-rsa aaa<place public ssh key here>aaaaa user@<clusterid> + + node_group_config: + tags: + clusterid: "{{ clusterid }}" + environment: stg + ssh_key_name: myuser_key # name of the ssh key from above + + # configure master settings here + master: + instance_type: m4.xlarge + ami: ami-cdeec8b6 # if using an encrypted AMI this will be replaced + volumes: + - device_name: /dev/sdb + volume_size: 100 + device_type: gp2 + delete_on_termination: False + health_check: + period: 60 + type: EC2 + # Set the following number to be the same for masters. 
+ min_size: 3 + max_size: 3 + desired_size: 3 + tags: + host-type: master + sub-host-type: default + wait_for_instances: True +... + vpc: + # name: mycluster # If missing; will default to clusterid + cidr: 172.31.0.0/16 + subnets: + us-east-1: # These are us-east-1 region defaults. Ensure this matches your region + - cidr: 172.31.48.0/20 + az: "us-east-1c" + - cidr: 172.31.32.0/20 + az: "us-east-1e" + - cidr: 172.31.16.0/20 + az: "us-east-1a" + +``` + +Repeat the preceding setup for the infra and compute node groups. This most likely + will not need editing, but if further customization is required these parameters + can be updated. + +#### Step 1 + +Once the vars.yml file has been updated with the correct settings for the desired AWS account, we are ready to build an AMI. + +``` +$ ansible-playbook build_ami.yml +``` + +1. This script will build a VPC. Default name will be clusterid if not specified. +2. Create an ssh key required for the instance. +3. Create an instance. +4. Run some setup roles to ensure packages and services are correctly configured. +5. Create the AMI. +6. If encryption is desired + - A KMS key is created with the name of $clusterid + - An encrypted AMI will be produced with $clusterid KMS key +7. Terminate the instance used to configure the AMI. + +#### Step 2 + +Now that we have created an AMI for our Openshift installation, that AMI id needs to be placed in the `vars.yml` file. To do so, update the following fields (the AMI can be captured from the output of the previous step or found in the ec2 console under AMIs): + +``` + # when creating an encrypted AMI please specify use_encryption + use_encryption: False # defaults to false +``` + +**Note**: If using encryption, specify `use_encryption: True`. This ensures that the recently created AMI is encrypted for later use. If encryption is not desired, set the value to false. The AMI id will be fetched and used according to its most recent creation date. 
+ +#### Step 3 + +Create an openshift-ansible inventory file to use for a byo installation. The exception here is that there will be no hosts specified by the inventory file. Here is an example: + +```ini +[OSEv3:children] +masters +nodes +etcd + +[OSEv3:children] +masters +nodes +etcd + +[OSEv3:vars] +# cluster specific settings may be placed here +openshift_hosted_router_wait=False +openshift_hosted_registry_wait=False + +[masters] + +[etcd] + +[nodes] +``` + +There are more examples of cluster inventory settings [`here`](../../inventory/byo/). + +#### Step 4 + +We are ready to create the master instances and install Openshift. + +``` +$ ansible-playbook -i <inventory from step 3> provision.yml +``` + +This playbook runs through the following steps: +1. Ensure a VPC is created +2. Ensure an SSH key exists +3. Create an s3 bucket for the registry named $clusterid +4. Create master security groups +5. Create a master launch config +6. Create the master auto scaling groups +7. If certificates are desired for ELB, they will be uploaded +8. Create internal and external master ELBs +9. Add newly created masters to the correct groups +10. Set a couple of important facts for the masters +11. Run the [`byo`](../../common/openshift-cluster/config.yml) config playbook + +At this point we have created a successful cluster with only the master nodes. + + +#### Step 5 + +Now that we have a cluster deployed it might be more interesting to create some node types. This can be done easily with the following playbook: + +``` +$ ansible-playbook provision_nodes.yml +``` + +Once this playbook completes, it should create the compute and infra node scale groups. These nodes will attempt to register themselves to the cluster. These requests must be approved by an administrator. + +#### Step 6 + +The registration of our nodes can be automated by running the following script `accept.yml`. This script can handle the registration in a few different ways. 
+- approve_all - **Note**: this option is for development and test environments. Security is bypassed. +- nodes - A list of node names that will be accepted into the cluster + +```yaml + oc_adm_csr: + #approve_all: True + nodes: < list of nodes here > + timeout: 0 +``` +Once the desired accept method is chosen, run the following playbook `accept.yml`: +1. Run the following playbook. +``` +$ ansible-playbook accept.yml +``` + +Log in to a master and run the following command: +``` +ssh root@<master ip address> +$ oc --config=/etc/origin/master/admin.kubeconfig get csr +node-bootstrapper-client-ip-172-31-49-148-ec2-internal 1h system:serviceaccount:openshift-infra:node-bootstrapper Approved,Issued +node-bootstrapper-server-ip-172-31-49-148-ec2-internal 1h system:node:ip-172-31-49-148.ec2.internal Approved,Issued +``` + +Verify the `CONDITION` is `Approved,Issued` on the `csr` objects. Two are required for each node. +1. `node-bootstrapper-client` is a request to access the api/controllers. +2. `node-bootstrapper-server` is a request to join the cluster. + +Once this is complete, verify the nodes have joined the cluster and are `ready`. + +``` +$ oc --config=/etc/origin/master/admin.kubeconfig get nodes +NAME STATUS AGE VERSION +ip-172-31-49-148.ec2.internal Ready 1h v1.6.1+5115d708d7 +``` + +### Ready To Work! + +At this point your cluster should be ready for workloads. Proceed to deploy applications on your cluster. + +### Still to come + +More enhancements are on the way for provisioning. These will include more playbooks that enhance the provisioning capabilities. 
diff --git a/playbooks/aws/openshift-cluster/accept.yml b/playbooks/aws/openshift-cluster/accept.yml new file mode 100755 index 000000000..d43c84205 --- /dev/null +++ b/playbooks/aws/openshift-cluster/accept.yml @@ -0,0 +1,48 @@ +--- +- name: Setup the vpc and the master node group + #hosts: oo_first_master + hosts: localhost + remote_user: root + gather_facts: no + tasks: + - name: get provisioning vars + include_vars: vars.yml + + - name: bring lib_openshift into scope + include_role: + name: lib_openshift + + - name: fetch masters + ec2_remote_facts: + region: "{{ provision.region }}" + filters: + "tag:clusterid": "{{ provision.clusterid }}" + "tag:host-type": master + instance-state-name: running + register: mastersout + retries: 20 + delay: 3 + until: "'instances' in mastersout and mastersout.instances|length > 0" + + - name: fetch new node instances + ec2_remote_facts: + region: "{{ provision.region }}" + filters: + "tag:clusterid": "{{ provision.clusterid }}" + "tag:host-type": node + instance-state-name: running + register: instancesout + retries: 20 + delay: 3 + until: "'instances' in instancesout and instancesout.instances|length > 0" + + - debug: + msg: "{{ instancesout.instances|map(attribute='private_dns_name') | list | regex_replace('.ec2.internal') }}" + + - name: approve nodes + oc_adm_csr: + #approve_all: True + nodes: "{{ instancesout.instances|map(attribute='private_dns_name') | list | regex_replace('.ec2.internal') }}" + timeout: 0 + register: nodeout + delegate_to: "{{ mastersout.instances[0].public_ip_address }}" diff --git a/playbooks/aws/openshift-cluster/build_ami.yml b/playbooks/aws/openshift-cluster/build_ami.yml new file mode 100644 index 000000000..fa708ffa1 --- /dev/null +++ b/playbooks/aws/openshift-cluster/build_ami.yml @@ -0,0 +1,134 @@ +--- +- hosts: localhost + connection: local + gather_facts: no + tasks: + - name: get the necessary vars for ami building + include_vars: vars.yml + + - name: create a vpc with the name <clusterid> 
+ include_role: + name: openshift_aws_vpc + vars: + r_openshift_aws_vpc_clusterid: "{{ provision.clusterid }}" + r_openshift_aws_vpc_cidr: "{{ provision.vpc.cidr }}" + r_openshift_aws_vpc_subnets: "{{ provision.vpc.subnets }}" + r_openshift_aws_vpc_region: "{{ provision.region }}" + r_openshift_aws_vpc_tags: "{{ provision.vpc.tags }}" + r_openshift_aws_vpc_name: "{{ provision.vpc.name | default(provision.clusterid) }}" + + - name: create aws ssh keypair + include_role: + name: openshift_aws_ssh_keys + vars: + r_openshift_aws_ssh_keys_users: "{{ provision.instance_users }}" + r_openshift_aws_ssh_keys_region: "{{ provision.region }}" + + - name: fetch the default subnet id + ec2_vpc_subnet_facts: + region: "{{ provision.region }}" + filters: + "tag:Name": "{{ provision.vpc.subnets[provision.region][0].az }}" + register: subnetout + + - name: create instance for ami creation + ec2: + assign_public_ip: yes + region: "{{ provision.region }}" + key_name: "{{ provision.node_group_config.ssh_key_name }}" + group: "{{ provision.clusterid }}" + instance_type: m4.xlarge + vpc_subnet_id: "{{ subnetout.subnets[0].id }}" + image: "{{ provision.build.base_image }}" + volumes: + - device_name: /dev/sdb + volume_type: gp2 + volume_size: 100 + delete_on_termination: true + wait: yes + exact_count: 1 + count_tag: + Name: ami_base + instance_tags: + Name: ami_base + register: amibase + + - name: wait for ssh to become available + wait_for: + port: 22 + host: "{{ amibase.tagged_instances.0.public_ip }}" + timeout: 300 + search_regex: OpenSSH + + - name: add host to group + add_host: + name: "{{ amibase.tagged_instances.0.public_dns_name }}" + groups: amibase + +- hosts: amibase + remote_user: root + tasks: + - name: included required variables + include_vars: vars.yml + + - name: run openshift image preparation + include_role: + name: openshift_ami_prep + vars: + r_openshift_ami_prep_yum_repositories: "{{ provision.build.yum_repositories }}" + r_openshift_ami_prep_node: 
atomic-openshift-node + r_openshift_ami_prep_master: atomic-openshift-master + +- hosts: localhost + connection: local + become: no + tasks: + - name: bundle ami + ec2_ami: + instance_id: "{{ amibase.tagged_instances.0.id }}" + region: "{{ provision.region }}" + state: present + description: "This was provisioned {{ ansible_date_time.iso8601 }}" + name: "{{ provision.build.ami_name }}{{ lookup('pipe', 'date +%Y%m%d%H%M')}}" + wait: yes + register: amioutput + + - debug: var=amioutput + + - when: provision.build.use_encryption | default(False) + block: + - name: setup kms key for encryption + include_role: + name: openshift_aws_iam_kms + vars: + r_openshift_aws_iam_kms_region: "{{ provision.region }}" + r_openshift_aws_iam_kms_alias: "alias/{{ provision.clusterid }}_kms" + + - name: augment the encrypted ami tags with source-ami + set_fact: + source_tag: + source-ami: "{{ amioutput.image_id }}" + + - name: copy the ami for encrypted disks + include_role: + name: openshift_aws_ami_copy + vars: + r_openshift_aws_ami_copy_region: "{{ provision.region }}" + r_openshift_aws_ami_copy_name: "{{ provision.build.ami_name }}{{ lookup('pipe', 'date +%Y%m%d%H%M')}}-encrypted" + r_openshift_aws_ami_copy_src_ami: "{{ amioutput.image_id }}" + r_openshift_aws_ami_copy_kms_alias: "alias/{{ provision.clusterid }}_kms" + r_openshift_aws_ami_copy_tags: "{{ source_tag | combine(provision.build.openshift_ami_tags) }}" + r_openshift_aws_ami_copy_encrypt: "{{ provision.build.use_encryption }}" + # this option currently fails due to boto waiters + # when supported this need to be reapplied + #r_openshift_aws_ami_copy_wait: True + + - name: Display newly created encrypted ami id + debug: + msg: "{{ r_openshift_aws_ami_copy_retval_custom_ami }}" + + - name: terminate temporary instance + ec2: + state: absent + region: "{{ provision.region }}" + instance_ids: "{{ amibase.tagged_instances.0.id }}" diff --git a/playbooks/aws/openshift-cluster/build_node_group.yml 
b/playbooks/aws/openshift-cluster/build_node_group.yml new file mode 100644 index 000000000..3ef492238 --- /dev/null +++ b/playbooks/aws/openshift-cluster/build_node_group.yml @@ -0,0 +1,47 @@ +--- +- name: fetch recently created AMI + ec2_ami_find: + region: "{{ provision.region }}" + sort: creationDate + sort_order: descending + name: "{{ provision.build.ami_name }}*" + ami_tags: "{{ provision.build.openshift_ami_tags }}" + #no_result_action: fail + register: amiout + +- block: + - name: "Create {{ openshift_build_node_type }} sgs" + include_role: + name: openshift_aws_sg + vars: + r_openshift_aws_sg_clusterid: "{{ provision.clusterid }}" + r_openshift_aws_sg_region: "{{ provision.region }}" + r_openshift_aws_sg_type: "{{ openshift_build_node_type }}" + + - name: "generate a launch config name for {{ openshift_build_node_type }}" + set_fact: + launch_config_name: "{{ provision.clusterid }}-{{ openshift_build_node_type }}-{{ ansible_date_time.epoch }}" + + - name: create "{{ openshift_build_node_type }} launch config" + include_role: + name: openshift_aws_launch_config + vars: + r_openshift_aws_launch_config_name: "{{ launch_config_name }}" + r_openshift_aws_launch_config_clusterid: "{{ provision.clusterid }}" + r_openshift_aws_launch_config_region: "{{ provision.region }}" + r_openshift_aws_launch_config: "{{ provision.node_group_config }}" + r_openshift_aws_launch_config_type: "{{ openshift_build_node_type }}" + r_openshift_aws_launch_config_custom_image: "{{ '' if 'results' not in amiout else amiout.results[0].ami_id }}" + r_openshift_aws_launch_config_bootstrap_token: "{{ (local_bootstrap['content'] |b64decode) if local_bootstrap is defined else '' }}" + + - name: "create {{ openshift_build_node_type }} node groups" + include_role: + name: openshift_aws_node_group + vars: + r_openshift_aws_node_group_name: "{{ provision.clusterid }} openshift {{ openshift_build_node_type }}" + r_openshift_aws_node_group_lc_name: "{{ launch_config_name }}" + 
r_openshift_aws_node_group_clusterid: "{{ provision.clusterid }}" + r_openshift_aws_node_group_region: "{{ provision.region }}" + r_openshift_aws_node_group_config: "{{ provision.node_group_config }}" + r_openshift_aws_node_group_type: "{{ openshift_build_node_type }}" + r_openshift_aws_node_group_subnet_name: "{{ provision.vpc.subnets[provision.region][0].az }}" diff --git a/playbooks/aws/openshift-cluster/library/ec2_ami_find.py b/playbooks/aws/openshift-cluster/library/ec2_ami_find.py deleted file mode 100644 index 99d0f44f0..000000000 --- a/playbooks/aws/openshift-cluster/library/ec2_ami_find.py +++ /dev/null @@ -1,303 +0,0 @@ -#!/usr/bin/python -#pylint: skip-file -# flake8: noqa -# -# This file is part of Ansible -# -# Ansible is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Ansible is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Ansible. If not, see <http://www.gnu.org/licenses/>. - -DOCUMENTATION = ''' ---- -module: ec2_ami_find -version_added: 2.0 -short_description: Searches for AMIs to obtain the AMI ID and other information -description: - - Returns list of matching AMIs with AMI ID, along with other useful information - - Can search AMIs with different owners - - Can search by matching tag(s), by AMI name and/or other criteria - - Results can be sorted and sliced -author: Tom Bamford -notes: - - This module is not backwards compatible with the previous version of the ec2_search_ami module which worked only for Ubuntu AMIs listed on cloud-images.ubuntu.com. 
- - See the example below for a suggestion of how to search by distro/release. -options: - region: - description: - - The AWS region to use. - required: true - aliases: [ 'aws_region', 'ec2_region' ] - owner: - description: - - Search AMIs owned by the specified owner - - Can specify an AWS account ID, or one of the special IDs 'self', 'amazon' or 'aws-marketplace' - - If not specified, all EC2 AMIs in the specified region will be searched. - - You can include wildcards in many of the search options. An asterisk (*) matches zero or more characters, and a question mark (?) matches exactly one character. You can escape special characters using a backslash (\) before the character. For example, a value of \*amazon\?\\ searches for the literal string *amazon?\. - required: false - default: null - ami_id: - description: - - An AMI ID to match. - default: null - required: false - ami_tags: - description: - - A hash/dictionary of tags to match for the AMI. - default: null - required: false - architecture: - description: - - An architecture type to match (e.g. x86_64). - default: null - required: false - hypervisor: - description: - - A hypervisor type type to match (e.g. xen). - default: null - required: false - is_public: - description: - - Whether or not the image(s) are public. - choices: ['yes', 'no'] - default: null - required: false - name: - description: - - An AMI name to match. - default: null - required: false - platform: - description: - - Platform type to match. - default: null - required: false - sort: - description: - - Optional attribute which with to sort the results. - - If specifying 'tag', the 'tag_name' parameter is required. - choices: ['name', 'description', 'tag'] - default: null - required: false - sort_tag: - description: - - Tag name with which to sort results. - - Required when specifying 'sort=tag'. - default: null - required: false - sort_order: - description: - - Order in which to sort results. 
- - Only used when the 'sort' parameter is specified. - choices: ['ascending', 'descending'] - default: 'ascending' - required: false - sort_start: - description: - - Which result to start with (when sorting). - - Corresponds to Python slice notation. - default: null - required: false - sort_end: - description: - - Which result to end with (when sorting). - - Corresponds to Python slice notation. - default: null - required: false - state: - description: - - AMI state to match. - default: 'available' - required: false - virtualization_type: - description: - - Virtualization type to match (e.g. hvm). - default: null - required: false - no_result_action: - description: - - What to do when no results are found. - - "'success' reports success and returns an empty array" - - "'fail' causes the module to report failure" - choices: ['success', 'fail'] - default: 'success' - required: false -requirements: - - boto - -''' - -EXAMPLES = ''' -# Note: These examples do not set authentication details, see the AWS Guide for details. 
- -# Search for the AMI tagged "project:website" -- ec2_ami_find: - owner: self - tags: - project: website - no_result_action: fail - register: ami_find - -# Search for the latest Ubuntu 14.04 AMI -- ec2_ami_find: - name: "ubuntu/images/ebs/ubuntu-trusty-14.04-amd64-server-*" - owner: 099720109477 - sort: name - sort_order: descending - sort_end: 1 - register: ami_find - -# Launch an EC2 instance -- ec2: - image: "{{ ami_search.results[0].ami_id }}" - instance_type: m4.medium - key_name: mykey - wait: yes -''' - -try: - import boto.ec2 - HAS_BOTO=True -except ImportError: - HAS_BOTO=False - -import json - -def main(): - argument_spec = ec2_argument_spec() - argument_spec.update(dict( - region = dict(required=True, - aliases = ['aws_region', 'ec2_region']), - owner = dict(required=False, default=None), - ami_id = dict(required=False), - ami_tags = dict(required=False, type='dict', - aliases = ['search_tags', 'image_tags']), - architecture = dict(required=False), - hypervisor = dict(required=False), - is_public = dict(required=False), - name = dict(required=False), - platform = dict(required=False), - sort = dict(required=False, default=None, - choices=['name', 'description', 'tag']), - sort_tag = dict(required=False), - sort_order = dict(required=False, default='ascending', - choices=['ascending', 'descending']), - sort_start = dict(required=False), - sort_end = dict(required=False), - state = dict(required=False, default='available'), - virtualization_type = dict(required=False), - no_result_action = dict(required=False, default='success', - choices = ['success', 'fail']), - ) - ) - - module = AnsibleModule( - argument_spec=argument_spec, - ) - - if not HAS_BOTO: - module.fail_json(msg='boto required for this module, install via pip or your package manager') - - ami_id = module.params.get('ami_id') - ami_tags = module.params.get('ami_tags') - architecture = module.params.get('architecture') - hypervisor = module.params.get('hypervisor') - is_public = 
module.params.get('is_public') - name = module.params.get('name') - owner = module.params.get('owner') - platform = module.params.get('platform') - sort = module.params.get('sort') - sort_tag = module.params.get('sort_tag') - sort_order = module.params.get('sort_order') - sort_start = module.params.get('sort_start') - sort_end = module.params.get('sort_end') - state = module.params.get('state') - virtualization_type = module.params.get('virtualization_type') - no_result_action = module.params.get('no_result_action') - - filter = {'state': state} - - if ami_id: - filter['image_id'] = ami_id - if ami_tags: - for tag in ami_tags: - filter['tag:'+tag] = ami_tags[tag] - if architecture: - filter['architecture'] = architecture - if hypervisor: - filter['hypervisor'] = hypervisor - if is_public: - filter['is_public'] = is_public - if name: - filter['name'] = name - if platform: - filter['platform'] = platform - if virtualization_type: - filter['virtualization_type'] = virtualization_type - - ec2 = ec2_connect(module) - - images_result = ec2.get_all_images(owners=owner, filters=filter) - - if no_result_action == 'fail' and len(images_result) == 0: - module.fail_json(msg="No AMIs matched the attributes: %s" % json.dumps(filter)) - - results = [] - for image in images_result: - data = { - 'ami_id': image.id, - 'architecture': image.architecture, - 'description': image.description, - 'is_public': image.is_public, - 'name': image.name, - 'owner_id': image.owner_id, - 'platform': image.platform, - 'root_device_name': image.root_device_name, - 'root_device_type': image.root_device_type, - 'state': image.state, - 'tags': image.tags, - 'virtualization_type': image.virtualization_type, - } - - if image.kernel_id: - data['kernel_id'] = image.kernel_id - if image.ramdisk_id: - data['ramdisk_id'] = image.ramdisk_id - - results.append(data) - - if sort == 'tag': - if not sort_tag: - module.fail_json(msg="'sort_tag' option must be given with 'sort=tag'") - results.sort(key=lambda e: 
e['tags'][sort_tag], reverse=(sort_order=='descending')) - elif sort: - results.sort(key=lambda e: e[sort], reverse=(sort_order=='descending')) - - try: - if sort and sort_start and sort_end: - results = results[int(sort_start):int(sort_end)] - elif sort and sort_start: - results = results[int(sort_start):] - elif sort and sort_end: - results = results[:int(sort_end)] - except TypeError: - module.fail_json(msg="Please supply numeric values for sort_start and/or sort_end") - - module.exit_json(results=results) - -# import module snippets -from ansible.module_utils.basic import * -from ansible.module_utils.ec2 import * - -if __name__ == '__main__': - main() - diff --git a/playbooks/aws/openshift-cluster/provision.yml b/playbooks/aws/openshift-cluster/provision.yml new file mode 100644 index 000000000..dfbf61cc7 --- /dev/null +++ b/playbooks/aws/openshift-cluster/provision.yml @@ -0,0 +1,157 @@ +--- +- name: Setup the vpc and the master node group + hosts: localhost + tasks: + - name: get provisioning vars + include_vars: vars.yml + + - name: create default vpc + include_role: + name: openshift_aws_vpc + vars: + r_openshift_aws_vpc_clusterid: "{{ provision.clusterid }}" + r_openshift_aws_vpc_cidr: "{{ provision.vpc.cidr }}" + r_openshift_aws_vpc_subnets: "{{ provision.vpc.subnets }}" + r_openshift_aws_vpc_region: "{{ provision.region }}" + r_openshift_aws_vpc_tags: "{{ provision.vpc.tags }}" + r_openshift_aws_vpc_name: "{{ provision.vpc.name | default(provision.clusterid) }}" + + - name: create aws ssh keypair + include_role: + name: openshift_aws_ssh_keys + vars: + r_openshift_aws_ssh_keys_users: "{{ provision.instance_users }}" + r_openshift_aws_ssh_keys_region: "{{ provision.region }}" + + - when: provision.openshift_registry_s3 | default(false) + name: create s3 bucket for registry + include_role: + name: openshift_aws_s3 + vars: + r_openshift_aws_s3_clusterid: "{{ provision.clusterid }}-docker-registry" + r_openshift_aws_s3_region: "{{ provision.region }}" + 
r_openshift_aws_s3_mode: create + + - name: include scale group creation for master + include: build_node_group.yml + vars: + openshift_build_node_type: master + + - name: fetch new master instances + ec2_remote_facts: + region: "{{ provision.region }}" + filters: + "tag:clusterid": "{{ provision.clusterid }}" + "tag:host-type": master + instance-state-name: running + register: instancesout + retries: 20 + delay: 3 + until: instancesout.instances|length > 0 + + - name: bring iam_cert23 into scope + include_role: + name: lib_utils + + - name: upload certificates to AWS IAM + iam_cert23: + state: present + name: "{{ provision.clusterid }}-master-external" + cert: "{{ provision.iam_cert_ca.cert_path }}" + key: "{{ provision.iam_cert_ca.key_path }}" + cert_chain: "{{ provision.iam_cert_ca.chain_path | default(omit) }}" + register: elb_cert_chain + failed_when: + - "'failed' in elb_cert_chain" + - elb_cert_chain.failed + - "'msg' in elb_cert_chain" + - "'already exists' not in elb_cert_chain.msg" + when: provision.iam_cert_ca is defined + + - debug: var=elb_cert_chain + + - name: create our master external and internal load balancers + include_role: + name: openshift_aws_elb + vars: + r_openshift_aws_elb_clusterid: "{{ provision.clusterid }}" + r_openshift_aws_elb_region: "{{ provision.region }}" + r_openshift_aws_elb_instance_filter: + "tag:clusterid": "{{ provision.clusterid }}" + "tag:host-type": master + instance-state-name: running + r_openshift_aws_elb_type: master + r_openshift_aws_elb_direction: "{{ elb_item }}" + r_openshift_aws_elb_idle_timout: 400 + r_openshift_aws_elb_scheme: internet-facing + r_openshift_aws_elb_security_groups: + - "{{ provision.clusterid }}" + - "{{ provision.clusterid }}_master" + r_openshift_aws_elb_subnet_name: "{{ provision.vpc.subnets[provision.region][0].az }}" + r_openshift_aws_elb_name: "{{ provision.clusterid }}-master-{{ elb_item }}" + r_openshift_aws_elb_cert_arn: "{{ elb_cert_chain.arn }}" + with_items: + - internal + - 
external + loop_control: + loop_var: elb_item + + - name: add new master to masters group + add_host: + groups: "masters,etcd,nodes" + name: "{{ item.public_ip_address }}" + hostname: "{{ provision.clusterid }}-master-{{ item.id[:-5] }}" + with_items: "{{ instancesout.instances }}" + + - name: set facts for group normalization + set_fact: + cluster_id: "{{ provision.clusterid }}" + cluster_env: "{{ provision.node_group_config.tags.environment | default('dev') }}" + + - name: wait for ssh to become available + wait_for: + port: 22 + host: "{{ item.public_ip_address }}" + timeout: 300 + search_regex: OpenSSH + with_items: "{{ instancesout.instances }}" + + +- name: set the master facts for hostname to elb + hosts: masters + gather_facts: no + remote_user: root + tasks: + - name: include vars + include_vars: vars.yml + + - name: fetch elbs + ec2_elb_facts: + region: "{{ provision.region }}" + names: + - "{{ item }}" + with_items: + - "{{ provision.clusterid }}-master-external" + - "{{ provision.clusterid }}-master-internal" + delegate_to: localhost + register: elbs + + - debug: var=elbs + + - name: set fact + set_fact: + openshift_master_cluster_hostname: "{{ elbs.results[1].elbs[0].dns_name }}" + osm_custom_cors_origins: + - "{{ elbs.results[1].elbs[0].dns_name }}" + - "console.{{ provision.clusterid }}.openshift.com" + - "api.{{ provision.clusterid }}.openshift.com" + with_items: "{{ groups['masters'] }}" + +- name: normalize groups + include: ../../byo/openshift-cluster/initialize_groups.yml + +- name: run the std_include + include: ../../common/openshift-cluster/std_include.yml + +- name: run the config + include: ../../common/openshift-cluster/config.yml diff --git a/playbooks/aws/openshift-cluster/provision_nodes.yml b/playbooks/aws/openshift-cluster/provision_nodes.yml new file mode 100644 index 000000000..5428fb307 --- /dev/null +++ b/playbooks/aws/openshift-cluster/provision_nodes.yml @@ -0,0 +1,47 @@ +--- +# Get bootstrap config token +# bootstrap should be 
created on first master +# need to fetch it and shove it into cloud data +- name: create the node scale groups + hosts: localhost + connection: local + gather_facts: yes + tasks: + - name: get provisioning vars + include_vars: vars.yml + + - name: fetch master instances + ec2_remote_facts: + region: "{{ provision.region }}" + filters: + "tag:clusterid": "{{ provision.clusterid }}" + "tag:host-type": master + instance-state-name: running + register: instancesout + retries: 20 + delay: 3 + until: instancesout.instances|length > 0 + + - name: slurp down the bootstrap.kubeconfig + slurp: + src: /etc/origin/master/bootstrap.kubeconfig + delegate_to: "{{ instancesout.instances[0].public_ip_address }}" + remote_user: root + register: bootstrap + + - name: set_fact on localhost for kubeconfig + set_fact: + local_bootstrap: "{{ bootstrap }}" + launch_config_name: + infra: "infra-{{ ansible_date_time.epoch }}" + compute: "compute-{{ ansible_date_time.epoch }}" + + - name: include build node group + include: build_node_group.yml + vars: + openshift_build_node_type: infra + + - name: include build node group + include: build_node_group.yml + vars: + openshift_build_node_type: compute diff --git a/playbooks/aws/openshift-cluster/vars.yml b/playbooks/aws/openshift-cluster/vars.yml index d774187f0..b2b0716be 100644 --- a/playbooks/aws/openshift-cluster/vars.yml +++ b/playbooks/aws/openshift-cluster/vars.yml @@ -31,3 +31,126 @@ deployment_vars: enterprise: "{{ deployment_rhel7_ent_base }}" openshift-enterprise: "{{ deployment_rhel7_ent_base }}" atomic-enterprise: "{{ deployment_rhel7_ent_base }}" + +clusterid: mycluster +region: us-east-1 + +provision: + clusterid: "{{ clusterid }}" + region: "{{ region }}" + + build: # build specific variables here + ami_name: "openshift-gi-" + base_image: ami-bdd5d6ab # base image for AMI to build from + yum_repositories: # this is an example repository but it requires sslclient info + - name: openshift-repo + file: openshift-repo + description: 
OpenShift Builds + baseurl: https://mirror.openshift.com/enterprise/online-int/latest/x86_64/os/ + enabled: yes + gpgcheck: no + sslverify: no + sslclientcert: "/var/lib/yum/client-cert.pem" + sslclientkey: "/var/lib/yum/client-key.pem" + gpgkey: "https://mirror.ops.rhcloud.com/libra/keys/RPM-GPG-KEY-redhat-release https://mirror.ops.rhcloud.com/libra/keys/RPM-GPG-KEY-redhat-beta https://mirror.ops.rhcloud.com/libra/keys/RPM-GPG-KEY-redhat-openshifthosted" + + # when creating an encrypted AMI please specify use_encryption + use_encryption: False + + openshift_ami_tags: + bootstrap: "true" + openshift-created: "true" + clusterid: "{{ clusterid }}" + + # Use s3 backed registry storage + openshift_registry_s3: True + + # if using custom certificates these are required for the ELB + iam_cert_ca: + name: "{{ clusterid }}_openshift" + cert_path: '/path/to/wildcard.<clusterid>.example.com.crt' + key_path: '/path/to/wildcard.<clusterid>.example.com.key' + chain_path: '/path/to/cert.ca.crt' + + instance_users: + - key_name: myuser_key + username: myuser + pub_key: | + ssh-rsa AAAA== myuser@system + + node_group_config: + tags: + clusterid: "{{ clusterid }}" + environment: stg + + ssh_key_name: myuser_key + + # master specific cluster node settings + master: + instance_type: m4.xlarge + ami: ami-cdeec8b6 # if using an encrypted AMI this will be replaced + volumes: + - device_name: /dev/sdb + volume_size: 100 + device_type: gp2 + delete_on_termination: False + health_check: + period: 60 + type: EC2 + min_size: 3 + max_size: 3 + desired_size: 3 + tags: + host-type: master + sub-host-type: default + wait_for_instances: True + + # compute specific cluster node settings + compute: + instance_type: m4.xlarge + ami: ami-cdeec8b6 + volumes: + - device_name: /dev/sdb + volume_size: 100 + device_type: gp2 + delete_on_termination: True + health_check: + period: 60 + type: EC2 + min_size: 3 + max_size: 100 + desired_size: 3 + tags: + host-type: node + sub-host-type: compute + + # infra 
specific cluster node settings + infra: + instance_type: m4.xlarge + ami: ami-cdeec8b6 + volumes: + - device_name: /dev/sdb + volume_size: 100 + device_type: gp2 + delete_on_termination: True + health_check: + period: 60 + type: EC2 + min_size: 2 + max_size: 20 + desired_size: 2 + tags: + host-type: node + sub-host-type: infra + + # vpc settings + vpc: + cidr: 172.31.0.0/16 + subnets: + us-east-1: # These are us-east-1 region defaults. Ensure this matches your region + - cidr: 172.31.48.0/20 + az: "us-east-1c" + - cidr: 172.31.32.0/20 + az: "us-east-1e" + - cidr: 172.31.16.0/20 + az: "us-east-1a" diff --git a/playbooks/byo/openshift-checks/README.md b/playbooks/byo/openshift-checks/README.md index f0f14b268..b26e7d7ed 100644 --- a/playbooks/byo/openshift-checks/README.md +++ b/playbooks/byo/openshift-checks/README.md @@ -7,15 +7,14 @@ Ansible's default operation mode is to fail fast, on the first error. However, when performing checks, it is useful to gather as much information about problems as possible in a single run. -Thus, the playbooks run a battery of checks against the inventory hosts and have -Ansible gather intermediate errors, giving a more complete diagnostic of the -state of each host. If any check failed, the playbook run will be marked as -failed. +Thus, the playbooks run a battery of checks against the inventory hosts and +gather intermediate errors, giving a more complete diagnostic of the state of +each host. If any check failed, the playbook run will be marked as failed. To facilitate understanding the problems that were encountered, a custom callback plugin summarizes execution errors at the end of a playbook run. -# Available playbooks +## Available playbooks 1. Pre-install playbook ([pre-install.yml](pre-install.yml)) - verifies system requirements and look for common problems that can prevent a successful @@ -27,6 +26,10 @@ callback plugin summarizes execution errors at the end of a playbook run. 3. 
Certificate expiry playbooks ([certificate_expiry](certificate_expiry)) - check that certificates in use are valid and not expiring soon. +4. Adhoc playbook ([adhoc.yml](adhoc.yml)) - use it to run adhoc checks or to + list existing checks. + See the [next section](#the-adhoc-playbook) for a usage example. + ## Running With a [recent installation of Ansible](../../../README.md#setup), run the playbook @@ -59,6 +62,41 @@ against your inventory file. Here is the step-by-step: $ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/certificate_expiry/default.yaml -v ``` +### The adhoc playbook + +The adhoc playbook gives flexibility to run any check or a custom group of +checks. What will be run is determined by the `openshift_checks` variable, +which, among other ways supported by Ansible, can be set on the command line +using the `-e` flag. + +For example, to run the `docker_storage` check: + +```console +$ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/adhoc.yml -e openshift_checks=docker_storage +``` + +To run more checks, use a comma-separated list of check names: + +```console +$ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/adhoc.yml -e openshift_checks=docker_storage,disk_availability +``` + +To run an entire class of checks, use the name of a check group tag, prefixed by `@`. This will run all checks tagged `preflight`: + +```console +$ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/adhoc.yml -e openshift_checks=@preflight +``` + +It is valid to specify multiple check tags and individual check names together +in a comma-separated list. 
+ +To list all of the available checks and tags, run the adhoc playbook without +setting the `openshift_checks` variable: + +```console +$ ansible-playbook -i <inventory file> playbooks/byo/openshift-checks/adhoc.yml +``` + ## Running in a container This repository is built into a Docker image including Ansible so that it can diff --git a/playbooks/byo/openshift-checks/adhoc.yml b/playbooks/byo/openshift-checks/adhoc.yml new file mode 100644 index 000000000..226bed732 --- /dev/null +++ b/playbooks/byo/openshift-checks/adhoc.yml @@ -0,0 +1,27 @@ +--- +# NOTE: ideally this would be just part of a single play in +# common/openshift-checks/adhoc.yml that lists the existing checks when +# openshift_checks is not set or run the requested checks. However, to actually +# run the checks we need to have the included dependencies to run first and that +# takes time. To speed up listing checks, we use this separate play that runs +# before the include of dependencies to save time and improve the UX. +- name: OpenShift health checks + # NOTE: though the openshift_checks variable could be potentially defined on + # individual hosts while not defined for localhost, we do not support that + # usage. Running this play only in localhost speeds up execution. 
+ hosts: localhost + connection: local + roles: + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: adhoc + pre_tasks: + - name: List known health checks + action: openshift_health_check + when: openshift_checks is undefined or not openshift_checks + +- include: ../openshift-cluster/initialize_groups.yml + +- include: ../../common/openshift-cluster/std_include.yml + +- include: ../../common/openshift-checks/adhoc.yml diff --git a/playbooks/byo/openshift-checks/health.yml b/playbooks/byo/openshift-checks/health.yml index dfc1a7db0..96a71e4dc 100644 --- a/playbooks/byo/openshift-checks/health.yml +++ b/playbooks/byo/openshift-checks/health.yml @@ -1,3 +1,6 @@ --- - include: ../openshift-cluster/initialize_groups.yml + +- include: ../../common/openshift-cluster/std_include.yml + - include: ../../common/openshift-checks/health.yml diff --git a/playbooks/byo/openshift-checks/pre-install.yml b/playbooks/byo/openshift-checks/pre-install.yml index 5e8c3ab9b..dd93df0bb 100644 --- a/playbooks/byo/openshift-checks/pre-install.yml +++ b/playbooks/byo/openshift-checks/pre-install.yml @@ -1,3 +1,6 @@ --- - include: ../openshift-cluster/initialize_groups.yml + +- include: ../../common/openshift-cluster/std_include.yml + - include: ../../common/openshift-checks/pre-install.yml diff --git a/playbooks/byo/openshift-cluster/openshift-provisioners.yml b/playbooks/byo/openshift-cluster/openshift-provisioners.yml new file mode 100644 index 000000000..8e80f158b --- /dev/null +++ b/playbooks/byo/openshift-cluster/openshift-provisioners.yml @@ -0,0 +1,6 @@ +--- +- include: initialize_groups.yml + +- include: ../../common/openshift-cluster/std_include.yml + +- include: ../../common/openshift-cluster/openshift_provisioners.yml diff --git a/playbooks/common/openshift-checks/adhoc.yml b/playbooks/common/openshift-checks/adhoc.yml new file mode 100644 index 000000000..dfcef8435 --- /dev/null +++ b/playbooks/common/openshift-checks/adhoc.yml @@ -0,0 +1,12 @@ 
+--- +- name: OpenShift health checks + hosts: oo_all_hosts + roles: + - openshift_health_checker + vars: + - r_openshift_health_checker_playbook_context: adhoc + post_tasks: + - name: Run health checks + action: openshift_health_check + args: + checks: '{{ openshift_checks | default([]) }}' diff --git a/playbooks/common/openshift-checks/health.yml b/playbooks/common/openshift-checks/health.yml index ff5b5af67..21ea785ef 100644 --- a/playbooks/common/openshift-checks/health.yml +++ b/playbooks/common/openshift-checks/health.yml @@ -1,10 +1,6 @@ --- -- include: ../openshift-cluster/std_include.yml - tags: - - always - - name: Run OpenShift health checks - hosts: OSEv3 + hosts: oo_all_hosts roles: - openshift_health_checker vars: diff --git a/playbooks/common/openshift-checks/pre-install.yml b/playbooks/common/openshift-checks/pre-install.yml index 861229f21..88e6f9120 100644 --- a/playbooks/common/openshift-checks/pre-install.yml +++ b/playbooks/common/openshift-checks/pre-install.yml @@ -1,10 +1,6 @@ --- -- include: ../openshift-cluster/std_include.yml - tags: - - always - -- hosts: OSEv3 - name: run OpenShift pre-install checks +- name: run OpenShift pre-install checks + hosts: oo_all_hosts roles: - openshift_health_checker vars: diff --git a/playbooks/common/openshift-cluster/initialize_facts.yml b/playbooks/common/openshift-cluster/initialize_facts.yml index 4bf5d33b1..65be436c6 100644 --- a/playbooks/common/openshift-cluster/initialize_facts.yml +++ b/playbooks/common/openshift-cluster/initialize_facts.yml @@ -108,6 +108,20 @@ when: - l_any_system_container | bool + - name: Default system_images_registry to a enterprise registry + set_fact: + system_images_registry: "registry.access.redhat.com" + when: + - system_images_registry is not defined + - openshift_deployment_type == "openshift-enterprise" + + - name: Default system_images_registry to community registry + set_fact: + system_images_registry: "docker.io" + when: + - system_images_registry is not defined 
+ - openshift_deployment_type == "origin" + - name: Gather Cluster facts and set is_containerized if needed openshift_facts: role: common @@ -115,6 +129,7 @@ debug_level: "{{ openshift_debug_level | default(2) }}" deployment_type: "{{ openshift_deployment_type }}" deployment_subtype: "{{ openshift_deployment_subtype | default(None) }}" + cli_image: "{{ osm_image | default(None) }}" cluster_id: "{{ openshift_cluster_id | default('default') }}" hostname: "{{ openshift_hostname | default(None) }}" ip: "{{ openshift_ip | default(None) }}" @@ -124,7 +139,7 @@ is_master_system_container: "{{ l_is_master_system_container | default(false) }}" is_etcd_system_container: "{{ l_is_etcd_system_container | default(false) }}" etcd_runtime: "{{ l_etcd_runtime }}" - system_images_registry: "{{ system_images_registry | default('') }}" + system_images_registry: "{{ system_images_registry }}" public_hostname: "{{ openshift_public_hostname | default(None) }}" public_ip: "{{ openshift_public_ip | default(None) }}" portal_net: "{{ openshift_portal_net | default(openshift_master_portal_net) | default(None) }}" diff --git a/playbooks/common/openshift-cluster/upgrades/pre/verify_nodes_running.yml b/playbooks/common/openshift-cluster/upgrades/pre/verify_nodes_running.yml deleted file mode 100644 index 354af3cde..000000000 --- a/playbooks/common/openshift-cluster/upgrades/pre/verify_nodes_running.yml +++ /dev/null @@ -1,13 +0,0 @@ ---- -- name: Verify node processes - hosts: oo_nodes_to_config - roles: - - openshift_facts - - openshift_docker_facts - tasks: - - name: Ensure Node is running - service: - name: "{{ openshift.common.service_type }}-node" - state: started - enabled: yes - when: openshift.common.is_containerized | bool diff --git a/playbooks/common/openshift-cluster/upgrades/v3_7/validator.yml b/playbooks/common/openshift-cluster/upgrades/v3_7/validator.yml index 90e95422b..136ad5362 100644 --- a/playbooks/common/openshift-cluster/upgrades/v3_7/validator.yml +++ 
b/playbooks/common/openshift-cluster/upgrades/v3_7/validator.yml @@ -7,6 +7,16 @@ hosts: oo_first_master roles: - { role: lib_openshift } + tasks: - name: Check for invalid namespaces and SDN errors oc_objectvalidator: + + - name: Confirm OpenShift authorization objects are in sync + command: > + {{ openshift.common.client_binary }} adm migrate authorization + changed_when: false + register: l_oc_result + until: l_oc_result.rc == 0 + retries: 4 + delay: 15 diff --git a/playbooks/common/openshift-etcd/migrate.yml b/playbooks/common/openshift-etcd/migrate.yml index 3e7a48669..311ff84b6 100644 --- a/playbooks/common/openshift-etcd/migrate.yml +++ b/playbooks/common/openshift-etcd/migrate.yml @@ -17,18 +17,14 @@ tags: - always +# TODO: This will be different for release-3.6 branch - name: Prepare masters for etcd data migration hosts: oo_masters_to_config tasks: - set_fact: master_services: - - "{{ openshift.common.service_type + '-master' }}" - - set_fact: - master_services: - "{{ openshift.common.service_type + '-master-controllers' }}" - "{{ openshift.common.service_type + '-master-api' }}" - when: - - (openshift_master_cluster_method is defined and openshift_master_cluster_method == "native") or openshift.common.is_master_system_container | bool - debug: msg: "master service name: {{ master_services }}" - name: Stop masters @@ -67,16 +63,59 @@ when: - etcd_backup_failed | length > 0 -- name: Migrate etcd data from v2 to v3 +- name: Stop etcd hosts: oo_etcd_to_migrate gather_facts: no tags: - always + pre_tasks: + - set_fact: + l_etcd_service: "{{ 'etcd_container' if openshift.common.is_containerized else 'etcd' }}" + - name: Disable etcd members + service: + name: "{{ l_etcd_service }}" + state: stopped + +- name: Migrate data on first etcd + hosts: oo_etcd_to_migrate[0] + gather_facts: no + tags: + - always roles: - role: etcd_migrate r_etcd_migrate_action: migrate r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" etcd_peer: 
"{{ ansible_default_ipv4.address }}" + etcd_url_scheme: "https" + etcd_peer_url_scheme: "https" + +- name: Clean data stores on remaining etcd hosts + hosts: oo_etcd_to_migrate[1:] + gather_facts: no + tags: + - always + roles: + - role: etcd_migrate + r_etcd_migrate_action: clean_data + r_etcd_common_embedded_etcd: "{{ groups.oo_etcd_to_config | default([]) | length == 0 }}" + etcd_peer: "{{ ansible_default_ipv4.address }}" + etcd_url_scheme: "https" + etcd_peer_url_scheme: "https" + post_tasks: + - name: Add etcd hosts + delegate_to: localhost + add_host: + name: "{{ item }}" + groups: oo_new_etcd_to_config + ansible_ssh_user: "{{ g_ssh_user | default(omit) }}" + ansible_become: "{{ g_sudo | default(omit) }}" + with_items: "{{ groups.oo_etcd_to_migrate[1:] | default([]) }}" + changed_when: no + - name: Set success + set_fact: + r_etcd_migrate_success: true + +- include: ./scaleup.yml - name: Gate on etcd migration hosts: oo_masters_to_config @@ -89,6 +128,16 @@ - set_fact: etcd_migration_failed: "{{ groups.oo_etcd_to_migrate | difference(etcd_migration_completed) }}" +- name: Add TTLs on the first master + hosts: oo_first_master[0] + roles: + - role: etcd_migrate + r_etcd_migrate_action: add_ttls + etcd_peer: "{{ hostvars[groups.oo_etcd_to_migrate.0].ansible_default_ipv4.address }}" + etcd_url_scheme: "https" + etcd_peer_url_scheme: "https" + when: etcd_migration_failed | length == 0 + - name: Configure masters if etcd data migration is succesfull hosts: oo_masters_to_config roles: @@ -100,10 +149,6 @@ msg: "Skipping master re-configuration since migration failed." 
when: - etcd_migration_failed | length > 0 - -- name: Start masters after etcd data migration - hosts: oo_masters_to_config - tasks: - name: Start master services service: name: "{{ item }}" diff --git a/playbooks/common/openshift-etcd/scaleup.yml b/playbooks/common/openshift-etcd/scaleup.yml index 192305bc8..52b90daca 100644 --- a/playbooks/common/openshift-etcd/scaleup.yml +++ b/playbooks/common/openshift-etcd/scaleup.yml @@ -24,6 +24,9 @@ member add {{ etcd_hostname }} {{ etcd_peer_url_scheme }}://{{ etcd_ip }}:{{ etcd_peer_port }} delegate_to: "{{ etcd_ca_host }}" register: etcd_add_check + retries: 3 + delay: 10 + until: etcd_add_check.rc == 0 roles: - role: openshift_etcd when: etcd_add_check.rc == 0 @@ -36,3 +39,13 @@ r_etcd_common_etcd_runtime: "{{ openshift.common.etcd_runtime }}" - role: nickhammond.logrotate when: etcd_add_check.rc == 0 + post_tasks: + - name: Verify cluster is stable + command: > + /usr/bin/etcdctl --cert-file {{ etcd_peer_cert_file }} + --key-file {{ etcd_peer_key_file }} + --ca-file {{ etcd_peer_ca_file }} + -C {{ etcd_peer_url_scheme }}://{{ hostvars[etcd_ca_host].etcd_hostname }}:{{ etcd_client_port }} + cluster-health + retries: 1 + delay: 30 |