From 72c57c0d9c49ad87b2d0b4b0eb9bec3a4f8d1d0c Mon Sep 17 00:00:00 2001 From: Eric Wolinetz Date: Wed, 3 Jan 2018 16:07:41 -0600 Subject: Adding logic to do a full cluster restart if we are incrementing our major versions of ES --- .../tasks/determine_version.yaml | 2 ++ .../tasks/get_es_version.yml | 42 ++++++++++++++++++++++ .../tasks/main.yaml | 14 ++++++++ .../tasks/restart_cluster.yml | 23 +++++++++++- .../tasks/restart_es_node.yml | 2 ++ .../openshift_logging_elasticsearch/vars/main.yml | 2 ++ 6 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 roles/openshift_logging_elasticsearch/tasks/get_es_version.yml diff --git a/roles/openshift_logging_elasticsearch/tasks/determine_version.yaml b/roles/openshift_logging_elasticsearch/tasks/determine_version.yaml index c53a06019..c55e7c5ea 100644 --- a/roles/openshift_logging_elasticsearch/tasks/determine_version.yaml +++ b/roles/openshift_logging_elasticsearch/tasks/determine_version.yaml @@ -15,3 +15,5 @@ - fail: msg: Invalid version specified for Elasticsearch when: es_version not in __allowed_es_versions + +- include_tasks: get_es_version.yml diff --git a/roles/openshift_logging_elasticsearch/tasks/get_es_version.yml b/roles/openshift_logging_elasticsearch/tasks/get_es_version.yml new file mode 100644 index 000000000..9182bddb2 --- /dev/null +++ b/roles/openshift_logging_elasticsearch/tasks/get_es_version.yml @@ -0,0 +1,42 @@ +--- +- command: > + oc get pod -l component=es,provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name} + register: _cluster_pods + +- name: "Getting ES version for logging-es cluster" + command: > + oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XGET 'https://localhost:9200/' + register: _curl_output + when: _cluster_pods.stdout_lines | count > 0 + +- command: > + oc get pod -l component=es-ops,provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name} + register: _ops_cluster_pods + +- name: "Getting ES version for logging-es-ops cluster" + command: > + oc exec {{ _ops_cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XGET 'https://localhost:9200/' + register: _ops_curl_output + when: _ops_cluster_pods.stdout_lines | count > 0 + +- set_fact: + _es_output: "{{ _curl_output.stdout | from_json }}" + when: _curl_output.stdout is defined + +- set_fact: + _es_ops_output: "{{ _ops_curl_output.stdout | from_json }}" + when: _ops_curl_output.stdout is defined + +- set_fact: + _es_installed_version: "{{ _es_output.version.number }}" + when: + - _es_output is defined + - _es_output.version is defined + - _es_output.version.number is defined + +- set_fact: + _es_ops_installed_version: "{{ _es_ops_output.version.number }}" + when: + - _es_ops_output is defined + - _es_ops_output.version is defined + - _es_ops_output.version.number is defined diff --git a/roles/openshift_logging_elasticsearch/tasks/main.yaml b/roles/openshift_logging_elasticsearch/tasks/main.yaml index 6ddeb122e..32e0c5471 100644 --- a/roles/openshift_logging_elasticsearch/tasks/main.yaml +++ b/roles/openshift_logging_elasticsearch/tasks/main.yaml @@ -32,6 +32,20 @@ - include_tasks: determine_version.yaml +- set_fact: + full_restart_cluster: True + when: + - _es_installed_version is defined + - __es_version.split('.')[0] | int >= 5 + - _es_installed_version.split('.')[0] | int < __es_version.split('.')[0] | int + +- set_fact: + full_restart_cluster: True + when: + - _es_ops_installed_version is defined + - __es_version.split('.')[0] | int >= 5 + - _es_ops_installed_version.split('.')[0] | int < __es_version.split('.')[0] | int + # allow passing in a tempdir - name: Create temp directory for doing work in command: mktemp -d /tmp/openshift-logging-ansible-XXXXXX diff --git a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml index 4a32453e3..be43357b1 100644 --- a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml +++ b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml @@ -11,17 +11,38 @@ changed_when: "'\"acknowledged\":true' in _disable_output.stdout" when: _cluster_pods.stdout_lines | count > 0 +# Flush ES +- name: "Flushing for logging-{{ _cluster_component }} cluster" + command: > + oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_flush/synced' + register: _flush_output + changed_when: "'\"acknowledged\":true' in _flush_output.stdout" + when: + - _cluster_pods.stdout_lines | count > 0 + - full_restart_cluster | bool + - command: > oc get dc -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name} register: _cluster_dcs +## restart all dcs for full restart +- name: "Restart ES node {{ _es_node }}" + include_tasks: restart_es_node.yml + with_items: "{{ _cluster_dcs }}" + loop_control: + loop_var: _es_node + when: + - full_restart_cluster | bool + ## restart the node if it's dc is in the list of nodes to restart? - name: "Restart ES node {{ _es_node }}" include_tasks: restart_es_node.yml with_items: "{{ _restart_logging_nodes }}" loop_control: loop_var: _es_node - when: _es_node in _cluster_dcs.stdout + when: + - not full_restart_cluster | bool + - _es_node in _cluster_dcs.stdout ## we may need a new first pod to run against -- fetch them all again - command: > diff --git a/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml b/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml index b07b232ce..6d0df40c8 100644 --- a/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml +++ b/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml @@ -14,6 +14,8 @@ - _dc_output.results.results[0].status is defined - _dc_output.results.results[0].status.readyReplicas is defined - _dc_output.results.results[0].status.readyReplicas > 0 + - _dc_output.results.results[0].status.updatedReplicas is defined + - _dc_output.results.results[0].status.updatedReplicas > 0 retries: 60 delay: 30 diff --git a/roles/openshift_logging_elasticsearch/vars/main.yml b/roles/openshift_logging_elasticsearch/vars/main.yml index 0e56a6eac..ef259cd3a 100644 --- a/roles/openshift_logging_elasticsearch/vars/main.yml +++ b/roles/openshift_logging_elasticsearch/vars/main.yml @@ -4,6 +4,7 @@ __allowed_es_versions: ["3_5", "3_6", "3_7", "3_8"] __allowed_es_types: ["data-master", "data-client", "master", "client"] __es_log_appenders: ['file', 'console'] __kibana_index_modes: ["unique", "shared_ops"] +__es_version: "2.4.4" __es_local_curl: "curl -s --cacert /etc/elasticsearch/secret/admin-ca --cert /etc/elasticsearch/secret/admin-cert --key /etc/elasticsearch/secret/admin-key" @@ -14,3 +15,4 @@ es_min_masters_default: "{{ (openshift_logging_elasticsearch_replica_count | int es_min_masters: "{{ (openshift_logging_elasticsearch_replica_count == 1) | ternary(1, es_min_masters_default) }}" es_recover_after_nodes: "{{ openshift_logging_elasticsearch_replica_count | int }}" es_recover_expected_nodes: "{{ openshift_logging_elasticsearch_replica_count | int }}" +full_restart_cluster: False -- cgit v1.2.3 From ceca2f9a8cbc11095c2fac20fcc789a61c424098 Mon Sep 17 00:00:00 2001 From: Eric Wolinetz Date: Thu, 4 Jan 2018 11:53:46 -0600 Subject: Adding logic to disable and reenable external communication to ES during full restart --- .../tasks/main.yaml | 2 -- .../tasks/restart_cluster.yml | 35 ++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/roles/openshift_logging_elasticsearch/tasks/main.yaml b/roles/openshift_logging_elasticsearch/tasks/main.yaml index 32e0c5471..39c7dd66f 100644 --- a/roles/openshift_logging_elasticsearch/tasks/main.yaml +++ b/roles/openshift_logging_elasticsearch/tasks/main.yaml @@ -36,14 +36,12 @@ full_restart_cluster: True when: - _es_installed_version is defined - - __es_version.split('.')[0] | int >= 5 - _es_installed_version.split('.')[0] | int < __es_version.split('.')[0] | int - set_fact: full_restart_cluster: True when: - _es_ops_installed_version is defined - - __es_version.split('.')[0] | int >= 5 - _es_ops_installed_version.split('.')[0] | int < __es_version.split('.')[0] | int # allow passing in a tempdir diff --git a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml index be43357b1..d55beec86 100644 --- a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml +++ b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml @@ -1,4 +1,22 @@ --- +# Disable external communication for {{ _cluster_component }} +- name: Disable external communication for logging-{{ _cluster_component }} + oc_service: + state: present + name: "logging-{{ _cluster_component }}" + namespace: "{{ openshift_logging_elasticsearch_namespace }}" + selector: + component: "{{ _cluster_component }}" + provider: openshift + connection: blocked + labels: + logging-infra: 'support' + ports: + - port: 9200 + targetPort: "restapi" + when: + - full_restart_cluster | bool + ## get all pods for the cluster - command: > oc get pod -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name} @@ -54,3 +72,20 @@ oc exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "all" } }' register: _enable_output changed_when: "'\"acknowledged\":true' in _enable_output.stdout" + +# Reenable external communication for {{ _cluster_component }} +- name: Reenable external communication for logging-{{ _cluster_component }} + oc_service: + state: present + name: "logging-{{ _cluster_component }}" + namespace: "{{ openshift_logging_elasticsearch_namespace }}" + selector: + component: "{{ _cluster_component }}" + provider: openshift + labels: + logging-infra: 'support' + ports: + - port: 9200 + targetPort: "restapi" + when: + - full_restart_cluster | bool -- cgit v1.2.3