diff options
author | OpenShift Merge Robot <openshift-merge-robot@users.noreply.github.com> | 2018-02-14 14:28:33 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-02-14 14:28:33 -0800 |
commit | b62c397f0625b9ff3654347a1777ed2277942712 (patch) | |
tree | 950a36359a9ac5e7d4a0b692ccdaf43e6f106463 | |
parent | deb9a793cbb169b964424720f9c3a6ce6b976b09 (diff) | |
parent | 61df593d2047995f25327e54b32956944f413100 (diff) | |
download | openshift-b62c397f0625b9ff3654347a1777ed2277942712.tar.gz openshift-b62c397f0625b9ff3654347a1777ed2277942712.tar.bz2 openshift-b62c397f0625b9ff3654347a1777ed2277942712.tar.xz openshift-b62c397f0625b9ff3654347a1777ed2277942712.zip |
Merge pull request #7097 from ewolinetz/logging_fresh_lg_cluster_fix
Automatic merge from submit-queue.
Whenever we create a new es node ignore health checks, changing prometheus pw gen for increased secret idempotency
Addresses https://bugzilla.redhat.com/show_bug.cgi?id=1540099
Whenever the cluster size is greater than 1, the number of nodes required for recovery is also greater than 1. As a result, on a fresh install the cluster never starts up, because the required number of nodes is not met.
Whenever we create a new node, we skip waiting for the health check so that the logging playbook can complete and we can roll out all updated nodes.
Also addresses Prometheus password generation so that rerunning the playbook no longer changes the secret; a changed secret triggers a full rollout of the cluster (because it is assumed that keys/certs have changed).
4 files changed, 29 insertions, 10 deletions
diff --git a/roles/openshift_logging/tasks/install_logging.yaml b/roles/openshift_logging/tasks/install_logging.yaml index 9fabc5826..66dd2f5a3 100644 --- a/roles/openshift_logging/tasks/install_logging.yaml +++ b/roles/openshift_logging/tasks/install_logging.yaml @@ -131,6 +131,7 @@ openshift_logging_elasticsearch_storage_type: "{{ elasticsearch_storage_type | default(default_elasticsearch_storage_type) }}" openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}" openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name | default() }}" + __logging_scale_up: True with_sequence: count={{ openshift_logging_es_cluster_size | int - openshift_logging_facts.elasticsearch.deploymentconfigs.keys() | count }} loop_control: @@ -221,6 +222,7 @@ openshift_logging_es_hostname: "{{ openshift_logging_es_ops_hostname }}" openshift_logging_es_edge_term_policy: "{{ openshift_logging_es_ops_edge_term_policy | default('') }}" openshift_logging_es_allow_external: "{{ openshift_logging_es_ops_allow_external }}" + __logging_ops_scale_up: True with_sequence: count={{ openshift_logging_es_ops_cluster_size | int - openshift_logging_facts.elasticsearch_ops.deploymentconfigs.keys() | count }} loop_control: diff --git a/roles/openshift_logging_elasticsearch/tasks/main.yaml b/roles/openshift_logging_elasticsearch/tasks/main.yaml index 64e5a3a1f..441460b2d 100644 --- a/roles/openshift_logging_elasticsearch/tasks/main.yaml +++ b/roles/openshift_logging_elasticsearch/tasks/main.yaml @@ -138,15 +138,22 @@ - "prometheus_out.stderr | length > 0" - "'already exists' not in prometheus_out.stderr" -- set_fact: - _logging_metrics_proxy_passwd: "{{ 16 | lib_utils_oo_random_word | b64encode }}" +- name: Checking for passwd.yml + stat: path="{{ generated_certs_dir }}/passwd.yml" + register: passwd_file + check_mode: no -- template: +- when: not passwd_file.stat.exists + template: src: passwd.j2 - dest: 
"{{mktemp.stdout}}/passwd.yml" + dest: "{{ generated_certs_dir }}/passwd.yml" vars: logging_user_name: "{{ openshift_logging_elasticsearch_prometheus_sa }}" - logging_user_passwd: "{{ _logging_metrics_proxy_passwd }}" + logging_user_passwd: "{{ 16 | lib_utils_oo_random_word | b64encode }}" + +- slurp: + src: "{{ generated_certs_dir }}/passwd.yml" + register: _logging_metrics_proxy_passwd # View role and binding - name: Generate logging-elasticsearch-view-role @@ -296,7 +303,7 @@ - name: admin.jks path: "{{ generated_certs_dir }}/system.admin.jks" - name: passwd.yml - path: "{{mktemp.stdout}}/passwd.yml" + path: "{{ generated_certs_dir }}/passwd.yml" # services - name: Set logging-{{ es_component }}-cluster service @@ -433,7 +440,7 @@ es_container_security_context: "{{ _es_containers.elasticsearch.securityContext if _es_containers is defined and 'elasticsearch' in _es_containers and 'securityContext' in _es_containers.elasticsearch else None }}" deploy_type: "{{ openshift_logging_elasticsearch_deployment_type }}" es_replicas: 1 - basic_auth_passwd: "{{ _logging_metrics_proxy_passwd | b64decode }}" + basic_auth_passwd: "{{ ( _logging_metrics_proxy_passwd['content'] | b64decode | from_yaml )[openshift_logging_elasticsearch_prometheus_sa]['passwd'] }}" es_number_of_shards: "{{ openshift_logging_es_number_of_shards | default(1) }}" es_number_of_replicas: "{{ openshift_logging_es_number_of_replicas| default(0) }}" diff --git a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml index 14f2313e1..01247dd5d 100644 --- a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml +++ b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml @@ -65,6 +65,12 @@ {{ openshift_client_binary }} get dc -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name} register: _cluster_dcs + # If we are currently 
restarting the "es" cluster we want to check if we are scaling up the number of es nodes + # If we are currently restarting the "es-ops" cluster we want to check if we are scaling up the number of ops nodes + # If we've created a new node for that cluster then the appropriate variable will be true, otherwise we default to false + - set_fact: + _skip_healthcheck: "{{ __logging_scale_up | default(false) if _cluster_component == 'es' else __logging_ops_scale_up | default(false) }}" + ## restart all dcs for full restart - name: "Restart ES node {{ _es_node }}" include_tasks: restart_es_node.yml @@ -94,6 +100,7 @@ {{ openshift_client_binary }} exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "all" } }' register: _enable_output changed_when: "'\"acknowledged\":true' in _enable_output.stdout" + when: _cluster_pods.stdout != "" # Reenable external communication for {{ _cluster_component }} - name: Reenable external communication for logging-{{ _cluster_component }} diff --git a/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml b/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml index a1e172168..934ab886b 100644 --- a/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml +++ b/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml @@ -3,7 +3,8 @@ command: > {{ openshift_client_binary }} rollout latest {{ _es_node }} -n {{ openshift_logging_elasticsearch_namespace }} -- name: "Waiting for {{ _es_node }} to finish scaling up" +- when: not _skip_healthcheck | bool + name: "Waiting for {{ _es_node }} to finish scaling up" oc_obj: state: list name: "{{ _es_node }}" @@ -19,12 +20,14 @@ retries: 60 delay: 30 -- name: Gettings name(s) of replica pod(s) +- when: not _skip_healthcheck | bool + name: Gettings name(s) of replica pod(s) command: 
> {{ openshift_client_binary }} get pods -l deploymentconfig={{ _es_node }} -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name} register: _pods -- name: "Waiting for ES to be ready for {{ _es_node }}" +- when: not _skip_healthcheck | bool + name: "Waiting for ES to be ready for {{ _es_node }}" shell: > {{ openshift_client_binary }} exec "{{ _pod }}" -c elasticsearch -n "{{ openshift_logging_elasticsearch_namespace }}" -- es_cluster_health with_items: "{{ _pods.stdout.split(' ') }}" |