From 61df593d2047995f25327e54b32956944f413100 Mon Sep 17 00:00:00 2001 From: Eric Wolinetz Date: Fri, 9 Feb 2018 18:10:59 -0600 Subject: Whenever we create a new es node ignore health checks, changing prometheus pw gen for increased secret idempotency --- roles/openshift_logging/tasks/install_logging.yaml | 2 ++ .../openshift_logging_elasticsearch/tasks/main.yaml | 21 ++++++++++++++------- .../tasks/restart_cluster.yml | 7 +++++++ .../tasks/restart_es_node.yml | 9 ++++++--- 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/roles/openshift_logging/tasks/install_logging.yaml b/roles/openshift_logging/tasks/install_logging.yaml index 9fabc5826..66dd2f5a3 100644 --- a/roles/openshift_logging/tasks/install_logging.yaml +++ b/roles/openshift_logging/tasks/install_logging.yaml @@ -131,6 +131,7 @@ openshift_logging_elasticsearch_storage_type: "{{ elasticsearch_storage_type | default(default_elasticsearch_storage_type) }}" openshift_logging_elasticsearch_pvc_pv_selector: "{{ openshift_logging_es_pv_selector }}" openshift_logging_elasticsearch_pvc_storage_class_name: "{{ openshift_logging_es_pvc_storage_class_name | default() }}" + __logging_scale_up: True with_sequence: count={{ openshift_logging_es_cluster_size | int - openshift_logging_facts.elasticsearch.deploymentconfigs.keys() | count }} loop_control: @@ -221,6 +222,7 @@ openshift_logging_es_hostname: "{{ openshift_logging_es_ops_hostname }}" openshift_logging_es_edge_term_policy: "{{ openshift_logging_es_ops_edge_term_policy | default('') }}" openshift_logging_es_allow_external: "{{ openshift_logging_es_ops_allow_external }}" + __logging_ops_scale_up: True with_sequence: count={{ openshift_logging_es_ops_cluster_size | int - openshift_logging_facts.elasticsearch_ops.deploymentconfigs.keys() | count }} loop_control: diff --git a/roles/openshift_logging_elasticsearch/tasks/main.yaml b/roles/openshift_logging_elasticsearch/tasks/main.yaml index 64e5a3a1f..441460b2d 100644 --- a/roles/openshift_logging_elasticsearch/tasks/main.yaml +++ b/roles/openshift_logging_elasticsearch/tasks/main.yaml @@ -138,15 +138,22 @@ - "prometheus_out.stderr | length > 0" - "'already exists' not in prometheus_out.stderr" -- set_fact: - _logging_metrics_proxy_passwd: "{{ 16 | lib_utils_oo_random_word | b64encode }}" +- name: Checking for passwd.yml + stat: path="{{ generated_certs_dir }}/passwd.yml" + register: passwd_file + check_mode: no -- template: +- when: not passwd_file.stat.exists + template: src: passwd.j2 - dest: "{{mktemp.stdout}}/passwd.yml" + dest: "{{ generated_certs_dir }}/passwd.yml" vars: logging_user_name: "{{ openshift_logging_elasticsearch_prometheus_sa }}" - logging_user_passwd: "{{ _logging_metrics_proxy_passwd }}" + logging_user_passwd: "{{ 16 | lib_utils_oo_random_word | b64encode }}" + +- slurp: + src: "{{ generated_certs_dir }}/passwd.yml" + register: _logging_metrics_proxy_passwd # View role and binding - name: Generate logging-elasticsearch-view-role @@ -296,7 +303,7 @@ - name: admin.jks path: "{{ generated_certs_dir }}/system.admin.jks" - name: passwd.yml - path: "{{mktemp.stdout}}/passwd.yml" + path: "{{ generated_certs_dir }}/passwd.yml" # services - name: Set logging-{{ es_component }}-cluster service @@ -433,7 +440,7 @@ es_container_security_context: "{{ _es_containers.elasticsearch.securityContext if _es_containers is defined and 'elasticsearch' in _es_containers and 'securityContext' in _es_containers.elasticsearch else None }}" deploy_type: "{{ openshift_logging_elasticsearch_deployment_type }}" es_replicas: 1 - basic_auth_passwd: "{{ _logging_metrics_proxy_passwd | b64decode }}" + basic_auth_passwd: "{{ ( _logging_metrics_proxy_passwd['content'] | b64decode | from_yaml )[openshift_logging_elasticsearch_prometheus_sa]['passwd'] }}" es_number_of_shards: "{{ openshift_logging_es_number_of_shards | default(1) }}" es_number_of_replicas: "{{ openshift_logging_es_number_of_replicas| default(0) }}" diff --git a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml index 14f2313e1..01247dd5d 100644 --- a/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml +++ b/roles/openshift_logging_elasticsearch/tasks/restart_cluster.yml @@ -65,6 +65,12 @@ {{ openshift_client_binary }} get dc -l component={{ _cluster_component }},provider=openshift -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name} register: _cluster_dcs + # If we are currently restarting the "es" cluster we want to check if we are scaling up the number of es nodes + # If we are currently restarting the "es-ops" cluster we want to check if we are scaling up the number of ops nodes + # If we've created a new node for that cluster then the appropriate variable will be true, otherwise we default to false + - set_fact: + _skip_healthcheck: "{{ __logging_scale_up | default(false) if _cluster_component == 'es' else __logging_ops_scale_up | default(false) }}" + ## restart all dcs for full restart - name: "Restart ES node {{ _es_node }}" include_tasks: restart_es_node.yml @@ -94,6 +100,7 @@ {{ openshift_client_binary }} exec {{ _cluster_pods.stdout.split(' ')[0] }} -c elasticsearch -n {{ openshift_logging_elasticsearch_namespace }} -- {{ __es_local_curl }} -XPUT 'https://localhost:9200/_cluster/settings' -d '{ "transient": { "cluster.routing.allocation.enable" : "all" } }' register: _enable_output changed_when: "'\"acknowledged\":true' in _enable_output.stdout" + when: _cluster_pods.stdout != "" # Reenable external communication for {{ _cluster_component }} - name: Reenable external communication for logging-{{ _cluster_component }} diff --git a/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml b/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml index a1e172168..934ab886b 100644 --- a/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml +++ b/roles/openshift_logging_elasticsearch/tasks/restart_es_node.yml @@ -3,7 +3,8 @@ command: > {{ openshift_client_binary }} rollout latest {{ _es_node }} -n {{ openshift_logging_elasticsearch_namespace }} -- name: "Waiting for {{ _es_node }} to finish scaling up" +- when: not _skip_healthcheck | bool + name: "Waiting for {{ _es_node }} to finish scaling up" oc_obj: state: list name: "{{ _es_node }}" @@ -19,12 +20,14 @@ retries: 60 delay: 30 -- name: Gettings name(s) of replica pod(s) +- when: not _skip_healthcheck | bool + name: Gettings name(s) of replica pod(s) command: > {{ openshift_client_binary }} get pods -l deploymentconfig={{ _es_node }} -n {{ openshift_logging_elasticsearch_namespace }} -o jsonpath={.items[*].metadata.name} register: _pods -- name: "Waiting for ES to be ready for {{ _es_node }}" +- when: not _skip_healthcheck | bool + name: "Waiting for ES to be ready for {{ _es_node }}" shell: > {{ openshift_client_binary }} exec "{{ _pod }}" -c elasticsearch -n "{{ openshift_logging_elasticsearch_namespace }}" -- es_cluster_health with_items: "{{ _pods.stdout.split(' ') }}" -- cgit v1.2.3