 playbooks/adhoc/docker_loopback_to_lvm/docker-storage-setup              |   2 +
 playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml | 148 ++++++++++++++
 playbooks/adhoc/docker_storage_cleanup/docker_storage_cleanup.yml        |  69 +++++++
 roles/os_zabbix/tasks/main.yml                                           |   9 +
 roles/os_zabbix/vars/template_openshift_master.yml                       |  17 ++
 roles/os_zabbix/vars/template_openshift_node.yml                         |  20 ++
 6 files changed, 265 insertions(+), 0 deletions(-)
diff --git a/playbooks/adhoc/docker_loopback_to_lvm/docker-storage-setup b/playbooks/adhoc/docker_loopback_to_lvm/docker-storage-setup
new file mode 100644
index 000000000..059058823
--- /dev/null
+++ b/playbooks/adhoc/docker_loopback_to_lvm/docker-storage-setup
@@ -0,0 +1,2 @@
+DEVS=/dev/xvdb
+VG=docker_vg
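
For reference: docker-storage-setup reads this file from /etc/sysconfig/docker-storage-setup, so DEVS names the raw device to consume and VG the volume group to create on it. A hedged verification sketch (not part of this commit; the task and fields below are illustrative):

    # Hypothetical check that docker-storage-setup did its job:
    # /dev/xvdb should now be a PV belonging to docker_vg.
    - name: verify docker_vg was created on /dev/xvdb
      command: pvs --noheadings -o vg_name /dev/xvdb
      register: pv_vg
      changed_when: false
      failed_when: "'docker_vg' not in pv_vg.stdout"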
diff --git a/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml b/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml
new file mode 100644
index 000000000..c9ae923bb
--- /dev/null
+++ b/playbooks/adhoc/docker_loopback_to_lvm/docker_loopback_to_direct_lvm.yml
@@ -0,0 +1,148 @@
+---
+# This playbook converts docker from loopback to direct-lvm (the Red Hat recommended way to run docker)
+# in AWS. It attaches an additional EBS volume and creates the volume group docker will use on that volume.
+#
+# To run:
+#  1. Source your AWS credentials (make sure it's the corresponding AWS account) into your environment
+#    export AWS_ACCESS_KEY_ID='XXXXX'
+#    export AWS_SECRET_ACCESS_KEY='XXXXXX'
+#
+#  2. run the playbook:
+#   ansible-playbook -e 'cli_tag_name=<tag-name>' -e "cli_volume_size=30" docker_loopback_to_direct_lvm.yml
+#
+#  Example:
+#   ansible-playbook -e 'cli_tag_name=ops-master-12345' -e "cli_volume_size=30" docker_loopback_to_direct_lvm.yml
+#
+#  Notes:
+#  * By default this will create a 30GB volume.
+#  * iops are calculated as disk size * 30, e.g. 30GB * 30 = 900 iops
+#  * This will remove /var/lib/docker!
+#  * You may need to re-deploy docker images (such as monitoring) after this is run.
+#
+
+- name: Convert docker loopback storage to a provisioned-iops drive
+  hosts: "tag_Name_{{ cli_tag_name }}"
+  user: root
+  connection: ssh
+  gather_facts: no
+
+  vars:
+    cli_volume_type: io1
+    cli_volume_size: 30
+    cli_volume_iops: "{{ 30 * cli_volume_size }}"
+
+  pre_tasks:
+  - fail:
+      msg: "This playbook requires {{item}} to be set."
+    when: "{{ item }} is not defined or {{ item }} == ''"
+    with_items:
+    - cli_tag_name
+    - cli_volume_size
+
+  - debug:
+      var: play_hosts
+
+  - name: start docker
+    service:
+      name: docker
+      state: started
+
+  - name: Determine if loopback
+    shell: docker info | grep 'Data file:.*loop'
+    register: loop_device_check
+    ignore_errors: yes
+
+  - debug:
+      var: loop_device_check
+
+  - name: fail if we don't detect loopback
+    fail:
+      msg: loopback not detected! Please investigate manually.
+    when: loop_device_check.rc != 0
+
+  - name: stop zagg client monitoring container
+    service:
+      name: oso-rhel7-zagg-client
+      state: stopped
+    ignore_errors: yes
+
+  - name: stop pcp client monitoring container
+    service:
+      name: oso-f22-host-monitoring
+      state: stopped
+    ignore_errors: yes
+
+  - name: stop docker
+    service:
+      name: docker
+      state: stopped
+
+  - name: delete /var/lib/docker
+    command: rm -rf /var/lib/docker
+
+  - name: check to see if /dev/xvdb exists
+    command: test -e /dev/xvdb
+    register: xvdb_check
+    ignore_errors: yes
+
+  - debug: var=xvdb_check
+
+  - name: fail if /dev/xvdb already exists
+    fail:
+      msg: /dev/xvdb already exists. Please investigate
+    when: xvdb_check.rc == 0
+
+  - name: Create a volume and attach it
+    delegate_to: localhost
+    ec2_vol:
+      state: present
+      instance: "{{ ec2_id }}"
+      region: "{{ ec2_region }}"
+      volume_size: "{{ cli_volume_size | default(30, True) }}"
+      volume_type: "{{ cli_volume_type }}"
+      device_name: /dev/xvdb
+      iops: "{{ cli_volume_iops }}"
+    register: vol
+
+  - debug: var=vol
+
+  - name: tag the vol with a name
+    delegate_to: localhost
+    ec2_tag: region={{ ec2_region }} resource={{ vol.volume_id }}
+    args:
+      tags:
+        Name: "{{ ec2_tag_Name }}"
+        env: "{{ ec2_tag_environment }}"
+    register: voltags
+
+  - name: Wait for volume to attach
+    pause:
+      seconds: 30
+
+  - name: copy the docker-storage-setup config file
+    copy:
+      src: docker-storage-setup
+      dest: /etc/sysconfig/docker-storage-setup
+      owner: root
+      group: root
+      mode: 0664
+
+  - name: docker storage setup
+    command: docker-storage-setup
+    register: setup_output
+
+  - debug: var=setup_output
+
+  - name: start docker
+    command: systemctl start docker.service
+    register: dockerstart
+
+  - debug: var=dockerstart
+
+  - name: Wait for docker to stabilize
+    pause:
+      seconds: 30
+
+  # Leave off the '-t' for docker exec; with a tty allocated, docker exec does not work through ansible.
+  - name: update zabbix docker items
+    command: docker exec -i oso-rhel7-zagg-client /usr/local/bin/cron-send-docker-metrics.py
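
Note on the fixed waits above: the playbook pauses 30 seconds for the volume to attach and again for docker to stabilize. A polling alternative, sketched here as an assumption rather than part of this commit, avoids both under- and over-waiting:

    # Hypothetical replacement for the fixed 30s attach pause:
    # poll until the device node appears, give up after 5 minutes.
    - name: Wait for volume to attach
      wait_for:
        path: /dev/xvdb
        state: present
        timeout: 300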
diff --git a/playbooks/adhoc/docker_storage_cleanup/docker_storage_cleanup.yml b/playbooks/adhoc/docker_storage_cleanup/docker_storage_cleanup.yml
new file mode 100644
index 000000000..1946a5f4f
--- /dev/null
+++ b/playbooks/adhoc/docker_storage_cleanup/docker_storage_cleanup.yml
@@ -0,0 +1,69 @@
+---
+# This playbook attempts to clean up unwanted docker files to help alleviate docker disk space issues.
+#
+# To run:
+#
+#  1. run the playbook:
+#
+#   ansible-playbook -e 'cli_tag_name=<tag-name>' docker_storage_cleanup.yml
+#
+#  Example:
+#
+#   ansible-playbook -e 'cli_tag_name=ops-node-compute-12345' docker_storage_cleanup.yml
+#
+#  Notes:
+#  * This *should* not interfere with running docker containers.
+#
+
+- name: Clean up Docker Storage
+  gather_facts: no
+  hosts: "tag_Name_{{ cli_tag_name }}"
+  user: root
+  connection: ssh
+
+  pre_tasks:
+
+  - fail:
+      msg: "This playbook requires {{item}} to be set."
+    when: "{{ item }} is not defined or {{ item }} == ''"
+    with_items:
+    - cli_tag_name
+
+  - name: Ensure docker is running
+    service:
+      name: docker
+      state: started
+      enabled: yes
+
+  - name: Get docker info
+    command: docker info
+    register: docker_info
+
+  - name: Show docker info
+    debug:
+      var: docker_info.stdout_lines
+
+  - name: Remove exited and dead containers
+    shell: "docker ps -a | awk '/Exited|Dead/ {print $1}' | xargs --no-run-if-empty docker rm"
+    ignore_errors: yes
+
+  - name: Remove dangling docker images
+    shell: "docker images -q -f dangling=true | xargs --no-run-if-empty docker rmi"
+    ignore_errors: yes
+
+  - name: Remove non-running docker images
+    shell: "docker images -aq | xargs --no-run-if-empty docker rmi 2>/dev/null"
+    ignore_errors: yes
+
+  # Leave off the '-t' for docker exec; with a tty allocated, docker exec does not work through ansible.
+  - name: update zabbix docker items
+    command: docker exec -i oso-rhel7-zagg-client /usr/local/bin/cron-send-docker-metrics.py
+
+  # Get and show docker info again.
+  - name: Get docker info
+    command: docker info
+    register: docker_info
+
+  - name: Show docker info
+    debug:
+      var: docker_info.stdout_lines
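
The removal tasks above pipe through GNU xargs --no-run-if-empty, so an empty candidate list is a no-op. A hedged dry-run sketch (not in this commit) that only reports what would be removed:

    # Hypothetical dry run: list exited/dead containers without removing them.
    - name: List exited and dead containers
      shell: "docker ps -a | awk '/Exited|Dead/ {print $1}'"
      register: removable_containers
      changed_when: false

    - debug:
        var: removable_containers.stdout_lines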
diff --git a/roles/os_zabbix/tasks/main.yml b/roles/os_zabbix/tasks/main.yml
index f9672b9c4..8347e9a61 100644
--- a/roles/os_zabbix/tasks/main.yml
+++ b/roles/os_zabbix/tasks/main.yml
@@ -11,6 +11,7 @@
 - include_vars: template_os_linux.yml
 - include_vars: template_docker.yml
 - include_vars: template_openshift_master.yml
+- include_vars: template_openshift_node.yml
 
 - name: Include Template Heartbeat
   include: ../../lib_zabbix/tasks/create_template.yml
@@ -43,3 +44,11 @@
     server: "{{ ozb_server }}"
     user: "{{ ozb_user }}"
     password: "{{ ozb_password }}"
+
+- name: Include Template Openshift Node
+  include: ../../lib_zabbix/tasks/create_template.yml
+  vars:
+    template: "{{ g_template_openshift_node }}"
+    server: "{{ ozb_server }}"
+    user: "{{ ozb_user }}"
+    password: "{{ ozb_password }}"
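
For context, the role expects ozb_server, ozb_user, and ozb_password to be supplied by the caller. A minimal invocation sketch; the URL and credential wiring below are placeholders, not taken from this repository:

    # Hypothetical usage; ozb_* values are placeholder assumptions.
    - hosts: localhost
      gather_facts: no
      roles:
      - role: os_zabbix
        ozb_server: https://zabbix.example.com/zabbix/api_jsonrpc.php
        ozb_user: Admin
        ozb_password: "{{ vault_zabbix_password }}"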
diff --git a/roles/os_zabbix/vars/template_openshift_master.yml b/roles/os_zabbix/vars/template_openshift_master.yml
index 728423ac1..c71e07910 100644
--- a/roles/os_zabbix/vars/template_openshift_master.yml
+++ b/roles/os_zabbix/vars/template_openshift_master.yml
@@ -6,8 +6,25 @@ g_template_openshift_master:
     applications:
     - Openshift Master
     key: create_app
+
+  - key: openshift.master.process.count
+    description: Shows number of master processes running
+    type: int
+    applications:
+    - Openshift Master
+
   ztriggers:
   - name: 'Application creation has failed on {HOST.NAME}'
     expression: '{Template Openshift Master:create_app.last(#1)}=1 and {Template Openshift Master:create_app.last(#2)}=1'
     url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/check_create_app.asciidoc'
     priority: avg
+
+  - name: 'Openshift Master process not running on {HOST.NAME}'
+    expression: '{Template Openshift Master:openshift.master.process.count.max(#3)}<1'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+    priority: high
+
+  - name: 'Too many Openshift Master processes running on {HOST.NAME}'
+    expression: '{Template Openshift Master:openshift.master.process.count.min(#3)}>1'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_master.asciidoc'
+    priority: high
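
The .max(#3) / .min(#3) expressions evaluate the last three collected values: the 'not running' trigger fires only when even the maximum of those three counts is below 1, and the 'too many' trigger only when even the minimum exceeds 1, so a single odd sample does not page anyone. How the openshift.master.process.count item gets populated is outside this commit; a hedged sketch of one possible host-side feeder (the pgrep pattern and server name are assumptions):

    # Hypothetical feeder; the real data source is presumably one of the
    # monitoring containers referenced elsewhere in this commit.
    - name: report master process count to zabbix
      shell: |
        count=$(pgrep -cf 'openshift start master' || true)
        zabbix_sender -z zabbix.example.com -s "{{ inventory_hostname }}" \
          -k openshift.master.process.count -o "$count"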
diff --git a/roles/os_zabbix/vars/template_openshift_node.yml b/roles/os_zabbix/vars/template_openshift_node.yml
new file mode 100644
index 000000000..36f9cc4a3
--- /dev/null
+++ b/roles/os_zabbix/vars/template_openshift_node.yml
@@ -0,0 +1,20 @@
+---
+g_template_openshift_node:
+  name: Template Openshift Node
+  zitems:
+  - key: openshift.node.process.count
+    description: Shows number of OpenShift Node processes running
+    type: int
+    applications:
+    - Openshift Node
+
+  ztriggers:
+  - name: 'Openshift Node process not running on {HOST.NAME}'
+    expression: '{Template Openshift Node:openshift.node.process.count.max(#3)}<1'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc'
+    priority: high
+
+  - name: 'Too many Openshift Node processes running on {HOST.NAME}'
+    expression: '{Template Openshift Node:openshift.node.process.count.min(#3)}>1'
+    url: 'https://github.com/openshift/ops-sop/blob/master/V3/Alerts/openshift_node.asciidoc'
+    priority: high