diff options
-rw-r--r-- | docs/consistency.txt | 1 | ||||
-rw-r--r-- | opts.sh | 2 | ||||
-rw-r--r-- | playbooks/ands-backup.yml | 9 | ||||
-rw-r--r-- | playbooks/ands-monitor.yml | 4 | ||||
-rw-r--r-- | roles/ands_monitor/defaults/main.yml | 1 | ||||
-rw-r--r-- | roles/ands_monitor/tasks/main.yml | 9 | ||||
-rwxr-xr-x | roles/ands_monitor/templates/scripts/check_server_status.sh.j2 | 34 | ||||
-rwxr-xr-x | roles/ands_monitor/templates/scripts/check_uptime_status.sh.j2 | 8 | ||||
-rwxr-xr-x | scripts/gluster.sh | 2 | ||||
-rwxr-xr-x | setup.sh | 6 | ||||
-rw-r--r-- | setup/projects/adei/vars/globals.yml | 4 |
11 files changed, 79 insertions, 1 deletions
diff --git a/docs/consistency.txt b/docs/consistency.txt index 1b7a600..caaaf36 100644 --- a/docs/consistency.txt +++ b/docs/consistency.txt @@ -43,6 +43,7 @@ Networking ADEI ==== + - MySQL replication is working - No caching pods are hung (for whatever reason)
\ No newline at end of file @@ -62,6 +62,8 @@ Actions: Host system managment software - Install additionaly configured software + monitor - Install monitoring scripts + backup - Install backup scripts current - Current managmenet playbook with various temorary actions Custom actions diff --git a/playbooks/ands-backup.yml b/playbooks/ands-backup.yml new file mode 100644 index 0000000..fbfec4c --- /dev/null +++ b/playbooks/ands-backup.yml @@ -0,0 +1,9 @@ +- name: Common setup procedures + hosts: ands + roles: + - role: ands_facts + +- name: Configure Backup + hosts: ands_servers + roles: + - role: ands_backup diff --git a/playbooks/ands-monitor.yml b/playbooks/ands-monitor.yml new file mode 100644 index 0000000..5e06604 --- /dev/null +++ b/playbooks/ands-monitor.yml @@ -0,0 +1,4 @@ +- name: Configure Monitoring + hosts: ands_servers + roles: + - role: ands_monitor diff --git a/roles/ands_monitor/defaults/main.yml b/roles/ands_monitor/defaults/main.yml new file mode 100644 index 0000000..ee3cb7b --- /dev/null +++ b/roles/ands_monitor/defaults/main.yml @@ -0,0 +1 @@ +ands_script_path: "/opt/scripts" diff --git a/roles/ands_monitor/tasks/main.yml b/roles/ands_monitor/tasks/main.yml new file mode 100644 index 0000000..ac70d28 --- /dev/null +++ b/roles/ands_monitor/tasks/main.yml @@ -0,0 +1,9 @@ +- name: Create scripts directory + file: path="{{ ands_script_path }}" state=directory + +- name: "Deploy scripts" + template: src="{{ item | quote }}" dest="{{ ands_script_path }}/{{ script_name }}" owner=root group=root mode=0755 + vars: + script_name: "{{ item | basename | regex_replace('\\.j2','') }}" + with_fileglob: + - "{{ role_path }}/templates/scripts/*.j2" diff --git a/roles/ands_monitor/templates/scripts/check_server_status.sh.j2 b/roles/ands_monitor/templates/scripts/check_server_status.sh.j2 new file mode 100755 index 0000000..caa63ce --- /dev/null +++ b/roles/ands_monitor/templates/scripts/check_server_status.sh.j2 @@ -0,0 +1,34 @@ +#!/bin/bash + +fs=`df -lm / | grep -vi Filesystem | sed -e 's/[[:space:]]\+/ /g' | cut -d ' ' -f 4` +datafs=`df -lm /mnt/ands | grep -vi Filesystem | sed -e 's/[[:space:]]\+/ /g' | cut -d ' ' -f 4` +mem=`free -g | grep "Mem" | sed -e 's/[[:space:]]\+/ /g' | cut -d ' ' -f 7` +cpu=`uptime | sed -e "s/[[:space:]]/\n/g" | tail -n 1` + +if [ $fs -le 8192 ]; then + echo "Only $(($fs / 1024)) GB left in the root file system" +fi + +if [ $datafs -le 1048576 ]; then + echo "Only $(($datafs / 1024)) GB left in the data file system" +fi + +if [ $mem -le 16 ]; then + echo "The system is starving on memory, $mem GB left free" +fi + +if [ `echo "$cpu < 20" | bc` -eq 0 ]; then + echo "The system is starving on cpu, $cpu is load average for the last 15 min" +fi + +vol=$(/opt/MegaRAID/storcli/storcli64 /c0/v0 show | grep -P "^0/0" | grep "Optl" | wc -l) +if [ -z "$vol" -o "$vol" -eq 0 ]; then + echo "Raid volume is not optimal:" + /opt/MegaRAID/storcli/storcli64 /c0/v0 show | grep -P "^0/0" +fi + +disks=$(/opt/MegaRAID/storcli/storcli64 /c0 show | grep -P "(HDD|SSD)" | grep -v "Onln" | wc -l) +if [ -z "$disks" -o "$disks" -ne 0 ]; then + echo "Not all disks are online:" + /opt/MegaRAID/storcli/storcli64 /c0 show | grep -P "(HDD|SSD)" | grep -v "Onln" +fi diff --git a/roles/ands_monitor/templates/scripts/check_uptime_status.sh.j2 b/roles/ands_monitor/templates/scripts/check_uptime_status.sh.j2 new file mode 100755 index 0000000..0602fcb --- /dev/null +++ b/roles/ands_monitor/templates/scripts/check_uptime_status.sh.j2 @@ -0,0 +1,8 @@ +#!/bin/bash + +up=$(uptime | cut -d ' ' -f 4- | cut -d ',' -f 1 | sed -re 's/^\s*//') +load=$(uptime | cut -d ' ' -f 4- | cut -d ',' -f 4- | cut -d ':' -f 2 | cut -d ',' -f 3 | sed -re 's/^\s*//') +disks=$(/opt/MegaRAID/storcli/storcli64 /c0 show | grep -P "(HDD|SSD)" | grep "Onln" | wc -l) +data=`df -lh /mnt/ands | grep -vi Filesystem | sed -e 's/[[:space:]]\+/ /g' | cut -d ' ' -f 4` + +echo -n "1 Up $up \${color gray}/ $disks disks, $data free, load: $load" diff --git a/scripts/gluster.sh b/scripts/gluster.sh index 07ca7f9..a3ff186 100755 --- a/scripts/gluster.sh +++ b/scripts/gluster.sh @@ -11,7 +11,7 @@ shift function info { vol=$1 - status=$(gluster volume info databases | grep -P 'Status' | awk '{ print $2 }' | tr -d '\r\n') + status=$(gluster volume info "$vol" | grep -P 'Status' | awk '{ print $2 }' | tr -d '\r\n') bricks=$(gluster volume info "$vol" | grep -P 'Number of Bricks' | awk '{ print $NF }' | tr -d '\r\n') avail=$(gluster volume status "$vol" detail | grep Brick | wc -l) online=$(gluster volume status "$vol" detail | grep Online | grep Y | wc -l) @@ -155,6 +155,12 @@ case "$action" in current) apply playbooks/current.yml "$@" || exit ;; + monitor) + apply playbooks/ands-monitor.yml "$@" || exit 1 + ;; + backup) + apply playbooks/ands-backup.yml "$@" || exit 1 + ;; setup) subrole=$2 shift diff --git a/setup/projects/adei/vars/globals.yml b/setup/projects/adei/vars/globals.yml index 3da00c6..8638e44 100644 --- a/setup/projects/adei/vars/globals.yml +++ b/setup/projects/adei/vars/globals.yml @@ -9,6 +9,10 @@ adei_pod_env: value: "/data/home" - name: "SHELL" value: "/bin/bash" + - name: "MYSQL_MASTER_SERVER" + value: "mysql-master.adei.svc.cluster.local" + - name: "MYSQL_SLAVE_SERVER" + value: "mysql-slave.adei.svc.cluster.local" - name: "MYSQL_PORT" value: "3306" - name: "MYSQL_USER" |