From 55783753ae8f2d857a7225b7a93c1d47039e5a90 Mon Sep 17 00:00:00 2001
From: "Suren A. Chilingaryan"
Date: Mon, 16 Apr 2018 10:30:15 +0200
Subject: OpenShift monitoring

---
 service/README           |  11 +++++
 service/check_adei.sh    | 115 +++++++++++++++++++++++++++++++++++++++++++++++
 service/check_gluster.sh |  83 ++++++++++++++++++++++++++++++++++
 service/check_kaas.sh    |  37 +++++++++++++++
 service/opts.sh          |  22 +++++++++
 5 files changed, 268 insertions(+)
 create mode 100644 service/README
 create mode 100755 service/check_adei.sh
 create mode 100755 service/check_gluster.sh
 create mode 100755 service/check_kaas.sh
 create mode 100644 service/opts.sh

(limited to 'service')

diff --git a/service/README b/service/README
new file mode 100644
index 0000000..51d5122
--- /dev/null
+++ b/service/README
@@ -0,0 +1,11 @@
+* Message (important or local service messages)
+Status messages to be printed below all services
+
+Online Healthy [Short Message]
+
+
+Statuses
+========
+ 0 - Bad
+ 1 - Good
+ * - Intermediate
diff --git a/service/check_adei.sh b/service/check_adei.sh
new file mode 100755
index 0000000..f08ab5f
--- /dev/null
+++ b/service/check_adei.sh
@@ -0,0 +1,115 @@
+#! /bin/bash
+
+MAX_SOURCE_OFFSET=3600
+MAX_MASTER_OFFSET=300
+MAX_SLAVE_OFFSET=300
+
+function query {
+    if [ "$2" == "ecode" ]; then
+        url="$1&mysql=master"
+        resp=$(curl -f "$url" 2>&1 | grep "returned error")
+    else
+        if [ "$2" == "slave" ]; then
+            url="$1&mysql=slave"
+        else
+            url="$1&mysql=master"
+        fi
+
+        resp=$(curl -sf "$url")
+        err=$?
+        [ $err -eq 0 ] || resp=""
+    fi
+    echo $resp
+}
+
+function format_time {
+    offset=$1
+    if [ $offset -ge 86400 ]; then
+        echo "$((offset / 86400))d"
+    elif [ $offset -ge 3600 ]; then
+        echo "$((offset / 3600))h"
+    elif [ $offset -ge 60 ]; then
+        echo "$((offset / 60))m"
+    else
+        echo "${offset}s"
+    fi
+}
+
+
+cd "$(dirname "$0")"
+id=$1
+
+auth=$(cat ../security/adei.txt | grep -P "^$id" | awk '{ print $2 }')
+[ -n "$auth" ] && auth="$auth@"    # quoted: an unquoted empty $auth makes '[ -n ]' always true
+
+host=$(echo $2 | cut -d '/' -f 1)
+url="http://$auth$2/services"
+
+src=$3
+
+# Check if online
+online=$(../scripts/ping.pl "$host")
+healthy=$online
+
+# Check if healthy and find version
+version=$(query "$url/info.php?target=version&encoding=text")
+if [ -z "$version" ]; then
+    err=$(query "$url/info.php?target=version&encoding=text" "ecode")
+    echo $err
+    healthy=0
+else
+    if [[ "$version" =~ "Error:" ]]; then
+        echo $version
+        healthy=0
+        version=""
+    else
+        version="ADEI $version"
+    fi
+fi
+
+# Get current database size
+size=$(query "$url/info.php?target=size&encoding=text")
+[[ "$size" =~ "Error:" ]] && size=""
+[ -n "$size" ] && msg="\${color gray}/ $((size / 1024 / 1024 / 1024)) GB"
+
+
+# Verify offset (for selected database)
+if [ $healthy -ne 0 -a -n "$src" ]; then
+    now=$(date +%s)
+    sdate=$(query "$url/getdata.php?$src&db_mask=0&format=csv&window=-1&rt=1&cache=1&time_format=U" slave | cut -d ',' -f 1 | cut -d '.' -f 1)
+    mdate=$(query "$url/getdata.php?$src&db_mask=0&format=csv&window=-1&rt=1&cache=1&time_format=U" | cut -d ',' -f 1 | cut -d '.' -f 1)
+
+    if [ -z "$sdate" -o -z "$mdate" ]; then
+        echo "Error querying data from '$src'"
+        healthy=2
+    else
+        master_offset=$(($now - $mdate))
+        slave_offset=$(($mdate - $sdate))
+
+        if [ $master_offset -gt $MAX_SOURCE_OFFSET ]; then
+            rdate=$(query "$url/getdata.php?$src&db_mask=0&format=csv&window=-1&rt=1&time_format=U" | cut -d ',' -f 1 | cut -d '.' -f 1)
+            cache_offset=$(($rdate - $mdate))
+            if [ $cache_offset -gt $MAX_MASTER_OFFSET ]; then
+                echo "ADEI cache is off by $(format_time $cache_offset) for '$src'"
+                healthy=2
+            else
+                offset=$(($now - $rdate))
+#                echo "Source '$src' is not updated for $(format_time $offset)"
+            fi
+        fi
+
+        if [ $slave_offset -gt $MAX_SLAVE_OFFSET ]; then
+            echo "MySQL slave is off by $(format_time $slave_offset) for '$src'"
+            healthy=2
+        fi
+
+        [ -n "$msg" ] && msg="${msg}, "
+        [ -z "$msg" ] && msg="\${color gray}/ "
+        msg="${msg}cache $(format_time $master_offset)"
+        [ $slave_offset -gt 0 ] && msg="$msg & slave $(format_time $slave_offset)"
+        msg="$msg off"
+    fi
+
+fi
+
+echo "$online $healthy $version $msg"
diff --git a/service/check_gluster.sh b/service/check_gluster.sh
new file mode 100755
index 0000000..e591e7e
--- /dev/null
+++ b/service/check_gluster.sh
@@ -0,0 +1,83 @@
+#! /bin/bash
+
+cd "$(dirname "$0")"
+. opts.sh
+
+if [ -f "../security/$host.kubeconfig" ]; then
+    gpod=$(get_gluster_pod)
+
+    function gluster {
+        oc -n glusterfs rsh po/$gpod gluster "$@"
+    }
+
+# check if gluster pods are running
+    if [ -n "$gpod" ]; then
+        online=1
+    else
+        oc -n glusterfs get pods -l 'glusterfs=storage-pod' | sed 's/^/* /'
+        online=0
+    fi
+else
+    echo "0 0 Not supported"
+    exit
+fi
+
+function check {
+    vol=$1
+    vol_bricks=$(gluster volume info "$vol" | grep -P 'Number of Bricks' | awk '{ print $NF }' | tr -d '\r\n')
+    vol_online=$(gluster volume status "$vol" detail | grep Online | grep Y | wc -l)
+
+    if [[ -z "$vol_bricks" || -z "$vol_online" || "$vol_bricks" -ne "$vol_online" ]]; then    # [[ ]] short-circuits; '[ -o ]' errored on an empty count and reported the volume healthy
+        vol_status=$(gluster volume info "$vol" | grep -P 'Status' | awk '{ print $2 }' | tr -d '\r\n')
+        vol_avail=$(gluster volume status "$vol" detail | grep Brick | wc -l)
+        echo "* Volume $vol: $vol_status (Bricks: $vol_bricks, Available: $vol_avail, Online: $vol_online)"
+
+        if [ "$vol_status" == "Started" -a "$vol_online" -gt 0 ]; then    # was '-ge 0' (always true): no bricks online is a failure, not a partial state
+            return 2
+        else
+            return 0
+        fi
+    else
+        return 1
+    fi
+}
+
+version=$(gluster --version | head -n 1 | awk '{ print $2 }' | tr -d '\r')
+if [ -z "$version" ]; then
+    online=0
+else
+    version="GlusterFS $version"
+fi
+
+volumes=0
+partial=0
+failed=0
+healthy=$online
+
+if [ $online -eq 1 ]; then
+    vols=$(gluster volume info | grep -P '^Volume Name' | awk '{ print $NF }' | tr '\r\n' ' ')
+    for vol in $vols; do
+        [[ "$vol" =~ [0-9] ]] && continue
+        [[ "$vol" =~ ^vol_ ]] && continue
+        [[ "$vol" =~ ^heketi ]] && continue
+        check ${vol}
+        ret=$?
+
+        volumes=$((volumes + 1))
+        if [ $ret -eq 0 ]; then
+            healthy=0
+            failed=$((failed + 1))
+        elif [ $ret -ne 1 ]; then
+            [ $healthy -gt 0 ] && healthy=$ret
+            partial=$((partial + 1))
+        fi
+    done
+fi
+
+if [ $healthy -eq 1 ]; then
+    msg="\${color gray}/ $volumes volumes"
+else
+    msg="\${color gray}/ $volumes volumes, $failed failed, $partial bricks missing"
+fi
+
+echo "$online $healthy $version $msg"
diff --git a/service/check_kaas.sh b/service/check_kaas.sh
new file mode 100755
index 0000000..d6e7300
--- /dev/null
+++ b/service/check_kaas.sh
@@ -0,0 +1,37 @@
+#! /bin/bash
+
+cd "$(dirname "$0")"
+. opts.sh
+
+e_nodes=$2
+
+
+online=$(../scripts/ping.pl "$host")
+healthy=$online
+
+version=$(oc version | head -n 1 | awk '{ print $2 }')
+if [ -z "$version" ]; then
+    healthy=0
+else
+    version="OpenShift $version"
+fi
+
+etcd=$(oc get cs | grep etcd | grep "Healthy" | wc -l)
+if [ -z "$etcd" -o "$etcd" -lt 3 ]; then
+    healthy=2
+    oc get cs | grep etcd | grep "Healthy" | sed -r -e 's/\s+/ /g' | awk '{ print $1, $2 }' | sed 's/^/* /'
+fi
+
+if [ $healthy -ne 0 ]; then
+    nodes=$(oc get nodes | grep -w Ready | wc -l)    # -w: a plain 'Ready' pattern also matches 'NotReady'
+    if [ $nodes -ge $e_nodes ]; then
+        nodes=" / \${color gray}$etcd etcd, $nodes nodes"
+    else
+        offline=$(oc get nodes | grep -v "STATUS" | grep -vw "Ready" | wc -l)    # -w keeps 'NotReady' rows in the offline count
+        nodes=" / \${color gray}$etcd etcd, $nodes ready, $offline offline"
+        healthy=2
+        oc get nodes | grep -v "STATUS" | grep -vw "Ready" | awk '{ print $1, $2 }' | sed 's/^/* /'    # list the nodes that are not Ready
+    fi
+fi
+
+echo "$online $healthy $version $nodes"
diff --git a/service/opts.sh b/service/opts.sh
new file mode 100644
index 0000000..3628708
--- /dev/null
+++ b/service/opts.sh
@@ -0,0 +1,22 @@
+host=$1
+
+function oc {
+    cfg="../security/$host.kubeconfig"
+    /usr/bin/oc --config "$cfg" "$@"
+}
+
+function node {
+    ip=$1
+    shift
+
+    ssh -xq root@192.168.26.$ip "$@"
+}
+
+function get_gluster_pod {
+    oc -n glusterfs get pods -l 'glusterfs=storage-pod' | grep Running | awk '{ print $1 }' | head -n 1
+}
+
+function heketi {
+    node 1 heketi-cli -s http://heketi-storage.glusterfs.svc.cluster.local:8080 --user admin --secret "$(oc get secret heketi-storage-admin-secret -n glusterfs -o jsonpath='{.data.key}' | base64 -d)" "$@"
+}
+
-- 
cgit v1.2.3