#!/bin/bash
# Checkmk local check: self-heal state of the GlusterFS bricks on this node.
#
# For every volume we parse the output of
#   gluster volume heal <vol> info summary
# which is expected to print one seven-line chunk per brick, the last line
# of each chunk being blank (labels as documented by GlusterFS - verify
# against the installed version):
#
# Brick t3web-00.it.hs-hannover.de:/srv/gluster/fileadmin
# Status: Connected
# Total Number of entries: 0
# Number of entries in heal pending: 0
# Number of entries in split-brain: 0
# Number of entries possibly healing: 0
#
# Parsing is positional: line 1 is the brick, line 2 the status, line 3 the
# entry count. Lines 4-6 are currently unused but could feed more detailed
# metrics later.
#
# Only bricks whose "Brick" line contains this node's hostname are reported,
# one local-check line each:
#   <status> gluster_volume:<vol> <metrics> <message>

# Thresholds and graph range attached to the healing_entries metric.
# The check status itself is derived from the brick status only.
readonly entries_warn=500
readonly entries_crit=1000
readonly entries_min=0
readonly entries_max=2000

#######################################
# Extract the value of a "key: value" line with all whitespace removed.
# Mirrors `cut -d ':' -f2 | tr -d '[:space:]'`: only the text between the
# first and second colon is used; a line without a colon is used as a whole.
# Arguments: $1 - the line to parse
# Outputs:   the value on stdout (no trailing newline)
#######################################
field_value() {
  local value=${1#*:}
  value=${value%%:*}
  printf '%s' "${value//[[:space:]]/}"
}

#######################################
# Turn a heal summary (stdin) into Checkmk local-check lines (stdout).
# Arguments: $1 - volume name
#            $2 - hostname of this node; chunks of other bricks are skipped
#######################################
report_heal_summary() {
  local volume=$1 node=$2
  local brickline statusline entriesline _
  local brick heal_status entries chmk_status metrics msg

  while read -r brickline; do
    read -r statusline
    read -r entriesline
    # heal pending / split-brain / possibly healing / blank separator
    read -r _
    read -r _
    read -r _
    read -r _

    # skip chunks that are not ours
    [[ "$brickline" == *"$node"* ]] || continue

    # second space-separated field of "Brick <host>:<path>"
    brick=${brickline#* }
    brick=${brick%% *}
    heal_status=$(field_value "$statusline")
    entries=$(field_value "$entriesline")

    case "$heal_status" in
      Connected) chmk_status=0 ;; # Online means OK
      *) chmk_status=3 ;;         # everything else we don't know...
    esac

    # A brick that cannot be queried may report a non-numeric count such as
    # "-"; that is not a valid metric value, so emit "-" (no metrics) instead.
    if [[ "$entries" =~ ^[0-9]+$ ]]; then
      metrics="healing_entries=$entries;$entries_warn;$entries_crit;$entries_min;$entries_max"
    else
      metrics="-"
    fi

    msg="Online status of volume '$volume' brick '$brick' is: '$heal_status'"
    printf '%s\n' "$chmk_status gluster_volume:$volume $metrics $msg"
  done
}

#######################################
# Iterate over all volumes and report the local bricks of each.
#######################################
main() {
  # Nothing to report on hosts without the gluster CLI.
  command -v gluster >/dev/null 2>&1 || return 0

  local node volume
  local -a volumes=()
  node=$(hostname)
  mapfile -t volumes < <(gluster volume list)

  for volume in "${volumes[@]}"; do
    [[ -n "$volume" ]] || continue
    # Piped straight into the parser: no temp file in /tmp needed.
    gluster volume heal "$volume" info summary </dev/null \
      | report_heal_summary "$volume" "$node"
  done
}

main "$@"