Compare commits
22 Commits
83081b757b
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 8822bfad31 | |||
| edf1588c27 | |||
| 815044fca0 | |||
| 2d6a52f439 | |||
| 9f8f59ecd3 | |||
| 6e74d5d992 | |||
| 4e7c418b04 | |||
| c7f1097948 | |||
| 822c5193b8 | |||
| e7c70934f6 | |||
| dd66a4c03d | |||
| dc4b522ef5 | |||
| 6f85b4e493 | |||
| ebfd7fbc3a | |||
| 85de49d8ec | |||
| 98ab2f8f30 | |||
| e3b59c08c8 | |||
| 0d500ca2b9 | |||
| 05d6be51b8 | |||
| daa098849c | |||
| 4d63d9f3e9 | |||
| dee3bce537 |
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
*.swp
|
||||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
|||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
Copyright (c) <year> <copyright holders>
|
Copyright (c) 2021 Vintage Salt
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
|||||||
33
check_apt_pending
Executable file
33
check_apt_pending
Executable file
@@ -0,0 +1,33 @@
|
|||||||
|
#!/bin/bash
#
# Gets the number of pending APT package updates and returns differently
# depending on how many have yet to be applied.
#
# Exit codes follow the Nagios plugin convention:
#   0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN
#

threshold_warn=10
threshold_crit=20

# https://askubuntu.com/questions/269606/apt-get-count-the-number-of-updates-available
#
# The simulation output is captured on its own so that a failing apt-get is
# actually noticed; piping it straight into a counter always yields a number
# and would make the UNKNOWN branch unreachable.
if ! apt_output="$(apt-get -q -y --ignore-hold --allow-change-held-packages \
  --allow-unauthenticated -s dist-upgrade)"; then
  echo "UNKNOWN - Unable to get pending updates"
  exit 3
fi

# grep -c prints 0 (and exits 1) when nothing matches; only the count matters.
pending="$(/bin/grep -c '^Inst' <<<"$apt_output")"

if [[ ! "$pending" =~ ^[0-9]+$ ]]; then
  echo "UNKNOWN - Unable to get pending updates"
  exit 3
fi

if (( pending >= threshold_crit )); then
  echo "CRITICAL - $pending package updates pending"
  exit 2
elif (( pending >= threshold_warn )); then
  echo "WARNING - $pending package updates pending"
  exit 1
elif (( pending > 0 )); then
  echo "OK - $pending package updates pending"
  exit 0
else
  echo "OK - No updates available"
  exit 0
fi
|
||||||
|
|
||||||
192
check_cpu_stats
Executable file
192
check_cpu_stats
Executable file
@@ -0,0 +1,192 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
|
||||||
|
# Script name shown in the help text. Parameter expansion is used instead of
# an unquoted `basename $0`, which breaks when the path contains whitespace.
PROGNAME=${0##*/}
VERSION="Version 1.0,"
AUTHOR="2009, Mike Adolphs (http://www.matejunkie.com/)"

# Nagios plugin exit codes.
ST_OK=0
ST_WR=1
ST_CR=2
ST_UK=3

# Default pause between the two /proc/stat samples; overridden by -i/--interval.
interval=1
|
||||||
|
|
||||||
|
# Prints the version string followed by the author string on one line.
# Globals: VERSION (read), AUTHOR (read)
print_version() {
  printf '%s %s\n' "$VERSION" "$AUTHOR"
}
|
||||||
|
|
||||||
|
# Prints the version banner and the usage text, then exits with $ST_UK.
# Globals: PROGNAME (read), VERSION (read), ST_UK (read)
print_help() {
  print_version "$PROGNAME" "$VERSION"
  cat <<EOF

$PROGNAME is a Nagios plugin to monitor CPU utilization. It makes
use of /proc/stat and calculates it through Jiffies rather than
using another frontend tool like iostat or top.
When using optional warning/critical thresholds all values except
idle are aggregated and compared to the thresholds. There's
currently no support for warning/critical thresholds for specific
usage parameters.

$PROGNAME [-i/--interval] [-w/--warning] [-c/--critical]

Options:
 --interval|-i)
 Defines the pause between the two times /proc/stat is being
 parsed. Higher values could lead to more accurate result.
 Default is: 1 second
 --warning|-w)
 Sets a warning level for CPU user. Default is: off
 --critical|-c)
 Sets a critical level for CPU user. Default is: off
EOF
  exit $ST_UK
}
|
||||||
|
|
||||||
|
# Parses the command line into the globals `interval`, `warn` and `crit`.
# Parsing stops at the first empty argument, as before. An option that needs a
# value but is the last argument is now rejected instead of being silently
# accepted with an empty value.
# Globals:   interval, warn, crit (written); PROGNAME, VERSION, ST_UK (read)
# Arguments: the script's command-line arguments
parse_args() {
  while [[ -n "${1:-}" ]]; do
    case "$1" in
      --help|-h)
        print_help
        exit "$ST_UK"
        ;;
      --version|-v)
        print_version "$PROGNAME" "$VERSION"
        exit "$ST_UK"
        ;;
      --interval|-i|--warning|-w|--critical|-c)
        if (( $# < 2 )); then
          echo "Missing value for argument: $1"
          print_help
          exit "$ST_UK"
        fi
        case "$1" in
          --interval|-i) interval=$2 ;;
          --warning|-w) warn=$2 ;;
          --critical|-c) crit=$2 ;;
        esac
        shift
        ;;
      *)
        echo "Unknown argument: $1"
        print_help
        exit "$ST_UK"
        ;;
    esac
    shift
  done
}

parse_args "$@"
|
||||||
|
|
||||||
|
# Validates the warning/critical thresholds.
# Sets wcdiff=1 when the warning level is above the critical level, or when
# either threshold is not a plain non-negative integer. A non-integer value
# would otherwise make the later numeric tests error out and report "OK".
# Globals: warn (read), crit (read), wcdiff (written)
val_wcdiff() {
  if [[ ! "$warn" =~ ^[0-9]+$ || ! "$crit" =~ ^[0-9]+$ ]]; then
    wcdiff=1
  elif [ "$warn" -gt "$crit" ]; then
    wcdiff=1
  fi
}
|
||||||
|
|
||||||
|
# Samples the aggregate "cpu" line of /proc/stat twice, $interval seconds
# apart, and converts the jiffy deltas into percentages.
#
# Each sample is taken with a single read so that all counters belong to the
# same instant. Percentages are rounded to two decimals without the former
# +0.5 bias, and a zero jiffy delta no longer divides by zero.
#
# Globals read:    interval, PROC_STAT (optional path override, defaults to
#                  /proc/stat)
# Globals written: diff_cpu_{user,nice,sys,idle,iowait,irq,softirq,total},
#                  cpu_{user,nice,sys,idle,iowait,irq,softirq,total} as
#                  percentages with two decimals, and cpu_usage as the integer
#                  percentage of all non-idle time
# Returns: 0 on success, 1 if the cpu line could not be read
get_cpuvals() {
  local stat_file=${PROC_STAT:-/proc/stat}
  local label rest
  local u1 n1 s1 d1 w1 q1 f1
  local u2 n2 s2 d2 w2 q2 f2

  read -r label u1 n1 s1 d1 w1 q1 f1 rest \
    < <(grep -m1 '^cpu' "$stat_file") || return 1

  sleep "$interval"

  read -r label u2 n2 s2 d2 w2 q2 f2 rest \
    < <(grep -m1 '^cpu' "$stat_file") || return 1

  # Bare names inside $(( )) evaluate to 0 when a field is missing.
  diff_cpu_user=$(( u2 - u1 ))
  diff_cpu_nice=$(( n2 - n1 ))
  diff_cpu_sys=$(( s2 - s1 ))
  diff_cpu_idle=$(( d2 - d1 ))
  diff_cpu_iowait=$(( w2 - w1 ))
  diff_cpu_irq=$(( q2 - q1 ))
  diff_cpu_softirq=$(( f2 - f1 ))
  diff_cpu_total=$(( diff_cpu_user + diff_cpu_nice + diff_cpu_sys
    + diff_cpu_idle + diff_cpu_iowait + diff_cpu_irq + diff_cpu_softirq ))

  if (( diff_cpu_total <= 0 )); then
    # No jiffies elapsed between the samples: report zeros.
    cpu_user=0.00 cpu_nice=0.00 cpu_sys=0.00 cpu_idle=0.00
    cpu_iowait=0.00 cpu_irq=0.00 cpu_softirq=0.00 cpu_total=0.00
    cpu_usage=0
    return 0
  fi

  # LC_ALL=C keeps '.' as the decimal separator regardless of locale.
  read -r cpu_user cpu_nice cpu_sys cpu_idle cpu_iowait cpu_irq cpu_softirq \
    cpu_usage < <(
      LC_ALL=C awk \
        -v t="$diff_cpu_total" -v u="$diff_cpu_user" -v n="$diff_cpu_nice" \
        -v s="$diff_cpu_sys" -v d="$diff_cpu_idle" -v w="$diff_cpu_iowait" \
        -v q="$diff_cpu_irq" -v f="$diff_cpu_softirq" \
        'BEGIN {
          printf "%.2f %.2f %.2f %.2f %.2f %.2f %.2f %d\n",
            100*u/t, 100*n/t, 100*s/t, 100*d/t, 100*w/t, 100*q/t, 100*f/t,
            int(100*(u+n+s+w+q+f)/t)
        }'
    )
  cpu_total=100.00
}
|
||||||
|
|
||||||
|
# Builds the human-readable status line in the global `output`.
# The softirq and idle fields are now separated by a comma like every other
# pair of fields.
# Globals: cpu_user, cpu_nice, cpu_sys, cpu_iowait, cpu_irq, cpu_softirq,
#          cpu_idle (read); output (written)
do_output() {
  output="user: ${cpu_user}, nice: ${cpu_nice}, sys: ${cpu_sys}, "
  output+="iowait: ${cpu_iowait}, irq: ${cpu_irq}, softirq: ${cpu_softirq}, "
  output+="idle: ${cpu_idle}"
}
|
||||||
|
|
||||||
|
# Builds the performance-data string in the global `perfdata`.
# Globals: cpu_user, cpu_nice, cpu_sys, cpu_softirq, cpu_iowait, cpu_irq,
#          cpu_idle (read); perfdata (written)
do_perfdata() {
  printf -v perfdata "'user'=%s 'nice'=%s 'sys'=%s 'softirq'=%s 'iowait'=%s 'irq'=%s 'idle'=%s" \
    "${cpu_user}" "${cpu_nice}" "${cpu_sys}" "${cpu_softirq}" \
    "${cpu_iowait}" "${cpu_irq}" "${cpu_idle}"
}
|
||||||
|
|
||||||
|
# Main flow: validate thresholds, sample the CPU, then report.

if [[ -n "$warn" && -n "$crit" ]]; then
  val_wcdiff
  if [[ "$wcdiff" == 1 ]]; then
    # Kept on one line: the former "\\" + newline inside the quotes printed a
    # stray backslash and split the message over two lines.
    echo "Please adjust your warning/critical thresholds. The warning must be lower than the critical level!"
    exit "$ST_UK"
  fi
fi

get_cpuvals
do_output
do_perfdata

# Without a usable usage figure the result must not be reported as OK.
if [[ ! "$cpu_usage" =~ ^-?[0-9]+$ ]]; then
  echo "UNKNOWN - Unable to determine CPU usage"
  exit "$ST_UK"
fi

if [[ -n "$warn" && -n "$crit" ]]; then
  if [ "$cpu_usage" -ge "$crit" ]; then
    echo "CRITICAL - ${output} | ${perfdata}"
    exit "$ST_CR"
  elif [ "$cpu_usage" -ge "$warn" ]; then
    echo "WARNING - ${output} | ${perfdata}"
    exit "$ST_WR"
  else
    echo "OK - ${output} | ${perfdata}"
    exit "$ST_OK"
  fi
else
  echo "OK - ${output} | ${perfdata}"
  exit "$ST_OK"
fi
|
||||||
996
check_docker
Executable file
996
check_docker
Executable file
@@ -0,0 +1,996 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# logging.basicConfig(level=logging.DEBUG)
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import math
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import socket
|
||||||
|
import stat
|
||||||
|
import traceback
|
||||||
|
from collections import deque, namedtuple, UserDict, defaultdict
|
||||||
|
from concurrent import futures
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from functools import lru_cache
|
||||||
|
from http.client import HTTPConnection
|
||||||
|
from sys import argv
|
||||||
|
from urllib import request
|
||||||
|
from urllib.error import HTTPError, URLError
|
||||||
|
from urllib.request import AbstractHTTPHandler, HTTPHandler, HTTPSHandler, OpenerDirector, HTTPRedirectHandler, \
|
||||||
|
Request, HTTPBasicAuthHandler
|
||||||
|
|
||||||
|
logger = logging.getLogger()
|
||||||
|
__author__ = 'Tim Laurence'
|
||||||
|
__copyright__ = "Copyright 2019"
|
||||||
|
__credits__ = ['Tim Laurence']
|
||||||
|
__license__ = "GPL"
|
||||||
|
__version__ = "2.2.2"
|
||||||
|
|
||||||
|
'''
|
||||||
|
nrpe compatible check for docker containers.
|
||||||
|
|
||||||
|
Requires Python 3
|
||||||
|
|
||||||
|
Note: I really would have preferred to have used requests for all the network connections but that would have added a
|
||||||
|
dependency.
|
||||||
|
'''
|
||||||
|
|
||||||
|
DEFAULT_SOCKET = '/var/run/docker.sock'
|
||||||
|
DEFAULT_TIMEOUT = 10.0
|
||||||
|
DEFAULT_PORT = 2375
|
||||||
|
DEFAULT_MEMORY_UNITS = 'B'
|
||||||
|
DEFAULT_HEADERS = [('Accept', 'application/vnd.docker.distribution.manifest.v2+json')]
|
||||||
|
DEFAULT_PUBLIC_REGISTRY = 'registry-1.docker.io'
|
||||||
|
|
||||||
|
# The second value is the power to raise the base to.
|
||||||
|
UNIT_ADJUSTMENTS_TEMPLATE = {
|
||||||
|
'%': 0,
|
||||||
|
'B': 0,
|
||||||
|
'KB': 1,
|
||||||
|
'MB': 2,
|
||||||
|
'GB': 3,
|
||||||
|
'TB': 4
|
||||||
|
}
|
||||||
|
unit_adjustments = None
|
||||||
|
|
||||||
|
# Reduce message to a single OK unless a check fails.
|
||||||
|
no_ok = False
|
||||||
|
|
||||||
|
# Suppress performance data reporting
|
||||||
|
no_performance = False
|
||||||
|
|
||||||
|
OK_RC = 0
|
||||||
|
WARNING_RC = 1
|
||||||
|
CRITICAL_RC = 2
|
||||||
|
UNKNOWN_RC = 3
|
||||||
|
|
||||||
|
# These hold the final results
|
||||||
|
rc = -1
|
||||||
|
messages = []
|
||||||
|
performance_data = []
|
||||||
|
|
||||||
|
ImageName = namedtuple('ImageName', "registry name tag full_name")
|
||||||
|
|
||||||
|
|
||||||
|
class ThresholdSpec(UserDict):
    """Mapping holding ``warn``, ``crit`` and ``units`` that also exposes them as attributes.

    Being a mapping, an instance can be unpacked with ``**`` into ``str.format``.
    """

    _FIELDS = ('warn', 'crit', 'units')

    def __init__(self, warn, crit, units=''):
        super().__init__(warn=warn, crit=crit, units=units)

    def __getattr__(self, item):
        # Only reached when normal attribute lookup fails. 'data' is checked via
        # __dict__ so a not-yet-initialised instance cannot recurse into here.
        if item in self._FIELDS and 'data' in self.__dict__:
            return self.data[item]
        # UserDict defines no __getattr__ to delegate to, so raise the standard error directly.
        raise AttributeError("{!r} object has no attribute {!r}".format(type(self).__name__, item))
|
||||||
|
|
||||||
|
|
||||||
|
# How much threading can we do? We are generally not CPU bound so I am using this as a worst-case cap
|
||||||
|
DEFAULT_PARALLELISM = 10
|
||||||
|
|
||||||
|
# Holds list of all threads
|
||||||
|
threads = []
|
||||||
|
|
||||||
|
# This is used during testing
|
||||||
|
DISABLE_THREADING = False
|
||||||
|
|
||||||
|
|
||||||
|
# Hacked up urllib to handle sockets
|
||||||
|
#############################################################################################
|
||||||
|
# Docker runs an HTTP connection over a socket. http.client knows how to deal with these
|
||||||
|
# but lacks some niceties. Urllib wraps that and makes up for some of the deficiencies but
|
||||||
|
# cannot fix the fact http.client can't read from socket files. In order to take advantage of
|
||||||
|
# urllib and http.client's capabilities the class below tweaks HttpConnection and passes it
|
||||||
|
# to urllib registering for socket:// connections
|
||||||
|
|
||||||
|
# This is all side effect so excluding coverage
|
||||||
|
class SocketFileHandler(AbstractHTTPHandler):
    """urllib handler that speaks HTTP over a unix socket file (``socket://`` URLs)."""

    class SocketFileToHttpConnectionAdaptor(HTTPConnection):  # pragma: no cover
        """HTTPConnection whose transport is a unix domain socket instead of TCP."""

        def __init__(self, socket_file, timeout=DEFAULT_TIMEOUT):
            # Host and port are placeholders; the real endpoint is the socket file.
            super().__init__(host='', port=0, timeout=timeout)
            self.socket_file = socket_file

        def connect(self):
            unix_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
            self.sock = unix_socket
            unix_socket.settimeout(self.timeout)
            unix_socket.connect(self.socket_file)

    def socket_open(self, req):
        # The selector has the form "<socket file>:<http path>".
        socket_file, path = req.selector.split(':', 1)
        req.host, req.selector = socket_file, path
        return self.do_open(self.SocketFileToHttpConnectionAdaptor, req)
|
||||||
|
|
||||||
|
|
||||||
|
# Tokens are not cached because I expect the callers to cache the responses
|
||||||
|
class Oauth2TokenAuthHandler(HTTPBasicAuthHandler):
    """Answers ``401`` responses carrying a ``Bearer`` challenge by fetching a token and retrying."""

    # Retry attempts per URL, shared by all instances; used to stop endless auth loops.
    auth_failure_tracker = defaultdict(int)

    def http_response(self, request, response):
        """Return ``response`` unchanged unless it is a 401 with a Bearer challenge."""
        www_authenticate_header = response.headers.get('www-authenticate', None)
        if response.code == 401 and www_authenticate_header:
            challenge = www_authenticate_header.split()
            if challenge and challenge[0].lower() == 'bearer':
                return self.process_oauth2(request, response, www_authenticate_header)

        return response

    https_response = http_response

    @staticmethod
    def _get_outh2_token(www_authenticate_header, timeout=None):
        """
        Fetch a bearer token from the realm named in a ``WWW-Authenticate`` header.

        :param www_authenticate_header: Header value containing ``key="value"`` pairs; ``realm`` is required,
            ``scope`` and ``service`` are sent along when present
        :param timeout: Optional timeout in seconds for the token request; when None, urlopen's default applies
        :return: The ``token`` field of the JSON reply
        """
        # Imported locally so the block does not depend on a new module-level import.
        from urllib.parse import urlencode

        auth_fields = dict(re.findall(r"""(?:(?P<key>[^ ,=]+)="([^"]+)")""", www_authenticate_header))

        realm = auth_fields['realm']
        # Encode the values; a challenge may legitimately omit scope.
        query = urlencode([(key, auth_fields[key]) for key in ('scope', 'service') if key in auth_fields])
        auth_url = realm
        if query:
            auth_url += ('&' if '?' in realm else '?') + query

        token_request = Request(auth_url)
        token_request.add_header("Content-Type", "application/x-www-form-urlencoded; charset=utf-8")

        open_kwargs = {} if timeout is None else {'timeout': timeout}
        token_response = request.urlopen(token_request, **open_kwargs)
        try:
            return process_urllib_response(token_response)['token']
        finally:
            token_response.close()

    def process_oauth2(self, request, response, www_authenticate_header):
        """Retry ``request`` once with a freshly fetched bearer token; raise HTTPError on a repeat failure."""
        # This keeps infinite auth loops from happening
        full_url = request.full_url
        self.auth_failure_tracker[full_url] += 1
        if self.auth_failure_tracker[full_url] > 1:
            raise HTTPError(full_url, 401, "Stopping Oauth2 failure loop for {}".format(full_url),
                            response.headers, response)

        # Reuse the timeout of the request being retried for the token fetch.
        auth_token = self._get_outh2_token(www_authenticate_header, timeout=getattr(request, 'timeout', None))

        request.add_unredirected_header('Authorization', 'Bearer ' + auth_token)
        return self.parent.open(request, timeout=request.timeout)
|
||||||
|
|
||||||
|
|
||||||
|
# Opener used for every request: plain HTTP(S), redirects, unix socket files and OAuth2 bearer auth.
better_urllib_get = OpenerDirector()
better_urllib_get.addheaders = list(DEFAULT_HEADERS)
for _handler in (HTTPHandler(), HTTPSHandler(), HTTPRedirectHandler(), SocketFileHandler(),
                 Oauth2TokenAuthHandler()):
    better_urllib_get.add_handler(_handler)
del _handler
|
||||||
|
|
||||||
|
|
||||||
|
class RegistryError(Exception):
    """Raised when a registry query fails; the parsed reply is kept in ``response_obj``."""

    def __init__(self, response):
        # Pass the response to Exception so args/str() are populated even when
        # the error is raised with a keyword argument.
        super().__init__(response)
        self.response_obj = response
|
||||||
|
|
||||||
|
|
||||||
|
# Util functions
|
||||||
|
#############################################################################################
|
||||||
|
def parse_thresholds(spec, include_units=True, units_required=True):
    """
    Given a spec string break it up into ':' separated chunks. Convert strings to ints as it makes sense

    :param spec: The threshold specification being parsed, ``warn:crit[:units]``
    :param include_units: Specifies that units should be processed and returned if present
    :param units_required: Mark spec as invalid if the units are missing.
    :return: A ThresholdSpec holding warn, crit and units (units is '' when not included or not present)
    :raises ValueError: if the spec has blank parts, lacks warn or crit, has non-integer thresholds,
        lacks required units, or has too many parts
    """
    parts = deque(spec.split(':'))
    if not all(parts):
        raise ValueError("Blanks are not allowed in a threshold specification: {}".format(spec))

    # Without this check a one-part spec would surface as IndexError from popleft().
    if len(parts) < 2:
        raise ValueError("Both warn and crit thresholds are required in {}".format(spec))

    # Warn
    warn = int(parts.popleft())
    # Crit
    crit = int(parts.popleft())

    units = ''
    if include_units:
        if len(parts):
            # units
            units = parts.popleft()
        elif units_required:
            raise ValueError("Missing units in {}".format(spec))

    if len(parts) != 0:
        raise ValueError("Too many threshold specifiers in {}".format(spec))

    return ThresholdSpec(warn=warn, crit=crit, units=units)
|
||||||
|
|
||||||
|
|
||||||
|
def pretty_time(seconds):
    """
    Break a number of seconds into day/hour/minute/second parts.

    :param seconds: Duration in seconds
    :return: List of strings such as ``['1d', '2h', '3min', '4s']``; units that do not fit are left out
        and the seconds part is always present
    """
    remainder = seconds
    result = []
    # '>=' so that an exact unit boundary rolls over (60 -> '1min', not '60s').
    for suffix, unit_seconds in (('d', 24 * 60 * 60), ('h', 60 * 60), ('min', 60)):
        if remainder >= unit_seconds:
            count, remainder = divmod(remainder, unit_seconds)
            result.append("{}{}".format(int(count), suffix))
    result.append("{}s".format(int(remainder)))
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_numeric_thresholds(container, value, thresholds, name, short_name,
                                min=None, max=None, greater_than=True):
    """
    Record performance data for ``value`` and report it as ok/warning/critical.

    :param container: Container name used in the message and the perf label
    :param value: Measured value; compared unrounded against the thresholds
    :param thresholds: Mapping with ``warn``, ``crit`` and ``units``, also readable as attributes
    :param name: Human readable metric name for the message
    :param short_name: Metric name used in the performance data label
    :param min: Optional lower bound appended to the performance data
    :param max: Optional upper bound appended to the performance data
    :param greater_than: When True a value at or above a threshold breaches it, otherwise at or below
    """
    # Some units don't have decimal places
    whole_number_units = thresholds.units in ('B', '%', '')

    def shorten(number, whole_number_fn):
        return whole_number_fn(number) if whole_number_units else round(number, 2)

    rounded_value = shorten(value, int)

    perf_fields = ["{container}_{short_name}={value}{units};{warn};{crit}".format(
        container=container,
        short_name=short_name,
        value=rounded_value,
        **thresholds)]
    if min is not None:
        perf_fields.append(str(shorten(min, math.floor)))
    if max is not None:
        perf_fields.append(str(shorten(max, math.ceil)))
    performance_data.append(';'.join(perf_fields))

    if thresholds.units == 's':
        # Durations are shown with their two most significant parts only.
        results_str = "{} {} is {}".format(container, name, ' '.join(pretty_time(rounded_value)[:2]))
    else:
        results_str = "{} {} is {}{}".format(container, name, rounded_value, thresholds.units)

    def breached(limit):
        return value >= limit if greater_than else value <= limit

    if breached(thresholds.crit):
        critical(results_str)
    elif breached(thresholds.warn):
        warning(results_str)
    else:
        ok(results_str)
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=None)
def get_url(url):
    """
    Fetch ``url`` and decode its JSON body. Results are cached per URL for the life of the process.

    :param url: URL to fetch through ``better_urllib_get``
    :return: Tuple of (decoded JSON body, HTTP status)
    """
    logger.debug("get_url: {}".format(url))
    response = better_urllib_get.open(url, timeout=timeout)
    try:
        logger.debug("get_url: {} {}".format(url, response.status))
        return process_urllib_response(response), response.status
    finally:
        # Release the underlying connection/socket once the body has been read.
        response.close()
|
||||||
|
|
||||||
|
|
||||||
|
def process_urllib_response(response):
    """Read the whole response body, decode it as UTF-8 and return the parsed JSON."""
    return json.loads(response.read().decode('utf-8'))
|
||||||
|
|
||||||
|
|
||||||
|
def get_container_info(name):
    """Return the daemon's inspect document for the container ``name``."""
    endpoint = daemon + '/containers/{}/json'.format(name)
    return get_url(endpoint)[0]
|
||||||
|
|
||||||
|
|
||||||
|
def get_image_info(name):
    """Return the daemon's inspect document for the image ``name``."""
    endpoint = daemon + '/images/{}/json'.format(name)
    return get_url(endpoint)[0]
|
||||||
|
|
||||||
|
|
||||||
|
def get_state(container):
    """Return the ``State`` section of the container's inspect document."""
    inspection = get_container_info(container)
    return inspection['State']
|
||||||
|
|
||||||
|
|
||||||
|
def get_stats(container):
    """Return a single (non-streaming) stats document for ``container``."""
    endpoint = daemon + '/containers/{}/stats?stream=0'.format(container)
    return get_url(endpoint)[0]
|
||||||
|
|
||||||
|
|
||||||
|
def get_ps_name(name_list):
    """
    Pick the name that starts with a '/' but contains no further '/' and return it without the leading '/'.

    :param name_list: Iterable of container names as reported by the daemon
    :return: The first matching name with its leading '/' removed
    :raises NameError: if no entry has that shape
    """
    for name in name_list:
        # startswith() also copes with an empty name, where name[0] would raise IndexError.
        if name.startswith('/') and '/' not in name[1:]:
            return name[1:]
    raise NameError("Error when trying to identify 'ps' name in {}".format(name_list))
|
||||||
|
|
||||||
|
|
||||||
|
def get_containers(names, require_present):
    """
    Resolve name patterns to the set of container names known to the daemon.

    :param names: Iterable of regular expressions; the literal entry 'all' selects every container
    :param require_present: When True, a pattern matching no container is reported as critical
    :return: Set of matching container names
    """
    containers_list, _ = get_url(daemon + '/containers/json?all=1')

    all_container_names = set(get_ps_name(x['Names']) for x in containers_list)

    if 'all' in names:
        return all_container_names

    filtered = set()
    for matcher in names:
        # fullmatch anchors the whole pattern. Wrapping it as "^{}$" anchored an
        # alternation such as "web|db" only as "^web" or "db$".
        pattern = re.compile(matcher)
        matched = {candidate for candidate in all_container_names if pattern.fullmatch(candidate)}
        filtered |= matched
        # If we don't find a container that matches our regex
        if require_present and not matched:
            critical("No containers match {}".format(matcher))

    return filtered
|
||||||
|
|
||||||
|
|
||||||
|
def get_container_image_id(container):
    """Return the ``Image`` field of the container's inspect document."""
    return get_container_info(container)['Image']
|
||||||
|
|
||||||
|
|
||||||
|
def get_container_image_urls(container):
    """Return the ``RepoTags`` of the image the container was created from."""
    image_id = get_container_info(container)['Image']
    return get_image_info(image_id)['RepoTags']
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_image_name_to_manifest_url(image_name, insecure_registries):
    """
    Build the registry manifest URL for an image name.

    :param image_name: Image name as accepted by parse_image_name
    :param insecure_registries: Registries to query over http instead of https (compared case-insensitively)
    :return: Tuple of (manifest url, registry)
    """
    image = parse_image_name(image_name)

    plain_http_registries = {registry.lower() for registry in insecure_registries}
    if image.registry.lower() in plain_http_registries:
        scheme = 'http'
    else:
        scheme = 'https'

    # Registry query url
    manifest_url = '{scheme}://{registry}/v2/{image_name}/manifests/{image_tag}'.format(
        scheme=scheme,
        registry=image.registry,
        image_name=image.name,
        image_tag=image.tag)
    return manifest_url, image.registry
|
||||||
|
|
||||||
|
|
||||||
|
# Auth servers seem picky about being hit too hard. Can't figure out why. ;)
|
||||||
|
# As result it is best to single thread this check
|
||||||
|
# This is based on https://docs.docker.com/registry/spec/auth/token/#requesting-a-token
|
||||||
|
def get_digest_from_registry(url):
    """
    Query a registry manifest URL and return the digest of the image config.

    :param url: Manifest URL to query
    :return: ``config.digest`` from the manifest, or None when it has no digest
    :raises RegistryError: if the registry does not answer with status 200
    """
    logger.debug("get_digest_from_registry")

    # query registry
    # TODO: Handle logging in if needed
    registry_info, status_code = get_url(url=url)
    if status_code == 200:
        return registry_info['config'].get('digest')
    raise RegistryError(response=registry_info)
|
||||||
|
|
||||||
|
|
||||||
|
def set_rc(new_rc):
    """Raise the global return code to ``new_rc`` if that is higher; it is never lowered."""
    global rc
    rc = max(rc, new_rc)
|
||||||
|
|
||||||
|
|
||||||
|
def ok(message):
    """Record an OK result: update the return code and queue the prefixed message."""
    set_rc(OK_RC)
    line = 'OK: ' + message
    messages.append(line)
|
||||||
|
|
||||||
|
|
||||||
|
def warning(message):
    """Record a WARNING result: update the return code and queue the prefixed message."""
    set_rc(WARNING_RC)
    line = 'WARNING: ' + message
    messages.append(line)
|
||||||
|
|
||||||
|
|
||||||
|
def critical(message):
    """Record a CRITICAL result: update the return code and queue the prefixed message."""
    set_rc(CRITICAL_RC)
    line = 'CRITICAL: ' + message
    messages.append(line)
|
||||||
|
|
||||||
|
|
||||||
|
def unknown(message):
    """Record an UNKNOWN result: update the return code and queue the prefixed message."""
    set_rc(UNKNOWN_RC)
    line = 'UNKNOWN: ' + message
    messages.append(line)
|
||||||
|
|
||||||
|
|
||||||
|
def require_running(name):
    """
    Decorator factory: run the wrapped check only when the container is running.

    :param name: Name of the check, used in the message reported when the container is not running
    :return: Decorator for functions whose first argument is the container name
    """
    def inner_decorator(func):
        def wrapper(container, *args, **kwargs):
            container_state = get_state(container)
            state = normalize_state(container_state)
            if state.lower() == "running":
                func(container, *args, **kwargs)
            else:
                # container is not running, can't perform check
                # (the message no longer ends with a stray double quote)
                critical('{container} is not "running", cannot check {check}'.format(container=container,
                                                                                     check=name))

        return wrapper

    return inner_decorator
|
||||||
|
|
||||||
|
|
||||||
|
def multithread_execution(disable_threading=DISABLE_THREADING):
    """
    Decorator factory: submit the wrapped check to ``parallel_executor`` instead of calling it inline.

    :param disable_threading: When True the check is called directly. The module-level DISABLE_THREADING flag
        is also consulted at call time, so setting it after decoration still disables threading.
    :return: Decorator for functions whose first argument is the container name
    """
    def inner_decorator(func):
        def wrapper(container, *args, **kwargs):
            # Honour the argument (previously ignored) as well as the global flag.
            if disable_threading or DISABLE_THREADING:
                func(container, *args, **kwargs)
            else:
                threads.append(parallel_executor.submit(func, container, *args, **kwargs))

        return wrapper

    return inner_decorator
|
||||||
|
|
||||||
|
|
||||||
|
def singlethread_execution(disable_threading=DISABLE_THREADING):
    """
    Decorator factory: submit the wrapped check to ``serial_executor`` instead of calling it inline.

    :param disable_threading: When True the check is called directly. The module-level DISABLE_THREADING flag
        is also consulted at call time, so setting it after decoration still disables threading.
    :return: Decorator for functions whose first argument is the container name
    """
    def inner_decorator(func):
        def wrapper(container, *args, **kwargs):
            # Honour the argument (previously ignored) as well as the global flag.
            if disable_threading or DISABLE_THREADING:
                func(container, *args, **kwargs)
            else:
                threads.append(serial_executor.submit(func, container, *args, **kwargs))

        return wrapper

    return inner_decorator
|
||||||
|
|
||||||
|
|
||||||
|
def parse_image_name(image_name):
    """
    Parses image names into their constituent parts.

    A missing registry defaults to DEFAULT_PUBLIC_REGISTRY, a missing tag to 'latest', and single-component
    names on the default registry get the 'library/' prefix.

    :param image_name: Image reference such as ``registry:port/name:tag``
    :return: ImageName
    """

    # These are based on information found here
    # https://docs.docker.com/engine/reference/commandline/tag/#extended-description
    # https://github.com/docker/distribution/blob/master/reference/regexp.go
    host_segment = '[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?'
    hostname = '(' + host_segment + r'\.)+' + host_segment
    registry_pattern = '((?P<registry>(' + hostname + r'(:\d+)?|' + host_segment + r':\d+))/)'

    component_end = '[a-z0-9]'
    component_middle = '[a-z0-9._-]'  # Ignoring spec limit of two _
    component = '(' + component_end + component_middle + '*' + component_end + '|' + component_end + ')'
    name_pattern = '(?P<image_name>(' + component + '/)*' + component + ')'

    tag_pattern = '(?P<image_tag>[a-zA-Z0-9_][a-zA-Z0-9_.-]*)'

    full_pattern = '^' + registry_pattern + '?' + name_pattern + '(:' + tag_pattern + ')?$'
    parsed = re.match(full_pattern, image_name)

    registry = parsed.group('registry') or DEFAULT_PUBLIC_REGISTRY

    repository = parsed.group('image_name')
    if '/' not in repository and registry == DEFAULT_PUBLIC_REGISTRY:
        repository = 'library/' + repository

    tag = parsed.group('image_tag') or 'latest'

    full_image_name = "{registry}/{image_name}:{image_tag}".format(
        registry=registry,
        image_name=repository,
        image_tag=tag)

    return ImageName(registry=registry, name=repository, tag=tag, full_name=full_image_name)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_state(status_info):
    """
    Reduce a container ``State`` document to a single status string.

    :param status_info: The ``State`` mapping from a container inspection
    :return: The ``Status`` value when present; otherwise the first of 'Restarting', 'Paused', 'Dead' or
        'Running' whose flag is set, falling back to 'Exited'
    """
    # Newer docker engines report the current state directly (running, stopped, paused, etc.).
    if "Status" in status_info:
        return status_info['Status']

    # Older engines only expose one boolean per state; the first set flag wins.
    for flag in ('Restarting', 'Paused', 'Dead', 'Running'):
        if status_info[flag]:
            return flag
    return 'Exited'
|
||||||
|
|
||||||
|
|
||||||
|
# Checks
|
||||||
|
#############################################################################################
|
||||||
|
|
||||||
|
@multithread_execution()
@require_running(name='memory')
def check_memory(container, thresholds):
    """Evaluate a container's memory usage against the given thresholds.

    Usage is taken from the container stats with the page cache subtracted, to
    match what `docker stats` does. When ``thresholds.units`` is ``'%'`` the
    usage is reported as an integer percentage of the memory limit; otherwise
    usage and limit are scaled by ``unit_adjustments[thresholds.units]``.

    :param container: name of the container to inspect.
    :param thresholds: threshold object; its ``units`` attribute selects the
        reporting unit and must be a key of ``unit_adjustments``.
    """
    if thresholds.units not in unit_adjustments:
        unknown("Memory units must be one of {}".format(list(unit_adjustments.keys())))
        return

    memory_stats = get_stats(container)['memory_stats']

    # Subtracting cache to match what `docker stats` does.
    # 'total_cache' is not reported by every engine/cgroup combination; a bare
    # lookup raised KeyError there. Fall back to 'inactive_file' (the field the
    # docker documentation names for cgroup v2 hosts) and finally to 0.
    detail = memory_stats.get('stats', {})
    cache = detail.get('total_cache', detail.get('inactive_file', 0))
    adjusted_usage = memory_stats['usage'] - cache

    if thresholds.units == '%':
        upper_bound = 100
        usage = int(100 * adjusted_usage / memory_stats['limit'])
    else:
        upper_bound = memory_stats['limit'] / unit_adjustments[thresholds.units]
        usage = adjusted_usage / unit_adjustments[thresholds.units]

    evaluate_numeric_thresholds(container=container, value=usage, thresholds=thresholds, name='memory',
                                short_name='mem', min=0, max=upper_bound)
|
||||||
|
|
||||||
|
|
||||||
|
@multithread_execution()
def check_status(container, desired_state):
    """Report OK when the container is in ``desired_state``, CRITICAL otherwise.

    The comparison is case-insensitive; the message always echoes
    ``desired_state`` as the caller supplied it.

    :param container: name of the container to inspect.
    :param desired_state: expected status string (e.g. ``running``).
    """
    wanted = desired_state.lower()
    actual = normalize_state(get_state(container)).lower()
    if wanted == actual:
        ok("{} status is {}".format(container, desired_state))
    else:
        critical("{} state is not {}".format(container, desired_state))
|
||||||
|
|
||||||
|
|
||||||
|
@multithread_execution()
@require_running('health')
def check_health(container):
    """Report the container's health-check status.

    ``healthy`` maps to OK, ``unhealthy`` to CRITICAL, and any other value,
    or the absence of health data, to UNKNOWN.

    :param container: name of the container to inspect.
    """
    state = get_state(container)

    # No health section at all means the image defines no usable health check.
    if "Health" not in state or "Status" not in state["Health"]:
        unknown('{} has no health check data'.format(container))
        return

    health = state["Health"]["Status"]
    message = "{} is {}".format(container, health)
    if health == 'unhealthy':
        critical(message)
    elif health == 'healthy':
        ok(message)
    else:
        unknown(message)
|
||||||
|
|
||||||
|
|
||||||
|
@multithread_execution()
@require_running('uptime')
def check_uptime(container, thresholds):
    """Evaluate how long the container has been up, in seconds.

    The start time is read from the container's ``State.StartedAt`` field; only
    the first 19 characters (down to whole seconds) are parsed and the result
    is treated as UTC. Thresholds are evaluated with ``greater_than=False``.

    :param container: name of the container to inspect.
    :param thresholds: threshold object; its ``units`` attribute is set to ``'s'``.
    """
    started_at = get_container_info(container)['State']['StartedAt']

    # Drop everything after the seconds so strptime gets a fixed-width value.
    started = datetime.strptime(started_at[0:19], "%Y-%m-%dT%H:%M:%S").replace(tzinfo=timezone.utc)
    uptime = (datetime.now(timezone.utc) - started).total_seconds()

    thresholds.units = 's'
    evaluate_numeric_thresholds(container=container, value=uptime, thresholds=thresholds, name='uptime',
                                short_name='up', min=0, max=2, greater_than=False)
|
||||||
|
|
||||||
|
|
||||||
|
@multithread_execution()
def check_image_age(container, thresholds):
    """Evaluate the age, in whole days, of the image the container uses.

    The image is looked up through the container's ``Image`` field and its
    ``Created`` timestamp is parsed down to whole seconds and treated as UTC.

    :param container: name of the container to inspect.
    :param thresholds: threshold object; its ``units`` attribute is set to ``'d'``.
    """
    image_ref = get_container_info(container)['Image']
    created_at = get_image_info(image_ref)['Created']

    # Drop everything after the seconds so strptime gets a fixed-width value.
    created = datetime.strptime(created_at[0:19], "%Y-%m-%dT%H:%M:%S").replace(tzinfo=timezone.utc)
    image_age = (datetime.now(timezone.utc) - created).days

    thresholds.units = 'd'
    evaluate_numeric_thresholds(container=container, value=image_age, thresholds=thresholds, name='image_age',
                                short_name='age', min=0, max=2, greater_than=True)
|
||||||
|
|
||||||
|
|
||||||
|
@multithread_execution()
@require_running('restarts')
def check_restarts(container, thresholds):
    """Evaluate the container's restart count against the given thresholds.

    :param container: name of the container to inspect.
    :param thresholds: threshold object passed through to the evaluator.
    """
    restart_count = int(get_container_info(container)['RestartCount'])
    evaluate_numeric_thresholds(container=container, value=restart_count, thresholds=thresholds, name='restarts',
                                short_name='re', min=0, max=2)
|
||||||
|
|
||||||
|
|
||||||
|
@singlethread_execution()
def check_version(container, insecure_registries):
    """Compare the container's local image ID with the digest in its registry.

    Reports OK when they match and CRITICAL when they differ. Reports UNKNOWN
    when the local ID is missing, when the image has zero or several names,
    or when the registry cannot be queried for one of the recognised reasons.

    :param container: name of the container to inspect.
    :param insecure_registries: passed through to the manifest URL builder.
    :raises URLError: re-raised when the failure is not one of the recognised cases.
    """
    local_digest = get_container_image_id(container)
    logger.debug("Local container image ID: {}".format(local_digest))
    if local_digest is None:
        unknown('Checksum missing for "{}", try doing a pull'.format(container))
        return

    # Exactly one image name is required to know which registry entry to compare with.
    image_urls = get_container_image_urls(container=container)
    url_count = len(image_urls)
    if url_count > 1:
        unknown('"{}" has multiple tags/names. Unsure which one to use to check the version.'.format(container))
        return
    if url_count == 0:
        unknown('"{}" has last no repository tag. Is this anywhere else?'.format(container))
        return

    url, registry = normalize_image_name_to_manifest_url(image_urls[0], insecure_registries)
    logger.debug("Looking up image digest here {}".format(url))
    try:
        registry_hash = get_digest_from_registry(url)
    except URLError as e:
        reason = e.reason
        if hasattr(reason, 'reason') and reason.reason == 'UNKNOWN_PROTOCOL':
            unknown(
                "TLS error connecting to registry {} for {}, should you use the '--insecure-registry' flag?"
                .format(registry, container))
            return
        if hasattr(reason, 'strerror') and reason.strerror == 'nodename nor servname provided, or not known':
            unknown(
                "Cannot reach registry for {} at {}".format(container, url))
            return
        # Anything else is unexpected; let the caller see it.
        raise e
    except RegistryError as e:
        unknown("Cannot check version, couldn't retrieve digest for {} while checking {}.".format(container, url))
        return

    logger.debug("Image digests, local={} remote={}".format(local_digest, registry_hash))
    if registry_hash != local_digest:
        critical("{}'s version does not match registry".format(container))
    else:
        ok("{}'s version matches registry".format(container))
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_cpu_capacity_precentage(info, stats):
    """Return the container's CPU usage as a percentage of the CPU it may use.

    The share of host CPU time consumed between the two samples in ``stats``
    is scaled by the ratio between the host's capacity and the container's
    limit, so a container pinned at its limit reads as 100.

    :param info: container inspection mapping; ``info['HostConfig']`` supplies
        ``NanoCpus`` / ``CpuQuota`` / ``CpuPeriod`` when limits are set.
    :param stats: stats mapping holding ``cpu_stats`` and ``precpu_stats``.
    :return: the percentage, rounded to a whole number, as a float. ``0.0`` is
        returned when either usage delta is not positive.
    """
    host_config = info['HostConfig']
    cpu_stats = stats['cpu_stats']
    precpu_stats = stats['precpu_stats']

    if 'online_cpus' in cpu_stats:
        num_cpus = cpu_stats['online_cpus']
    else:
        num_cpus = len(cpu_stats['cpu_usage']['percpu_usage'])

    # Identify limit system being used
    if 'NanoCpus' in host_config and host_config['NanoCpus'] != 0:
        # --cpus
        period = 1000000000
        quota = host_config['NanoCpus']
    elif 'CpuQuota' in host_config and host_config['CpuQuota'] != 0:
        # --cpu-quota; a period of 0 (or a missing one) falls back to 100000
        period = host_config.get('CpuPeriod') or 100000
        quota = host_config['CpuQuota']
    else:
        # unlimited
        period = 1
        quota = num_cpus

    if period * num_cpus < quota:
        # This handles the case where the quota is actually bigger than amount available by all the cpus.
        available_limit_ratio = 1
    else:
        available_limit_ratio = (period * num_cpus) / quota

    cpu_delta = cpu_stats['cpu_usage']['total_usage'] - precpu_stats['cpu_usage']['total_usage']
    system_delta = cpu_stats['system_cpu_usage'] - precpu_stats['system_cpu_usage']

    # Two identical samples give a zero system delta, which used to raise
    # ZeroDivisionError. Treat any non-positive delta as "no usage".
    if system_delta <= 0 or cpu_delta <= 0:
        return 0.0

    usage = (cpu_delta / system_delta) * available_limit_ratio
    return round(usage * 100, 0)
|
||||||
|
|
||||||
|
|
||||||
|
@multithread_execution()
@require_running('cpu')
def check_cpu(container, thresholds):
    """Evaluate the container's CPU usage percentage against the thresholds.

    :param container: name of the container to inspect.
    :param thresholds: threshold object; its ``units`` attribute is set to ``'%'``.
    """
    container_info = get_container_info(container)
    container_stats = get_stats(container=container)
    usage = calculate_cpu_capacity_precentage(info=container_info, stats=container_stats)

    thresholds.units = '%'
    evaluate_numeric_thresholds(container=container, value=usage, thresholds=thresholds, name='cpu', short_name='cpu',
                                min=0, max=100)
|
||||||
|
|
||||||
|
|
||||||
|
def process_args(args):
    """Parse the command line and publish the connection settings as globals.

    Builds the argument parser, prints the help text when ``args`` is empty,
    parses ``args`` and then sets the module globals ``timeout``, ``daemon``
    and ``connection_type`` from the result.

    :param args: list of command-line arguments (without the program name).
    :return: the parsed argument namespace.
    """
    global timeout, daemon, connection_type

    cli = argparse.ArgumentParser(description='Check docker containers.')

    # Where the daemon lives: plain socket/host, or a TLS protected host.
    endpoint_group = cli.add_mutually_exclusive_group()
    endpoint_group.add_argument(
        '--connection', dest='connection', action='store', default=DEFAULT_SOCKET, type=str,
        metavar='[/<path to>/docker.socket|<ip/host address>:<port>]',
        help='Where to find docker daemon socket. (default: %(default)s)')
    endpoint_group.add_argument(
        '--secure-connection', dest='secure_connection', action='store', type=str,
        metavar='[<ip/host address>:<port>]',
        help='Where to find TLS protected docker daemon socket.')

    # Base used when converting KB/MB/GB/TB; both flags write to units_base.
    units_group = cli.add_mutually_exclusive_group()
    units_group.add_argument(
        '--binary_units', dest='units_base', action='store_const', const=1024,
        help='Use a base of 1024 when doing calculations of KB, MB, GB, & TB (This is default)')
    units_group.add_argument(
        '--decimal_units', dest='units_base', action='store_const', const=1000,
        help='Use a base of 1000 when doing calculations of KB, MB, GB, & TB')
    cli.set_defaults(units_base=1024)

    # Connection timeout
    cli.add_argument(
        '--timeout', dest='timeout', action='store', type=float, default=DEFAULT_TIMEOUT,
        help='Connection timeout in seconds. (default: %(default)s)')

    # Container name patterns
    cli.add_argument(
        '--containers', dest='containers', action='store', nargs='+', type=str, default=['all'],
        help='One or more RegEx that match the names of the container(s) to check. If omitted all containers are checked. (default: %(default)s)')

    # Require every pattern to match something
    cli.add_argument(
        '--present', dest='present', default=False, action='store_true',
        help='Modifies --containers so that each RegEx must match at least one container.')

    # Threads
    cli.add_argument(
        '--threads', dest='threads', default=DEFAULT_PARALLELISM, action='store', type=int,
        help='This + 1 is the maximum number of concurent threads/network connections. (default: %(default)s)')

    # CPU
    cli.add_argument(
        '--cpu', dest='cpu', action='store', type=str, metavar='WARN:CRIT',
        help='Check cpu usage percentage taking into account any limits.')

    # Memory
    cli.add_argument(
        '--memory', dest='memory', action='store', type=str, metavar='WARN:CRIT:UNITS',
        help='Check memory usage taking into account any limits. Valid values for units are %%,B,KB,MB,GB.')

    # State
    cli.add_argument(
        '--status', dest='status', action='store', type=str,
        help='Desired container status (running, exited, etc).')

    # Health
    cli.add_argument(
        '--health', dest='health', default=None, action='store_true',
        help="Check container's health check status")

    # Uptime
    cli.add_argument(
        '--uptime', dest='uptime', action='store', type=str, metavar='WARN:CRIT',
        help='Minimum container uptime in seconds. Use when infrequent crashes are tolerated.')

    # Image age
    cli.add_argument(
        '--image-age', dest='image_age', action='store', type=str, metavar='WARN:CRIT',
        help='Maximum image age in days.')

    # Version
    cli.add_argument(
        '--version', dest='version', default=None, action='store_true',
        help='Check if the running images are the same version as those in the registry. Useful for finding stale images. Does not support login.')

    # Registries to reach without TLS during the version check
    cli.add_argument(
        '--insecure-registries', dest='insecure_registries', action='store', nargs='+', type=str, default=[],
        help='List of registries to connect to with http(no TLS). Useful when using "--version" with images from insecure registries.')

    # Restart
    cli.add_argument(
        '--restarts', dest='restarts', action='store', type=str, metavar='WARN:CRIT',
        help='Container restart thresholds.')

    # no-ok
    cli.add_argument(
        '--no-ok', dest='no_ok', action='store_true',
        help='Make output terse suppressing OK messages. If all checks are OK return a single OK.')

    # no-performance
    cli.add_argument(
        '--no-performance', dest='no_performance', action='store_true',
        help='Suppress performance data. Reduces output when performance data is not being used.')

    cli.add_argument('-V', action='version', version='%(prog)s {}'.format(__version__))

    # With no arguments at all, show the help before parsing continues.
    if len(args) == 0:
        cli.print_help()

    options = cli.parse_args(args=args)

    timeout = options.timeout

    # Work out the daemon URL and transport; TLS wins, then socket path vs host.
    if options.secure_connection:
        daemon = 'https://' + options.secure_connection
        connection_type = 'https'
    elif options.connection:
        if options.connection[0] == '/':
            daemon = 'socket://' + options.connection + ':'
            connection_type = 'socket'
        else:
            daemon = 'http://' + options.connection
            connection_type = 'http'

    return options
|
||||||
|
|
||||||
|
|
||||||
|
def no_checks_present(parsed_args):
    """Tell whether the parsed arguments request no check at all.

    Every module-level name starting with ``check_`` is treated as a check; the
    part after the prefix is looked up as an attribute of ``parsed_args``.
    ``--present`` counts as a check even though it is not implemented as one.

    :param parsed_args: the parsed argument namespace.
    :return: ``True`` when every check attribute is ``None`` and ``present`` is falsy.
    """
    check_names = []
    for global_name in globals().keys():
        if global_name.startswith('check_'):
            check_names.append(global_name[6:])

    for check_name in check_names:
        if getattr(parsed_args, check_name) is not None:
            return False
    return not parsed_args.present
|
||||||
|
|
||||||
|
|
||||||
|
def socketfile_permissions_failure(parsed_args):
    """Tell whether the docker socket file is unusable for this process.

    Only relevant when the global ``connection_type`` is ``'socket'``; any
    other connection type never counts as a failure.

    :param parsed_args: the parsed argument namespace; ``connection`` holds the path.
    :return: ``True`` when the path does not exist, is not a socket, or is not
        both readable and writable; ``False`` otherwise.
    """
    if connection_type != 'socket':
        return False

    socket_path = parsed_args.connection
    usable = (os.path.exists(socket_path)
              and stat.S_ISSOCK(os.stat(socket_path).st_mode)
              and os.access(socket_path, os.R_OK)
              and os.access(socket_path, os.W_OK))
    return not usable
|
||||||
|
|
||||||
|
|
||||||
|
def print_results():
    """Print the collected check messages, followed by perfdata when enabled.

    Messages are joined with ``'; '``. With the global ``no_ok`` set, messages
    starting with ``'OK: '`` are dropped and a bare ``OK`` is printed if none
    remain. Performance data is appended after a ``|`` unless the global
    ``no_performance`` is set or there is none.
    """
    if no_ok:
        # Remove all the "OK"s
        remaining = [message for message in messages if not message.startswith('OK: ')]
        summary = '; '.join(remaining) if len(remaining) != 0 else 'OK'
    else:
        summary = '; '.join(messages)

    if not no_performance and len(performance_data) != 0:
        print(summary + '|' + ' '.join(performance_data))
    else:
        print(summary)
|
||||||
|
|
||||||
|
|
||||||
|
def perform_checks(raw_args):
    """Parse the arguments, set up shared state and schedule the requested checks.

    Publishes the module globals ``parallel_executor``, ``serial_executor``,
    ``unit_adjustments``, ``no_ok`` and ``no_performance``, validates the
    invocation, and then calls each requested check for every matching container.

    :param raw_args: list of command-line arguments (without the program name).
    """
    args = process_args(raw_args)

    global parallel_executor
    parallel_executor = futures.ThreadPoolExecutor(max_workers=args.threads)
    global serial_executor
    serial_executor = futures.ThreadPoolExecutor(max_workers=1)

    global unit_adjustments
    unit_adjustments = {key: args.units_base ** value for key, value in UNIT_ADJUSTMENTS_TEMPLATE.items()}

    global no_ok
    no_ok = args.no_ok

    # Previously this copied args.no_ok, so --no-performance was ignored and
    # --no-ok silently suppressed performance data as well.
    global no_performance
    no_performance = args.no_performance

    if socketfile_permissions_failure(args):
        unknown("Cannot access docker socket file. User ID={}, socket file={}".format(os.getuid(), args.connection))
        return

    if args.containers == ["all"] and args.present:
        unknown("You can not use --present without --containers")
        return

    if no_checks_present(args):
        unknown("No checks specified.")
        return

    # Here is where all the work happens
    #############################################################################################
    containers = get_containers(args.containers, args.present)

    if len(containers) == 0 and not args.present:
        unknown("No containers names found matching criteria")
        return

    for container in containers:

        # Check status
        if args.status:
            check_status(container, args.status)

        # Check version
        if args.version:
            check_version(container, args.insecure_registries)

        # below are checks that require a 'running' status

        # Check health
        if args.health:
            check_health(container)

        # Check cpu usage
        if args.cpu:
            check_cpu(container, parse_thresholds(args.cpu, units_required=False))

        # Check memory usage
        if args.memory:
            check_memory(container, parse_thresholds(args.memory, units_required=False))

        # Check uptime
        if args.uptime:
            check_uptime(container, parse_thresholds(args.uptime, include_units=False))

        # Check image age
        if args.image_age:
            check_image_age(container, parse_thresholds(args.image_age, include_units=False))

        # Check restart count
        if args.restarts:
            check_restarts(container, parse_thresholds(args.restarts, include_units=False))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the checks, print the results and exit with the global return code.

    Any exception raised while checking, including ones raised inside worker
    threads, is printed as a traceback and reported as UNKNOWN.
    """
    try:
        perform_checks(argv[1:])

        # get results to let exceptions in threads bubble out
        for finished in futures.as_completed(threads):
            finished.result()

    except Exception as e:
        traceback.print_exc()
        unknown("Exception raised during check': {}".format(repr(e)))

    print_results()
    exit(rc)


if __name__ == '__main__':
    main()
|
||||||
51
check_executables_in_tmp
Executable file
51
check_executables_in_tmp
Executable file
@@ -0,0 +1,51 @@
|
|||||||
|
#! /bin/bash
#
# check_executables_in_tmp
# Check a directory for executables and become angry if we find them
#
# Copyright (C) 2022 Jacob Babor <jacob@babor.tech>
#
# Distributed under terms of the MIT license.
#
# Exit status: 0 (OK) when nothing is found, 2 (CRITICAL) otherwise.
#

set -e

tmpdir="/tmp"
minfileage="3600"

#
# Compile a list of executables found in /tmp
#
# Note that we deliberately use the -perm flag instead of the -executable flag
#
# This is by design, as -executable will fail on systems with noexec on the
# filesystem we're checking. This runs counter to our goal here, which is just
# to see if some skid has dumped a cryptominer on the machine.
#
executables=""
# One timestamp for the whole run; every file is compared against it
now="$(date +%s)"
# NUL-delimited names plus IFS=/-r keep odd filenames (spaces, backslashes,
# newlines) intact
while IFS= read -r -d '' line; do
	# Files in a temp directory come and go; one that vanished between find
	# and stat is simply skipped rather than aborting the whole check via
	# set -e
	if ! filetimestamp="$(stat -c %Y -- "$line" 2>/dev/null)"; then
		continue
	fi
	# Ignore recently-created files
	# This is so things like Ansible plays don't trigger us
	age="$(( now - filetimestamp ))"
	if (( age <= minfileage )); then
		continue
	fi
	# Add it to the list
	if [ -z "$executables" ]; then
		executables="$line"
	else
		executables="$executables, $line"
	fi
# find errors (unreadable subdirectories and the like) are deliberately ignored
done < <(find "$tmpdir" -type f -perm /u=x,g=x,o=x -print0 2>/dev/null || true )


# If we found any, become angry
if [ -n "$executables" ]; then
	echo "CRITICAL: Found executables in $tmpdir: $executables"
	exit 2
else
	echo "OK: No executables in $tmpdir older than ${minfileage}s"
	exit 0
fi
|
||||||
170
check_file_age
Executable file
170
check_file_age
Executable file
@@ -0,0 +1,170 @@
|
|||||||
|
#! /bin/bash
|
||||||
|
#
|
||||||
|
# check_file_age
|
||||||
|
# Alerts if a file's age is beyond a specific threshold
|
||||||
|
# Copyright (C) 2021 Vintage Salt <rehashedsalt@cock.li>
|
||||||
|
#
|
||||||
|
# Distributed under terms of the MIT license.
|
||||||
|
#
|
||||||
|
set -e
|
||||||
|
|
||||||
|
# Read-only set-once variables
|
||||||
|
# Read-only set-once variables
# Assigned first and marked read-only afterwards: combining both in a single
# declare would hide a failing basename behind declare's own exit status
_name="$(basename -- "$0")"
declare -r _name
# Options
declare -i _optcritthresh
declare -i _optwarnthresh
declare -i _opthelp
declare -i _optverbose
# Working variables
declare -a _args
declare _return
|
||||||
|
|
||||||
|
# Helper functions
|
||||||
|
log() {
	# Print message $1 to stdout when _optverbose is at least level $2
	# $2 defaults to 0; an empty message is rejected with status 1
	#   level 0: Daily-use messages
	#   level 1: Detailed but not quite debugging
	#   level 2: Definitely debugging
	if [ -z "$1" ]; then
		return 1
	fi
	local threshold="${2:-0}"
	if (( _optverbose < threshold )); then
		return 0
	fi
	printf "%s\\n" "$1"
}
|
||||||
|
warn() {
	# Print warning $1 to stdout when _optverbose is at least level $2
	# (default 0). Yellow on a terminal, prefixed with "WARN: " otherwise.
	# An empty message is rejected with status 1
	if [ -z "$1" ]; then
		return 1
	fi
	local threshold="${2:-0}"
	if (( _optverbose < threshold )); then
		return 0
	fi
	if ! [ -t 1 ]; then
		printf "WARN: %s\\n" "$1"
	else
		printf "\\e[33m%s\\e[0m\\n" "$1"
	fi
}
|
||||||
|
error() {
|
||||||
|
# Print a red line to the terminal, exit if $2 is specified
|
||||||
|
[ -z "$1" ] && return 1
|
||||||
|
if [ -t 2 ]; then
|
||||||
|
printf "\\e[31m%s\\e[0m\\n" "$1" 1>&2
|
||||||
|
else
|
||||||
|
printf "ERROR: %s\\n" "$1" 1>&2
|
||||||
|
fi
|
||||||
|
[ -z "$2" ] && return
|
||||||
|
exit "${2:-1}"
|
||||||
|
}
|
||||||
|
has() {
	# Check that every argument names a command that can be found
	# On the first one that cannot, store its name in _return and fail
	# Returns 0 when all of them were found (or none were given)
	local candidate
	for candidate in "$@"; do
		if ! command -v "$candidate" > /dev/null 2>&1; then
			_return="$candidate"
			return 1
		fi
	done
	return 0
}
|
||||||
|
|
||||||
|
# Core program functions
|
||||||
|
checkage() {
	# Check the age of the file in _args[0] against _optwarnthresh and
	# _optcritthresh (both in seconds), print one status line and exit:
	#   0 OK, 1 WARNING, 2 CRITICAL
	local file filetimestamp now age
	file="${_args[0]}"
	log "Checking file $file" 1
	# Get some times
	# A file that is missing or cannot be examined used to make stat abort
	# the script under set -e with status 1 and no status line, which
	# monitoring reads as a WARNING. Report it explicitly instead
	if ! filetimestamp="$(stat -c %Y -- "$file" 2>/dev/null)"; then
		echo "CRITICAL: $file does not exist or cannot be read"
		exit 2
	fi
	now="$(date +%s)"
	# Get our age
	age="$(( now - filetimestamp ))"
	log "File is $age seconds old" 1
	# Throw out warns and crits as necessary
	if (( age >= _optcritthresh )); then
		echo "CRITICAL: $file is $age seconds old (max $_optcritthresh)"
		exit 2
	elif (( age >= _optwarnthresh )); then
		echo "WARNING: $file is $age seconds old (max $_optwarnthresh)"
		exit 1
	else
		echo "OK: $file"
		exit 0
	fi
}
|
||||||
|
printhelp() {
	# Write the usage text to stdout, one array element per output line
	# $_name is expanded into the usage line
	local -a help_lines=(
		"Usage: $_name <FILE> [OPTION]..."
		"A Nagios monitoring plugin for checking a file's age"
		""
		"Arguments:"
		"FILE The file to monitor"
		""
		"Flags:"
		"-c The maximum age of the file in seconds before a CRITICAL status is"
		"returned"
		"-h Print this help text"
		"-v Print more status messages. Stacks"
		"-w The maximum age of the file in seconds before a WARNING status is"
		"returned"
		""
		"Copyright (c) 2021 rehashedsalt@cock.li"
		"Licensed under the MIT license"
	)
	printf '%s\n' "${help_lines[@]}"
}
|
||||||
|
|
||||||
|
# Main
|
||||||
|
main() {
	# Entry point: parse flags and the file argument (in any order),
	# validate them, check dependencies, then run the age check
	# Flags: -c <seconds>, -w <seconds>, -h, -v (see printhelp)
	local opt
	# Parse out arguments
	while [ -n "$1" ]; do
		# getopts keeps its position in OPTIND and never resets it on its
		# own; start from the first remaining argument on every pass
		OPTIND=1
		# Parse out flags
		while getopts ":c:hvw:" opt; do
			case "$opt" in
				c)
					_optcritthresh="$OPTARG"
					;;
				h)
					_opthelp=1
					;;
				v)
					_optverbose+=1
					;;
				w)
					_optwarnthresh="$OPTARG"
					;;
				:)
					error "Option requires argument: -$OPTARG" 2
					;;
				*)
					error "Invalid option: -$OPTARG" 2
					;;
			esac
		done
		# Store arguments
		# Whatever follows the flags just parsed is a positional argument;
		# keep it and go round again for any flags after it
		shift $((OPTIND - 1))
		if [ -n "$1" ]; then
			_args+=("$1")
			shift
		fi
	done
	# Early hook for help
	[ -n "$_opthelp" ] && printhelp && exit 0
	# Validate critical options
	if [ -z "${_args[0]}" ]; then
		error "Must specify a file" 50
	fi
	if ! (( _optcritthresh > 0 )) 2>/dev/null; then
		error "Critical threshold must be an integer greater than 0" 50
	fi
	if ! (( _optwarnthresh > 0 )) 2>/dev/null; then
		error "Warning threshold must be an integer greater than 0" 50
	fi
	# Validate core program dependencies
	# stat and date are what the age check itself calls
	log "Validating dependencies" 2
	if ! has basename stat date; then
		error "Failed to find program: $_return" 50
	fi

	# Do the do
	checkage
}
|
||||||
|
|
||||||
|
main "$@"
|
||||||
|
|
||||||
409
check_mem_justintime
Executable file
409
check_mem_justintime
Executable file
@@ -0,0 +1,409 @@
|
|||||||
|
#!/usr/bin/perl -w
|
||||||
|
|
||||||
|
# Heavily based on the script from:
|
||||||
|
# check_mem.pl Copyright (C) 2000 Dan Larsson <dl@tyfon.net>
|
||||||
|
# heavily modified by
|
||||||
|
# Justin Ellison <justin@techadvise.com>
|
||||||
|
#
|
||||||
|
# The MIT License (MIT)
|
||||||
|
# Copyright (c) 2011 justin@techadvise.com
|
||||||
|
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||||
|
# software and associated documentation files (the "Software"), to deal in the Software
|
||||||
|
# without restriction, including without limitation the rights to use, copy, modify,
|
||||||
|
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
# permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
# The above copyright notice and this permission notice shall be included in all copies
|
||||||
|
# or substantial portions of the Software.
|
||||||
|
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||||
|
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||||
|
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
|
||||||
|
# FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
|
||||||
|
# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
# OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
# Tell Perl what we need to use
|
||||||
|
use strict;
|
||||||
|
use Getopt::Std;
|
||||||
|
|
||||||
|
#TODO - Convert to Nagios::Plugin
|
||||||
|
#TODO - Use an alarm
|
||||||
|
|
||||||
|
# Predefined exit codes for Nagios
|
||||||
|
use vars qw($opt_c $opt_f $opt_u $opt_w $opt_C $opt_v $opt_h %exit_codes);
|
||||||
|
%exit_codes = ('UNKNOWN' , 3,
|
||||||
|
'OK' , 0,
|
||||||
|
'WARNING' , 1,
|
||||||
|
'CRITICAL', 2,
|
||||||
|
);
|
||||||
|
|
||||||
|
# Parse and validate the command line (exits with usage on bad input).
init();

# Collect the raw figures for this platform, all in kB.
my ($free_memory_kb, $used_memory_kb, $caches_kb, $hugepages_kb) = get_memory_info();

if ($opt_v) {
    print "$free_memory_kb Free\n$used_memory_kb Used\n$caches_kb Cache\n";
    print "$hugepages_kb Hugepages\n" if $opt_h;
}

# -C: treat OS caches as free memory rather than used memory.
if ($opt_C) {
    $used_memory_kb -= $caches_kb;
    $free_memory_kb += $caches_kb;
}

# -h: do not count hugepages as used memory.
$used_memory_kb -= $hugepages_kb if $opt_h;

print "$used_memory_kb Used (after Hugepages)\n" if $opt_v;

# Round to the nearest KB. The loop variable aliases each scalar, so the
# assignment updates the original variables in place.
foreach my $kb ($free_memory_kb, $used_memory_kb, $caches_kb) {
    $kb = sprintf('%.0f', $kb);
}

# Report the result to Nagios (prints the status line and exits).
tell_nagios($used_memory_kb, $free_memory_kb, $caches_kb, $hugepages_kb);
|
||||||
|
|
||||||
|
|
||||||
|
# Print the Nagios status line plus perfdata and exit with the matching code.
#
# Arguments (all in kB): used, free, caches, hugepages.
# Reads the option globals $opt_f, $opt_u, $opt_w, $opt_c, $opt_h, $opt_v.
# Never returns normally: every path ends in finish(), which exits.
sub tell_nagios {
    my ($used, $free, $caches, $hugepages) = @_;

    # Calculate Total Memory
    my $total = $free + $used;
    print "$total Total\n" if ($opt_v);

    # Without a positive total the percentages below would divide by zero.
    if ($total <= 0) {
        finish("UNKNOWN - unable to determine total memory", $exit_codes{'UNKNOWN'});
    }

    # Perfdata thresholds are always expressed against USED kB. The status
    # check below prefers -f when both -f and -u are given, so the thresholds
    # follow the same precedence.
    my $perf_warn;
    my $perf_crit;
    if ( $opt_u and !$opt_f ) {
        $perf_warn = int(${total} * $opt_w / 100);
        $perf_crit = int(${total} * $opt_c / 100);
    } else {
        $perf_warn = int(${total} * ( 100 - $opt_w ) / 100);
        $perf_crit = int(${total} * ( 100 - $opt_c ) / 100);
    }

    my $perfdata = "|TOTAL=${total}KB;;;; USED=${used}KB;${perf_warn};${perf_crit};; FREE=${free}KB;;;; CACHES=${caches}KB;;;;";
    $perfdata .= " HUGEPAGES=${hugepages}KB;;;;" if ($opt_h);

    if ($opt_f) {
        # Checking FREE memory: alert when the percentage drops to the level.
        my $percent = sprintf "%.1f", ($free / $total * 100);
        if ($percent <= $opt_c) {
            finish("CRITICAL - $percent% ($free kB) free!$perfdata",$exit_codes{'CRITICAL'});
        }
        elsif ($percent <= $opt_w) {
            finish("WARNING - $percent% ($free kB) free!$perfdata",$exit_codes{'WARNING'});
        }
        else {
            finish("OK - $percent% ($free kB) free.$perfdata",$exit_codes{'OK'});
        }
    }
    elsif ($opt_u) {
        # Checking USED memory: alert when the percentage rises to the level.
        my $percent = sprintf "%.1f", ($used / $total * 100);
        if ($percent >= $opt_c) {
            finish("CRITICAL - $percent% ($used kB) used!$perfdata",$exit_codes{'CRITICAL'});
        }
        elsif ($percent >= $opt_w) {
            finish("WARNING - $percent% ($used kB) used!$perfdata",$exit_codes{'WARNING'});
        }
        else {
            finish("OK - $percent% ($used kB) used.$perfdata",$exit_codes{'OK'});
        }
    }
    else {
        # Only reachable if option validation was bypassed; never exit silently.
        finish("UNKNOWN - neither FREE (-f) nor USED (-u) check selected", $exit_codes{'UNKNOWN'});
    }
}
|
||||||
|
|
||||||
|
# Print the help text to STDOUT and exit with the Nagios UNKNOWN status.
sub usage() {
    my @help_lines = (
        "\ncheck_mem.pl v1.0 - Nagios Plugin\n\n",
        "usage:\n",
        " check_mem.pl -<f|u> -w <warnlevel> -c <critlevel>\n\n",
        "options:\n",
        " -f Check FREE memory\n",
        " -u Check USED memory\n",
        " -C Count OS caches as FREE memory\n",
        " -h Remove hugepages from the total memory count\n",
        " -w PERCENT Percent free/used when to warn\n",
        " -c PERCENT Percent free/used when critical\n",
        "\nCopyright (C) 2000 Dan Larsson <dl\@tyfon.net>\n",
        "check_mem.pl comes with absolutely NO WARRANTY either implied or explicit\n",
        "This program is licensed under the terms of the\n",
        "MIT License (check source code for details)\n",
    );
    print join('', @help_lines);
    exit $exit_codes{'UNKNOWN'};
}
|
||||||
|
|
||||||
|
# Gather memory statistics for the current platform.
#
# The platform is selected by matching the output of `uname -a`; each branch
# shells out to (or reads) the native source of memory counters.
#
# Returns a four element list, all values in kB:
#   (free memory, used memory, caches, hugepages)
# Hugepages are only populated on Linux; caches only where a branch sets them.
# Dies if uname cannot be found or a Linux proc/sysfs file cannot be opened.
sub get_memory_info {
    my $used_memory_kb  = 0;
    my $free_memory_kb  = 0;
    my $total_memory_kb = 0;
    my $caches_kb       = 0;
    my $hugepages_nr    = 0;
    my $hugepages_size  = 0;
    my $hugepages_kb    = 0;

    my $uname;
    if ( -e '/usr/bin/uname') {
        $uname = `/usr/bin/uname -a`;
    }
    elsif ( -e '/bin/uname') {
        $uname = `/bin/uname -a`;
    }
    else {
        die "Unable to find uname in /usr/bin or /bin!\n";
    }
    # A failed backtick yields undef; keep the matches below warning-free.
    $uname = '' unless defined $uname;
    print "uname returns $uname" if ($opt_v);

    if ( $uname =~ /Linux/ ) {
        # Read the file directly instead of forking /bin/cat, so a failure is
        # reported rather than silently producing all-zero figures.
        open(my $meminfo_fh, '<', '/proc/meminfo')
            || die "Can't open /proc/meminfo for reading: $!";
        my @meminfo = <$meminfo_fh>;
        close($meminfo_fh);

        foreach (@meminfo) {
            chomp;
            if (/^Mem(Total|Free):\s+(\d+) kB/) {
                my $counter_name = $1;
                if ($counter_name eq 'Free') {
                    $free_memory_kb = $2;
                }
                elsif ($counter_name eq 'Total') {
                    $total_memory_kb = $2;
                }
            }
            elsif (/^(Buffers|Cached|SReclaimable):\s+(\d+) kB/) {
                $caches_kb += $2;
            }
            elsif (/^Shmem:\s+(\d+) kB/) {
                $caches_kb -= $1;
            }
            # These variables will most likely be overwritten once we look into
            # /sys/kernel/mm/hugepages, unless we are running on linux <2.6.27
            # and have to rely on them
            elsif (/^HugePages_Total:\s+(\d+)/) {
                $hugepages_nr = $1;
            }
            elsif (/^Hugepagesize:\s+(\d+) kB/) {
                $hugepages_size = $1;
            }
        }
        $hugepages_kb = $hugepages_nr * $hugepages_size;
        $used_memory_kb = $total_memory_kb - $free_memory_kb;

        # Read hugepages info from the newer sysfs interface if available
        my $hugepages_sysfs_dir = '/sys/kernel/mm/hugepages';
        if ( -d $hugepages_sysfs_dir ) {
            # Reset what we read from /proc/meminfo
            $hugepages_kb = 0;
            opendir(my $dh, $hugepages_sysfs_dir)
                || die "Can't open $hugepages_sysfs_dir: $!";
            while (my $entry = readdir $dh) {
                if ($entry =~ /^hugepages-(\d+)kB/) {
                    $hugepages_size = $1;
                    my $hugepages_nr_file = "$hugepages_sysfs_dir/$entry/nr_hugepages";
                    open(my $fh, '<', $hugepages_nr_file)
                        || die "Can't open $hugepages_nr_file for reading: $!";
                    my $nr_line = <$fh>;
                    close($fh);
                    # Accept only a leading integer; an empty or odd file counts as 0.
                    $hugepages_nr = (defined $nr_line && $nr_line =~ /^\s*(\d+)/) ? $1 : 0;
                    $hugepages_kb += $hugepages_nr * $hugepages_size;
                }
            }
            closedir($dh);
        }
    }
    elsif ( $uname =~ /HP-UX/ ) {
        # HP-UX, thanks to Christoph Fürstaller
        my @meminfo = `/usr/bin/sudo /usr/local/bin/kmeminfo`;
        foreach (@meminfo) {
            chomp;
            # Second capture is the size in gigabytes, e.g. "16.0g"; the dot is
            # escaped so it only matches a literal decimal point.
            if (/^Physical memory\s\s+=\s+(\d+)\s+(\d+\.\d)g/) {
                $total_memory_kb = ($2 * 1024 * 1024);
            }
            elsif (/^Free memory\s\s+=\s+(\d+)\s+(\d+\.\d)g/) {
                $free_memory_kb = ($2 * 1024 * 1024);
            }
        }
        $used_memory_kb = $total_memory_kb - $free_memory_kb;
    }
    elsif ( $uname =~ /FreeBSD/ ) {
        # The FreeBSD case. 2013-03-19 www.claudiokuenzler.com
        # free mem = Inactive*Page Size + Cache*Page Size + Free*Page Size
        # Use the same absolute sysctl path as the query below so the result
        # does not depend on PATH.
        my $pagesize = `/sbin/sysctl vm.stats.vm.v_page_size`;
        $pagesize = '' unless defined $pagesize;
        $pagesize =~ s/[^0-9]//g;
        $pagesize = 0 if $pagesize eq '';
        my $mem_inactive = 0;
        my $mem_cache = 0;
        my $mem_free = 0;
        my $mem_total = 0;
        my $free_memory = 0;
        my @meminfo = `/sbin/sysctl vm.stats.vm`;
        foreach (@meminfo) {
            chomp;
            if (/^vm\.stats\.vm\.v_inactive_count:\s+(\d+)/) {
                $mem_inactive = ($1 * $pagesize);
            }
            elsif (/^vm\.stats\.vm\.v_cache_count:\s+(\d+)/) {
                $mem_cache = ($1 * $pagesize);
            }
            elsif (/^vm\.stats\.vm\.v_free_count:\s+(\d+)/) {
                $mem_free = ($1 * $pagesize);
            }
            elsif (/^vm\.stats\.vm\.v_page_count:\s+(\d+)/) {
                $mem_total = ($1 * $pagesize);
            }
        }
        $free_memory = $mem_inactive + $mem_cache + $mem_free;
        $free_memory_kb = ( $free_memory / 1024);
        $total_memory_kb = ( $mem_total / 1024);
        $used_memory_kb = $total_memory_kb - $free_memory_kb;
        $caches_kb = ($mem_cache / 1024);
    }
    elsif ( $uname =~ /joyent/ ) {
        # The SmartOS case. 2014-01-10 www.claudiokuenzler.com
        # free mem = pagesfree * pagesize
        # The command output keeps its trailing newline on purpose: the
        # verbose prints below rely on it for line termination.
        my $pagesize = `pagesize`;
        my $phys_pages = `kstat -p unix:0:system_pages:pagestotal | awk '{print \$NF}'`;
        my $free_pages = `kstat -p unix:0:system_pages:pagesfree | awk '{print \$NF}'`;
        my $arc_size = `kstat -p zfs:0:arcstats:size | awk '{print \$NF}'`;
        my $arc_size_kb = $arc_size / 1024;

        print "Pagesize is $pagesize" if ($opt_v);
        print "Total pages is $phys_pages" if ($opt_v);
        print "Free pages is $free_pages" if ($opt_v);
        print "Arc size is $arc_size" if ($opt_v);

        $caches_kb += $arc_size_kb;

        $total_memory_kb = $phys_pages * $pagesize / 1024;
        $free_memory_kb = $free_pages * $pagesize / 1024;
        $used_memory_kb = $total_memory_kb - $free_memory_kb;
    }
    elsif ( $uname =~ /SunOS/ ) {
        eval "use Sun::Solaris::Kstat";
        if ($@) { #Kstat not available
            if ($opt_C) {
                print "You can't report on Solaris caches without Sun::Solaris::Kstat available!\n";
                exit $exit_codes{UNKNOWN};
            }
            my @vmstat = `/usr/bin/vmstat 1 2`;
            my $line;
            # Keep only the last line (the second sample).
            foreach (@vmstat) {
                chomp;
                $line = $_;
            }
            # NOTE(review): splitting on a single space assumes a fixed column
            # layout in the vmstat output; field 5 is taken as free memory.
            # Confirm against real Solaris output before changing.
            $free_memory_kb = (split(/ /,$line))[5] / 1024;
            my @prtconf = `/usr/sbin/prtconf`;
            foreach (@prtconf) {
                if (/^Memory size: (\d+) Megabytes/) {
                    $total_memory_kb = $1 * 1024;
                }
            }
            $used_memory_kb = $total_memory_kb - $free_memory_kb;
        }
        else { # We have kstat
            my $kstat = Sun::Solaris::Kstat->new();
            my $phys_pages = ${kstat}->{unix}->{0}->{system_pages}->{physmem};
            my $free_pages = ${kstat}->{unix}->{0}->{system_pages}->{freemem};
            # We probably should account for UFS caching here, but it's unclear
            # to me how to determine UFS's cache size. There's inode_cache,
            # and maybe the physmem variable in the system_pages module??
            # In the real world, it looks to be so small as not to really matter,
            # so we don't grab it. If someone can give me code that does this,
            # I'd be glad to put it in.
            my $arc_size = (exists ${kstat}->{zfs} && ${kstat}->{zfs}->{0}->{arcstats}->{size}) ?
                 ${kstat}->{zfs}->{0}->{arcstats}->{size} / 1024
                 : 0;
            $caches_kb += $arc_size;
            my $pagesize = `pagesize`;

            $total_memory_kb = $phys_pages * $pagesize / 1024;
            $free_memory_kb = $free_pages * $pagesize / 1024;
            $used_memory_kb = $total_memory_kb - $free_memory_kb;
        }
    }
    elsif ( $uname =~ /Darwin/ ) {
        $total_memory_kb = (split(/ /,`/usr/sbin/sysctl hw.memsize`))[1]/1024;
        my $pagesize = (split(/ /,`/usr/sbin/sysctl hw.pagesize`))[1];
        $caches_kb = 0;
        my @vm_stat = `/usr/bin/vm_stat`;
        foreach (@vm_stat) {
            chomp;
            if (/^(Pages free):\s+(\d+)\.$/) {
                $free_memory_kb = $2*$pagesize/1024;
            }
            # 'caching' concept works different on MACH
            # this should be a reasonable approximation
            # (the trailing dot is escaped so it matches only the literal
            # period that ends each vm_stat line)
            elsif (/^Pages (inactive|purgable):\s+(\d+)\.$/) {
                $caches_kb += $2*$pagesize/1024;
            }
        }
        $used_memory_kb = $total_memory_kb - $free_memory_kb;
    }
    elsif ( $uname =~ /AIX/ ) {
        my @meminfo = `/usr/bin/vmstat -vh`;
        foreach (@meminfo) {
            chomp;
            if (/^\s*([0-9.]+)\s+(.*)/) {
                my $counter_name = $2;
                # Each counter is multiplied by 4 to convert pages to kB.
                if ($counter_name eq 'memory pages') {
                    $total_memory_kb = $1*4;
                }
                if ($counter_name eq 'free pages') {
                    $free_memory_kb = $1*4;
                }
                if ($counter_name eq 'file pages') {
                    $caches_kb = $1*4;
                }
                if ($counter_name eq 'Number of 4k page frames loaned') {
                    $free_memory_kb += $1*4;
                }
            }
        }
        $used_memory_kb = $total_memory_kb - $free_memory_kb;
    }
    else {
        # Unknown platform: fall back to generic vmstat output.
        if ($opt_C) {
            print "You can't report on $uname caches!\n";
            exit $exit_codes{UNKNOWN};
        }
        my $command_line = `vmstat | tail -1 | awk '{print \$4,\$5}'`;
        chomp $command_line;
        my @memlist = split(/ /, $command_line);

        # Define the calculating scalars
        $used_memory_kb = $memlist[0]/1024;
        $free_memory_kb = $memlist[1]/1024;
        $total_memory_kb = $used_memory_kb + $free_memory_kb;
    }
    return ($free_memory_kb,$used_memory_kb,$caches_kb,$hugepages_kb);
}
|
||||||
|
|
||||||
|
# Parse the command line into the $opt_* globals and validate it.
# Any problem prints a diagnostic followed by the usage text; usage exits
# with the Nagios UNKNOWN status, so this only returns on valid input.
sub init {
    # Get the options. $#ARGV is a number, so compare it numerically
    # (the string operator only happened to give the same answer).
    if ($#ARGV <= 0) {
        &usage;
    }
    elsif (!getopts('c:fuChvw:')) {
        # getopts returns false on an unknown switch or a missing argument
        # and has already reported the offender on STDERR.
        &usage;
    }

    # Shortcircuit the switches
    if (!$opt_w or $opt_w == 0 or !$opt_c or $opt_c == 0) {
        print "*** You must define WARN and CRITICAL levels!\n";
        &usage;
    }
    elsif (!$opt_f and !$opt_u) {
        print "*** You must select to monitor either USED or FREE memory!\n";
        &usage;
    }

    # Check if levels are sane
    if ($opt_w <= $opt_c and $opt_f) {
        print "*** WARN level must not be less than CRITICAL when checking FREE memory!\n";
        &usage;
    }
    elsif ($opt_w >= $opt_c and $opt_u) {
        print "*** WARN level must not be greater than CRITICAL when checking USED memory!\n";
        &usage;
    }
}
|
||||||
|
|
||||||
|
# Emit the final plugin output line and terminate with the given exit status.
#   $msg   - status text (a newline is appended)
#   $state - process exit code
sub finish {
    my $msg   = shift;
    my $state = shift;
    print $msg, "\n";
    exit $state;
}
|
||||||
150
check_minecraft
Executable file
150
check_minecraft
Executable file
@@ -0,0 +1,150 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# coding=utf8
|
||||||
|
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
import sys, string, socket, time, argparse
|
||||||
|
|
||||||
|
# Exit statuses recognized by Nagios.
|
||||||
|
STATE_OK = 0
|
||||||
|
STATE_WARNING = 1
|
||||||
|
STATE_CRITICAL = 2
|
||||||
|
STATE_UNKNOWN = 3
|
||||||
|
|
||||||
|
# Output formatting string.
|
||||||
|
OUTPUT_OK = "MINECRAFT OK: {0} - {1} bytes in {2:.3} second response time|time={2}s;{3};{4};0.0;{5}"
|
||||||
|
OUTPUT_WARNING = "MINECRAFT WARNING: {0} - {1} bytes in {2:.3} second response time|time={2}s;{3};{4};0.0;{5}"
|
||||||
|
OUTPUT_CRITICAL = "MINECRAFT CRITICAL: {0} - {1} bytes in {2:.3} second response time|time={2}s;{3};{4};0.0;{5}"
|
||||||
|
OUTPUT_EXCEPTION = "MINECRAFT CRITICAL: {0}"
|
||||||
|
OUTPUT_UNKNOWN = "MINECRAFT UNKNOWN: Invalid arguments"
|
||||||
|
|
||||||
|
# Minecraft packet IDs, delimiters and encoding.
|
||||||
|
MC_SERVER_LIST_PING = "\xfe"
|
||||||
|
MC_DISCONNECT = "\xff"
|
||||||
|
MC_DELIMITER = u"\xa7"
|
||||||
|
MC_ENCODING = "utf-16be"
|
||||||
|
|
||||||
|
def log(start, message):
    """Print `message` prefixed with the time elapsed since `start`."""
    elapsed = datetime.now() - start
    print("{0}: {1}".format(elapsed, message))
|
||||||
|
|
||||||
|
def get_server_info(host, port, num_checks, timeout, verbose):
    """Ping a Minecraft server `num_checks` times and summarise the reply.

    Each check opens a TCP connection, sends a Server List Ping packet and
    reads the answer. The last answer is decoded into MOTD and player counts.

    Returns a dict with the keys 'motd', 'players', 'max_players',
    'byte_count' (bytes sent plus received over all checks) and
    'response_time' (average seconds per check).

    Raises socket.error on connection problems, AssertionError when the
    reply is empty or malformed, UnicodeDecodeError when it cannot be
    decoded, and ValueError when num_checks is less than 1.
    """
    if num_checks < 1:
        # Without at least one check there is no reply to decode and the
        # average below would divide by zero.
        raise ValueError("number of checks must be at least 1")

    start_time = datetime.now()
    total_delta = timedelta()
    byte_count = len(MC_SERVER_LIST_PING) * num_checks
    iteration = ""

    # Contact the server multiple times to get a stable average response time.
    for i in range(0, num_checks):
        if verbose:
            iteration = "Iteration {0}/{1}: ".format(i + 1, num_checks)

        # Save start time and connect to server.
        if verbose:
            log(start_time, "{0}Connecting to {1} on port {2}.".format(iteration, host, port))
        net_start_time = datetime.now()
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.settimeout(timeout)
            s.connect((host, port))

            # Send Minecraft Server List Ping packet. sendall() keeps writing
            # until the whole packet is out; send() may write only part of it.
            if verbose:
                log(start_time, "{0}Sending Server List Ping.".format(iteration))
            s.sendall(MC_SERVER_LIST_PING)

            # Receive answer from server. The largest answer returned by the server that also works with the Minecraft client
            # seems to be around 520 bytes (259 unicode character at 2 bytes each plus one start byte and one length byte).
            if verbose:
                log(start_time, "{0}Receiving data...".format(iteration))
            data = s.recv(550)
        finally:
            # Always release the socket, including on timeouts and errors.
            s.close()

        data_len = len(data)
        byte_count += data_len
        if verbose:
            log(start_time, "{0}Received {1} bytes".format(iteration, data_len))

        # Check if returned data seems valid. Slicing (rather than indexing)
        # keeps an empty reply from raising IndexError.
        valid = data[:1] == MC_DISCONNECT
        if verbose:
            if valid:
                log(start_time, "Returned data seems valid.")
            elif data_len:
                log(start_time, "Returned data is invalid. First byte is {0:#x}.".format(ord(data[0])))
            else:
                log(start_time, "Returned data is invalid. No data received.")

        # Raise explicitly instead of using `assert`, which is removed when
        # Python runs with -O.
        if not valid:
            raise AssertionError("Invalid data returned by server")

        # Save response time for later average calculation.
        delta = datetime.now() - net_start_time
        total_delta += delta

        time.sleep(0.1)

    # Calculate the average response time in seconds
    total_response = total_delta.seconds + total_delta.microseconds / 1000000.0
    average_response = total_response / num_checks

    # Decode and split the reply, skipping the first three bytes.
    # NOTE(review): presumably the packet ID plus a two-byte length prefix --
    # confirm against the protocol documentation.
    info = data[3:].decode(MC_ENCODING).split(MC_DELIMITER)

    # The last two fields are the player count and the player limit;
    # everything before them belongs to the MOTD.
    if len(info) < 2:
        raise AssertionError("Server response is missing the player counts")
    try:
        players = int(info[-2])
        max_players = int(info[-1])
    except ValueError:
        raise AssertionError("Server response contains non-numeric player counts")

    motd = ''.join(info[:-2]).replace("\n", "")  # removing newlines

    return {'motd': motd,
            'players': players,
            'max_players': max_players,
            'byte_count': byte_count,
            'response_time': average_response}
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: query the server and exit Nagios-style.

    Prints exactly one status line and exits with STATE_OK, STATE_WARNING,
    STATE_CRITICAL or STATE_UNKNOWN.
    """
    parser = argparse.ArgumentParser(description="This plugin will try to connect to a Minecraft server.")

    parser.add_argument('-H', '--hostname', dest='hostname', metavar='ADDRESS', required=True, help="host name or IP address")
    parser.add_argument('-p', '--port', dest='port', type=int, default=25565, metavar='INTEGER', help="port number (default: 25565)")
    parser.add_argument('-n', '--number-of-checks', dest='num_checks', type=int, default=5, metavar='INTEGER', help="number of checks to get stable average response time (default: 5)")
    parser.add_argument('-m', '--motd', dest='motd', default='A Minecraft Server', metavar='STRING', help="expected motd in server response (default: A Minecraft Server)")
    parser.add_argument('-f', '--warn-on-full', dest='full', action='store_true', help="generate warning if server is full")
    parser.add_argument('-w', '--warning', dest='warning', type=float, default=0.0, metavar='DOUBLE', help="response time to result in warning status (seconds)")
    parser.add_argument('-c', '--critical', dest='critical', type=float, default=0.0, metavar='DOUBLE', help="response time to result in critical status (seconds)")
    parser.add_argument('-t', '--timeout', dest='timeout', type=float, default=10.0, metavar='DOUBLE', help="seconds before connection times out (default: 10)")
    parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', help="show details for command-line debugging (Nagios may truncate output)")

    # Parse the arguments. If it fails, exit overriding exit code: argparse
    # would otherwise exit with its own status instead of STATE_UNKNOWN.
    try:
        args = parser.parse_args()
    except SystemExit:
        print(OUTPUT_UNKNOWN)
        sys.exit(STATE_UNKNOWN)

    try:
        info = get_server_info(args.hostname, args.port, args.num_checks, args.timeout, args.verbose)

        def report(template, summary, state):
            # Fill in the shared perfdata fields, print the line and exit.
            print(template.format(summary, info['byte_count'], info['response_time'], args.warning, args.critical, args.timeout))
            sys.exit(state)

        # Substring test with the `in` operator instead of the long-deprecated
        # string.find() helper function.
        if args.motd not in info['motd']:
            report(OUTPUT_WARNING, "Unexpected MOTD, {0}".format(info['motd']), STATE_WARNING)

        # Check if response time is above critical level.
        # (A threshold of 0.0, the default, disables the check.)
        if args.critical and info['response_time'] > args.critical:
            report(OUTPUT_CRITICAL, "{0} second response time".format(info['response_time']), STATE_CRITICAL)

        # Check if response time is above warning level.
        if args.warning and info['response_time'] > args.warning:
            report(OUTPUT_WARNING, "{0} second response time".format(info['response_time']), STATE_WARNING)

        # Check if server is full.
        if args.full and info['players'] == info['max_players']:
            report(OUTPUT_WARNING, "Server full! {0} players online".format(info['players']), STATE_WARNING)

        report(OUTPUT_OK, "{0}/{1} players online".format(info['players'], info['max_players']), STATE_OK)

    except socket.error as msg:
        print(OUTPUT_EXCEPTION.format(msg))
        sys.exit(STATE_CRITICAL)

    except AssertionError:
        print(OUTPUT_EXCEPTION.format("Invalid data returned by server"))
        sys.exit(STATE_CRITICAL)

    except UnicodeError:
        # UnicodeError is the parent of both UnicodeDecodeError (bad server
        # bytes) and UnicodeEncodeError (non-ASCII text while formatting).
        print(OUTPUT_EXCEPTION.format("Unable to decode server response"))
        sys.exit(STATE_CRITICAL)

    except (ValueError, IndexError):
        # Malformed player counts or a truncated reply must still produce a
        # plugin status line rather than a traceback.
        print(OUTPUT_EXCEPTION.format("Invalid data returned by server"))
        sys.exit(STATE_CRITICAL)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
38
check_ping_by_hostname
Executable file
38
check_ping_by_hostname
Executable file
@@ -0,0 +1,38 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# check_ping_by_hostname
|
||||||
|
#
|
||||||
|
# Attempts to resolve the name for and ping ourselves based on the device's
|
||||||
|
# hostname.
|
||||||
|
#
|
||||||
|
|
||||||
|
name="$HOSTNAME"

# Ensure $name is available
if [[ -z "$name" ]]; then
  echo "CRITICAL: No hostname(?!)"
  exit 2
fi

# Ensure we have the tools we need
for cmd in dig ping; do
  if ! command -v "$cmd" > /dev/null 2>&1; then
    echo "UNKNOWN: Missing command for check: $cmd"
    exit 3
  fi
done

# Ensure we can dig ourselves.
# dig can write ";; ..." diagnostics (e.g. "connection timed out") to stdout
# even with +short, which would make a bare emptiness test pass without any
# record having been returned. Drop those lines before testing.
records="$(dig +short "$name" 2> /dev/null | grep -v '^;')"
if [[ -z "$records" ]]; then
  echo "CRITICAL: Could not resolve hostname or found empty record: $name"
  exit 2
fi

# Ping ourselves. Options go before the host name: not every ping
# implementation accepts options after the first operand.
if ! ping -c 1 "$name" > /dev/null 2>&1; then
  echo "CRITICAL: Could not ping self"
  exit 2
else
  echo "OK"
  exit 0
fi
|
||||||
@@ -7,18 +7,31 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
rr="/var/run/reboot-required"
|
rr="/var/run/reboot-required"
|
||||||
|
rrpkgs="/var/run/reboot-required.pkgs"
|
||||||
# 604800 - 1 week in seconds
|
# 604800 - 1 week in seconds
|
||||||
threshold="${1:-604800}"
|
threshold_crit="${1:-604800}"
|
||||||
|
# 259200 - 3 days in seconds
|
||||||
|
threshold_warn="${2:-259200}"
|
||||||
if [ -f "$rr" ]; then
|
if [ -f "$rr" ]; then
|
||||||
# We have a pending reboot; alert in different states depending on its age
|
# We have a pending reboot; alert in different states depending on its age
|
||||||
lastmod=$(date +%s -r "$rr")
|
lastmod=$(date +%s -r "$rr")
|
||||||
now=$(date +%s)
|
now=$(date +%s)
|
||||||
if (( now - lastmod > threshold )); then
|
pkgs="$(cat "$rr")"
|
||||||
echo "CRITICAL - Pending reboot older than $threshold seconds: $(cat "$rr")"
|
if [ -f "$rrpkgs" ]; then
|
||||||
exit 2
|
pkgs="$(cat "$rrpkgs")"
|
||||||
else
|
fi
|
||||||
echo "WARNING - Pending reboot: $(cat "$rr")"
|
if [ -z "$pkgs" ]; then
|
||||||
|
pkgs="(No output)"
|
||||||
|
fi
|
||||||
|
if (( now - lastmod < threshold_warn )); then
|
||||||
|
echo "OK - Pending reboot: $pkgs"
|
||||||
|
exit 0
|
||||||
|
elif (( now - lastmod < threshold_crit )); then
|
||||||
|
echo "WARNING - Pending reboot older than $threshold_warn seconds: $pkgs"
|
||||||
exit 1
|
exit 1
|
||||||
|
else
|
||||||
|
echo "CRITICAL - Pending reboot older than $threshold_crit seconds: $pkgs"
|
||||||
|
exit 2
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
# We have no pending reboots
|
# We have no pending reboots
|
||||||
|
|||||||
295
check_temp
Executable file
295
check_temp
Executable file
@@ -0,0 +1,295 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
# #
|
||||||
|
# Copyright (C) 2011 Jack-Benny Persson <jake@cyberinfo.se> #
|
||||||
|
# #
|
||||||
|
# This program is free software; you can redistribute it and/or modify #
|
||||||
|
# it under the terms of the GNU General Public License as published by #
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or #
|
||||||
|
# (at your option) any later version. #
|
||||||
|
# #
|
||||||
|
# This program is distributed in the hope that it will be useful, #
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
||||||
|
# GNU General Public License for more details. #
|
||||||
|
# #
|
||||||
|
# You should have received a copy of the GNU General Public License #
|
||||||
|
# along with this program; if not, write to the Free Software #
|
||||||
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #
|
||||||
|
# #
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# #
|
||||||
|
# Nagios plugin to monitor CPU and M/B temperature with sensors. #
|
||||||
|
# Written in Bash (and uses sed & awk). #
|
||||||
|
# Latest version of check_temp can be found at the below URL: #
|
||||||
|
# https://github.com/jackbenny/check_temp #
|
||||||
|
# #
|
||||||
|
# If you are having problems getting it to work, check the instructions in #
|
||||||
|
# the README first. It walks you through installing lm-sensors and getting it to #
|
||||||
|
# display sensor data. #
|
||||||
|
# #
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
VERSION="Version 1.2"
|
||||||
|
AUTHOR="(c) 2011 Jack-Benny Persson (jack-benny@cyberinfo.se), (c) 2020 Onkobu Tanaake (oss@onkobutanaake.de)"
|
||||||
|
|
||||||
|
# Sensor program: second field of the whereis output line for "sensors".
SENSORPROG=$(whereis -b -B /{bin,sbin,usr} /{bin,sbin,usr}/* -f sensors | awk '{print $2}')

# Ryan's note: utils.sh is installed with nagios-plugins in with the plugins
# Check if utils.sh exists. This lets you use check_temp in a testing environment
# or outside of Nagios.
# PROGPATH is not assigned anywhere above, so unless the caller exported it the
# lookup below would resolve to "/utils.sh". Default it to this script's own
# directory; a PROGPATH supplied by the environment still takes precedence.
PROGPATH="${PROGPATH:-$(dirname -- "$0")}"
if [[ -e "$PROGPATH/utils.sh" ]]; then
  # shellcheck source=/dev/null
  . "$PROGPATH/utils.sh"
else
  STATE_OK=0
  STATE_WARNING=1
  STATE_CRITICAL=2
  STATE_UNKNOWN=3
  # STATE_DEPENDENT=4 (Commented because it's unused.)
fi
|
||||||
|
|
||||||
|
shopt -s extglob
|
||||||
|
|
||||||
|
#### Functions ####
|
||||||
|
|
||||||
|
# Print the script name and version string on one line.
# Globals: VERSION (read)
print_version() {
  printf '%s - %s\n' "$0" "$VERSION"
}
|
||||||
|
|
||||||
|
# Print the version line, the author line and the full option reference.
# Globals: AUTHOR (read); calls print_version.
print_help() {
  print_version
  printf '%s\n' "$AUTHOR"
  printf '%s\n' "Monitor temperature with the use of sensors"
  # Quoted delimiter: the text below contains nothing to expand.
  /bin/cat <<'EOT'

Options:
-h, --help
Print detailed help screen
-V, --version
Print version information
-v, --verbose
Verbose output

-s, --sensor <WORD[,DISPLAY_NAME]>
Set what to monitor, for example CPU or MB (or M/B). Check sensors for the
correct word. Default is CPU. A different display name can be used in output,
by adding it next to sensor with a comma.
It can be used more than once, with different warning/critical thresholds optionally.
-w, --warning <INTEGER>
Exit with WARNING status if above INTEGER degrees
-c, --critical <INTEGER>
Exit with CRITICAL status if above INTEGER degrees
Warning and critical thresholds must be provided before the corresponding --sensor option.
-n
Use the new sed based filter in case classic filter yields no temperature.

Examples:
./check_temp.sh [-n] -w 65 -c 75 --sensor CPU
./check_temp.sh [-n] -w 65 -c 75 --sensor CPU --sensor temp1
./check_temp.sh [-n] -w 65 -c 75 --sensor CPU -w 75 -c 85 --sensor temp1,GPU
EOT
}
|
||||||
|
|
||||||
|
|
||||||
|
###### MAIN ########
|
||||||
|
|
||||||
|
# Warning and critical thresholds start out empty.
thresh_warn=""
thresh_crit=""

# Hardware to monitor
default_sensor="CPU"
sensor_declared=false

# Overall plugin state; starts at OK.
STATE=$STATE_OK

# Bail out early unless the sensors binary was found and is executable.
if ! [[ -x "$SENSORPROG" ]]; then
  echo "It appears you don't have lm-sensors installed. You may find help in the readme for this script."
  exit "$STATE_UNKNOWN"
fi
|
||||||
|
|
||||||
|
# Raise the global plugin state to $1 if $1 is more severe than the current
# value; the state is never lowered.
# Globals:   STATE (read, written)
# Arguments: $1 - candidate state (numeric exit code)
# Returns:   0 always. The previous "[[ ... ]] && ..." form returned 1
#            whenever the state was left unchanged, which reads as a failure.
function set_state {
  if [[ "$STATE" -lt "$1" ]]; then
    STATE=$1
  fi
  return 0
}
|
||||||
|
|
||||||
|
# Read the temperature of one sensor, compare it with the current thresholds
# and fold the result into the global plugin state.
#
# Arguments:
#   $1 - sensor spec "WORD[,DISPLAY_NAME]". WORD is the pattern searched for in
#        the sensors output; DISPLAY_NAME (defaults to WORD) is used in the
#        status text and the performance data label.
# Globals read:    thresh_warn, thresh_crit, CLASSIC_FILTER, SENSORPROG,
#                  verbosity, STATE_UNKNOWN, STATE_CRITICAL, STATE_WARNING,
#                  STATE_OK
# Globals written: OUTPUT_TEXT and PERFDATA (appended to), STATE (via set_state)
# Exits with STATE_UNKNOWN, after printing the help, when WORD is empty, when a
# threshold is missing, or when the critical threshold is below the warning one.
function process_sensor {
    local spec=$1
    local sensor=${spec%%,*}
    local sensor_display=$sensor
    local whole_temp temp
    # First "+<number>" token, including a directly attached unit suffix
    local temp_pattern='+[0-9]\+\(\.[0-9]\+\)\?[^ \t,()]*'

    if [[ "$spec" == *,* ]]; then
        # Second comma-separated field is the display name
        sensor_display=${spec#*,}
        sensor_display=${sensor_display%%,*}
        # "WORD," carries an empty display name: fall back to the sensor word
        [[ -n "$sensor_display" ]] || sensor_display=$sensor
    fi

    # Check if a sensor was specified
    if [[ -z "$sensor" ]]; then
        echo "No sensor specified"
        print_help
        exit $STATE_UNKNOWN
    fi

    # Check if the thresholds have been set correctly
    if [[ -z "$thresh_warn" || -z "$thresh_crit" ]]; then
        # One or both thresholds were not specified
        echo "Threshold not set"
        print_help
        exit $STATE_UNKNOWN
    elif [[ "$thresh_crit" -lt "$thresh_warn" ]]; then
        # The warning threshold must be lower than the critical threshold
        echo "Warning temperature should be lower than critical"
        print_help
        exit $STATE_UNKNOWN
    fi

    # Get the temperature: the first float with a plus sign on the chosen line.
    # "-e" keeps a sensor word that starts with "-" from being read as an option.
    if [[ "$CLASSIC_FILTER" -eq 1 ]]; then
        # Classic filter: first output line matching the sensor word
        whole_temp=$("$SENSORPROG" | grep -e "$sensor" | head -n1 \
            | grep -o -e "$temp_pattern" | head -n1)
    else
        # New filter: second line of the output of "sensors -A <sensor>"
        whole_temp=$("$SENSORPROG" -A "$sensor" | sed -n '2 p' \
            | grep -o -e "$temp_pattern" | head -n1)
    fi

    # Keep only the integer part (the first run of digits)
    temp=
    if [[ "$whole_temp" =~ [0-9]+ ]]; then
        temp=${BASH_REMATCH[0]}
    fi

    # Verbose output
    if [[ "$verbosity" -ge 1 ]]; then
        printf '%s\n' \
            "Debugging information:" \
            "Warning threshold: $thresh_warn" \
            "Critical threshold: $thresh_crit" \
            "Verbosity level: $verbosity" \
            "Current $sensor temperature: $temp"
        echo "Temperature lines directly from sensors:"
        "$SENSORPROG"
    fi

    if [[ -z "$temp" ]]; then
        # Temperature not found for that sensor. No performance data is
        # recorded, because an entry without a value would be malformed.
        OUTPUT_TEXT="$OUTPUT_TEXT, No data found for sensor ($sensor)"
        set_state "$STATE_UNKNOWN"
        return 0
    fi

    # Performance data for the Nagios "Performance Data" field
    PERFDATA="$PERFDATA $sensor_display=$temp;$thresh_warn;$thresh_crit"
    OUTPUT_TEXT="$OUTPUT_TEXT, $sensor_display has temperature: $whole_temp"

    # And finally check the temperature against our thresholds
    if [[ "$temp" -gt "$thresh_crit" ]]; then
        set_state "$STATE_CRITICAL"
    elif [[ "$temp" -gt "$thresh_warn" ]]; then
        set_state "$STATE_WARNING"
    else
        set_state "$STATE_OK"
    fi
    return 0
}
|
||||||
|
|
||||||
|
# Classic (grep based) filter is the default; -n/--new-filter selects the other
CLASSIC_FILTER=1
# Becomes true once a sensor has been evaluated
sensor_processed=false

# Parse command line options. Looping on the argument count (rather than on
# "$1" being non-empty) makes an empty argument an error instead of silently
# ending the parse.
while [[ $# -gt 0 ]]; do
    case "$1" in

        -h | --help)
            print_help
            exit $STATE_OK
            ;;

        -V | --version)
            print_version
            exit $STATE_OK
            ;;

        -v | --verbose)
            verbosity=$(( verbosity + 1 ))
            shift
            ;;

        -w | --warning)
            if [[ -z "$2" ]]; then
                # Threshold not provided
                echo "Option $1 requires an argument"
                print_help
                exit $STATE_UNKNOWN
            elif [[ ! "$2" =~ ^[0-9]+$ ]]; then
                # Threshold is not an integer
                echo "Threshold must be an integer"
                print_help
                exit $STATE_UNKNOWN
            fi
            thresh_warn=$2
            shift 2
            ;;

        -c | --critical)
            if [[ -z "$2" ]]; then
                # Threshold not provided
                echo "Option '$1' requires an argument"
                print_help
                exit $STATE_UNKNOWN
            elif [[ ! "$2" =~ ^[0-9]+$ ]]; then
                # Threshold is not an integer
                echo "Threshold must be an integer"
                print_help
                exit $STATE_UNKNOWN
            fi
            thresh_crit=$2
            shift 2
            ;;

        -s | --sensor)
            if [[ -z "$2" ]]; then
                echo "Option $1 requires an argument"
                print_help
                exit $STATE_UNKNOWN
            fi
            sensor_declared=true
            sensors_to_check="$2"
            shift 2
            ;;

        -n | --new-filter)
            CLASSIC_FILTER=0
            shift 1
            ;;

        *)
            echo "Invalid option '$1'"
            print_help
            exit $STATE_UNKNOWN
            ;;
    esac

    # Argument order is irrelevant (Icinga2 gives no guarantees): a sensor is
    # evaluated as soon as both thresholds and a sensor are known.
    if [[ -n "$thresh_warn" && -n "$thresh_crit" ]]; then
        if [[ -n "$sensors_to_check" ]]; then
            process_sensor "$sensors_to_check"
            sensor_processed=true
            # Clear the pending sensor so later options do not evaluate it again
            sensors_to_check=
        elif [[ $# -eq 0 && "$sensor_declared" = false ]]; then
            # End of arguments and no --sensor was ever given: use the default
            process_sensor "$default_sensor"
            sensor_processed=true
        fi
    fi
done

# Nothing was evaluated, i.e. the thresholds never became complete (this
# includes a run without any arguments). Hand over to process_sensor, which
# reports the missing threshold and exits, instead of falling through to an
# empty OK result.
if [[ "$sensor_processed" = false ]]; then
    process_sensor "${sensors_to_check:-$default_sensor}"
fi
|
||||||
|
|
||||||
|
|
||||||
|
# Translate the numeric plugin state into the label used in the status line
case "$STATE" in
    "$STATE_OK") STATE_TEXT="OK" ;;
    "$STATE_WARNING") STATE_TEXT="WARNING" ;;
    "$STATE_CRITICAL") STATE_TEXT="CRITICAL" ;;
    "$STATE_UNKNOWN") STATE_TEXT="UNKNOWN" ;;
    *)
        # Unexpected value: report UNKNOWN rather than printing an empty label
        STATE_TEXT="UNKNOWN"
        STATE=$STATE_UNKNOWN
        ;;
esac

# Every entry was appended with a leading ", "; drop the one at the very start
OUTPUT_TEXT=${OUTPUT_TEXT#, }
# Only add the "|" separator when there is performance data to follow it
printf '%s\n' "TEMPERATURE $STATE_TEXT - $OUTPUT_TEXT${PERFDATA:+ |$PERFDATA}"
exit $STATE
|
||||||
Reference in New Issue
Block a user