Compare commits

...

3 Commits

Author SHA1 Message Date
5031833f39 Remove Package Updates check
It's just pointless noise, to be honest; it's way too loud. Perhaps a proper patch management solution would be in order?
2021-12-15 20:06:12 -06:00
72697a3953 Move check_disk to those restricted checks, also exclude AppImage loopback mounts 2021-12-15 19:57:20 -06:00
54a4f1539b Add some sudo rules to nagios-checker so it can start doing restricted checks 2021-12-15 19:57:08 -06:00
2 changed files with 15 additions and 7 deletions

View File

@@ -170,19 +170,12 @@
command: check_by_ssh!/usr/lib/nagios/plugins/check_load -r -w 5,4,3 -c 7,6,5
# Service checks whose commands are dispatched through check_by_ssh.
- name: CPU Utilization
  command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_cpu_stats -w 75 -c 90
- name: Disk Usage
  # Folded scalar: the lines below are joined with single spaces into one
  # command string.
  command: >-
    check_by_ssh!/usr/lib/nagios/plugins/check_disk
    -M -u GB
    -X nfs -X tracefs -X cgroup -X tmpfs -X overlay -X shm
    -w 15% -c 10% -W 15% -K 10%
    -A -I '^/run/' -I '^udev$' -I '^/var/lib/kubelet/'
- name: DNS Resolution
  command: check_by_ssh!/usr/lib/nagios/plugins/check_etc_resolv
- name: Last Ansible Play
  # Thresholds are presumably seconds (26 h warning / 36 h critical) --
  # TODO confirm against the check_file_age script.
  command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_file_age /var/lib/ansible-last-run -w 93600 -c 129600
- name: Memory Usage
  command: check_by_ssh!/usr/lib/nagios/plugins/check_memory -w 20% -c 10%
- name: Package Updates
  command: check_by_ssh!/usr/lib/nagios/plugins/check_packages
  # Extra key/value pairs for this service; "c,r" presumably limits
  # notifications to critical and recovery -- verify against the template
  # that consumes `extra`.
  extra:
    - key: notification_options
      value: c,r
- name: Ping Self over DNS
  command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_ping_by_hostname
- name: Reboot Required
@@ -209,6 +202,11 @@
hostgroup: "!role-hypervisor"
- name: Users
  command: check_by_ssh!/usr/lib/nagios/plugins/check_users -w 3 -c 5
# Privileged checks
# check_disk is invoked through sudo because it may attempt to get the free
# space of restricted mountpoints.
- name: Disk Usage
  # Folded scalar: the lines below are joined with single spaces into one
  # command string. The final -I pattern ignores paths under /tmp/.mount_*
  # (AppImage loopback mounts).
  command: >-
    check_by_ssh!/usr/bin/sudo /usr/lib/nagios/plugins/check_disk
    -M -u GB
    -X nfs -X tracefs -X cgroup -X tmpfs -X overlay -X shm
    -w 15% -c 10% -W 15% -K 10%
    -A -I '^/run/' -I '^udev$' -I '^/var/lib/kubelet/' -I'^/tmp/.mount_'
# Device role checks
# hypervisor (which is assumed to be Proxmox)
- name: PVE Unit pve-firewall.service

View File

@@ -22,6 +22,16 @@
state: present
key: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKNavw28C0mKIQVRLQDW2aoovliU1XCGaenDhIMwumK/ Nagios monitoring"
tags: [ nagios ]
# Sudoers drop-in that lets the nagios-checker account run the listed
# plugins as root without a password.
- name: assure nagios user sudo rule file
  file:
    path: /etc/sudoers.d/50-nagios-checker
    # 0440 is the default mode for sudoers files; the execute bits of the
    # previous 0750 served no purpose.
    mode: "0440"
    owner: root
    group: root
    # touch creates the file when missing; preserving both timestamps keeps
    # the task from reporting a change on every run.
    state: touch
    modification_time: preserve
    access_time: preserve
  tags: [ nagios, sudo ]
- name: assure nagios user sudo rules
  lineinfile:
    path: /etc/sudoers.d/50-nagios-checker
    line: "nagios-checker ALL = (root) NOPASSWD: {{ item }}"
    # Syntax-check the edited copy before it replaces the live file, so a
    # malformed rule can never break sudo on the host.
    validate: /usr/sbin/visudo -cf %s
  # One sudoers line is written per plugin path listed here.
  with_items:
    - /usr/lib/nagios/plugins/check_disk
  tags: [ nagios, sudo ]
- hosts: all
tasks:
- name: disable nagios user when not tagged