Refactor Nagios checks into check_by_ssh instead of NRPE
I was never particularly fond of having a random one-off daemon doing my RCE. Sure, it offers some protection, but limiting my exposure to the open internet is far preferable. I have far more trust in the OpenSSH project than I do in Nagios. For that reason, I'll be deprecating NRPE and shredding its config files once these plays have cleaned up.
This commit is contained in:
parent
b38bb4bf62
commit
bad192e93e
30
playbooks/tags_nagios.yml
Executable file
30
playbooks/tags_nagios.yml
Executable file
@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env ansible-playbook
# vim:ft=ansible:
#
# Prepares hosts in the tags_nagios group for SSH-based Nagios checks and
# removes the dedicated monitoring account from every host outside that group.
---
# Play 1: hosts that should be monitored.
- hosts: tags_nagios
  roles:
    # Hands the monitoring-scripts repository to the git role, which is
    # defined elsewhere; presumably it checks the repo out at `dest`.
    - role: git
      vars:
        git_repos:
          - repo: https://git.desu.ltd/salt/monitoring-scripts
            dest: /usr/local/bin/monitoring-scripts
      tags: [ nagios ]
  tasks:
    - name: assure nagios plugin packages
      apt:
        name:
          - monitoring-plugins
          - nagios-plugins-contrib
        # Explicit for readability; "present" is also the module default.
        state: present
      tags: [ nagios ]

    - name: assure nagios user
      user:
        name: nagios-checker
        state: present
        system: true
      tags: [ nagios ]

    # Authorizes the monitoring server's public key for the account above.
    - name: assure nagios user ssh key
      authorized_key:
        user: nagios-checker
        state: present
        key: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKNavw28C0mKIQVRLQDW2aoovliU1XCGaenDhIMwumK/ Nagios monitoring"
      tags: [ nagios ]

# Play 2: runs everywhere, but the task only acts on hosts that are NOT in
# the tags_nagios group, so untagged hosts lose the monitoring account.
- hosts: all
  tasks:
    - name: disable nagios user when not tagged
      user:
        name: nagios-checker
        state: absent
        # Also delete the directories associated with the account.
        remove: true
      when: "'tags_nagios' not in group_names"
      tags: [ nagios ]
|
@ -53,7 +53,7 @@ define hostgroup {
|
||||
{% for command in nagios_commands %}
|
||||
define command {
|
||||
command_name {{ command.name }}
|
||||
command_line {{ command.line }}
|
||||
command_line {{ command.command }}
|
||||
{% if command.extra is defined %}
|
||||
{% for kvp in command.extra %}
|
||||
{{ kvp.key }} {{ kvp.value }}
|
||||
@ -71,7 +71,7 @@ define service {
|
||||
use ansible-generic-service
|
||||
service_description {{ service.name }}
|
||||
check_command {{ service.command }}
|
||||
hostgroup_name {{ service.hostgroup }}
|
||||
hostgroup_name {{ service.hostgroup | default('ansible', true) }}
|
||||
{% if service.extra is defined %}
|
||||
{% for kvp in service.extra %}
|
||||
{{ kvp.key }} {{ kvp.value }}
|
||||
|
@ -9,6 +9,8 @@
|
||||
# This command is included in the container image
|
||||
- name: check_nrpe
|
||||
command: "$USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$"
|
||||
- name: check_by_ssh
|
||||
command: "$USER1$/check_by_ssh -H $HOSTADDRESS$ -F /opt/nagios/etc/ssh_config -q -i /opt/nagios/etc/id_ed25519 -l nagios-checker -C \"$ARG1$\""
|
||||
nagios_services:
|
||||
# Agentless checks
|
||||
- name: HTTP
|
||||
@ -19,44 +21,36 @@
|
||||
hostgroup: nagios-checkhttp
|
||||
- name: SSH
|
||||
command: check_ssh
|
||||
hostgroup: ansible
|
||||
# Agented checks
|
||||
# check_by_ssh checks
|
||||
- name: CPU Load
|
||||
command: check_nrpe!check_load
|
||||
hostgroup: nagios-nrpe
|
||||
command: check_by_ssh!/usr/lib/nagios/plugins/check_load -r -w 0.8,0.8,0.8 -c 1.0,0.9,0.9
|
||||
- name: Disk Usage
|
||||
command: check_nrpe!check_disk_all
|
||||
hostgroup: nagios-nrpe
|
||||
command: check_by_ssh!/usr/lib/nagios/plugins/check_disk -M -u GB -X nfs -X tracefs -X cgroup -X tmpfs -X overlay -X shm -w 15% -c 10% -W 15% -K 10% -A -I '^/run/' -I '^udev$' -I '^/var/lib/kubelet/'
|
||||
- name: Reboot Required
|
||||
command: check_nrpe!check_reboot_required
|
||||
hostgroup: nagios-nrpe
|
||||
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_reboot_required
|
||||
- name: Unit backup.service
|
||||
command: check_nrpe!check_systemd_backup_service
|
||||
hostgroup: nagios-nrpe
|
||||
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit backup.service
|
||||
- name: Unit backup.timer
|
||||
command: check_nrpe!check_systemd_backup_timer
|
||||
hostgroup: nagios-nrpe
|
||||
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit backup.timer
|
||||
- name: Unit docker.service
|
||||
command: check_nrpe!check_systemd_docker_service
|
||||
hostgroup: nagios-nrpe
|
||||
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit docker.service
|
||||
- name: Users
|
||||
command: check_nrpe!check_users
|
||||
hostgroup: nagios-nrpe
|
||||
command: check_by_ssh!/usr/lib/nagios/plugins/check_users -w 3 -c 5
|
||||
# Tag-specific checks
|
||||
# ansible-pull
|
||||
- name: Unit ansible-pull.service
|
||||
command: check_nrpe!check_systemd_ansiblepull_service
|
||||
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit ansible-pull.service
|
||||
hostgroup: ansible-pull
|
||||
- name: Unit ansible-pull.timer
|
||||
command: check_nrpe!check_systemd_ansiblepull_timer
|
||||
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit ansible-pull.timer
|
||||
hostgroup: ansible-pull
|
||||
# nagios-checkpgsql
|
||||
- name: PostgreSQL
|
||||
command: check_nrpe!check_pgsql
|
||||
command: "check_by_ssh!/usr/lib/nagios/plugins/check_pgsql -H localhost -l nagios -p {{ secret_postgresql_monitoring_password }} -w 2 -c 5"
|
||||
hostgroup: nagios-checkpgsql
|
||||
# nagios-nrpeswap
|
||||
- name: Swap Usage
|
||||
command: check_nrpe!check_swap
|
||||
command: check_by_ssh!/usr/lib/nagios/plugins/check_swap -w 20% -c 10%
|
||||
hostgroup: nagios-nrpeswap
|
||||
register: config
|
||||
tags: [ nagios, template ]
|
||||
|
1
site.yml
1
site.yml
@ -9,6 +9,7 @@
|
||||
# Tags for fundamental services
|
||||
- import_playbook: playbooks/tags_zerotier.yml
|
||||
- import_playbook: playbooks/tags_snmp.yml
|
||||
- import_playbook: playbooks/tags_nagios.yml
|
||||
- import_playbook: playbooks/tags_nagios-nrpe.yml
|
||||
# Device roles
|
||||
- import_playbook: playbooks/device_roles_pik8s-storage.yml
|
||||
|
Loading…
Reference in New Issue
Block a user