Compare commits

..

No commits in common. "c362effe2a7fb6d7d7db987c6fcf63485e763e08" and "8e845b5f4e1e8c4dc06005dad2eef13508fd789b" have entirely different histories.

5 changed files with 100 additions and 71 deletions

67
playbooks/tags_nagios-nrpe.yml Executable file
View File

@ -0,0 +1,67 @@
#!/usr/bin/env ansible-playbook
# vim:ft=ansible:
---
# Play: for every host in the tags_nagios-nrpe group, apply the git role
# (monitoring-scripts checkout) and the nrpe role (agent configuration).
- hosts: tags_nagios-nrpe
  roles:
    # The checkout destination below is the directory that the
    # nrpe_command_extra script paths point into.
    - role: git
      vars:
        git_repos:
          - repo: https://git.desu.ltd/salt/monitoring-scripts
            dest: /usr/local/bin/monitoring-scripts
      tags: [ nrpe ]
    # How these variables are rendered is up to the nrpe role, which is not
    # visible from this playbook.
    - role: nrpe
      vars:
        nrpe_server_bind_address: 0.0.0.0
        nrpe_server_allowed_hosts:
          - 127.0.0.0/24  # Local machines
          - 192.168.0.0/16
          - 172.16.0.0/12
          - 10.0.0.0/8
          - 45.79.24.6/32  # web3.desu.ltd
        nrpe_plugin_packages:
          - monitoring-plugins
          - nagios-plugins-contrib
        # Checks given by bare plugin name. The keys are the command names
        # the Nagios side requests, e.g. check_nrpe!check_load.
        nrpe_command:
          check_disk_all:
            script: check_disk
            option: -M -u GB -X nfs -X tracefs -X cgroup -X tmpfs -X overlay -X shm -w 15% -c 10% -W 15% -K 10% -A -I '^/run/' -I '^udev$' -I '^/var/lib/kubelet/'
          check_load:
            script: check_load
            option: -r -w 0.8,0.8,0.8 -c 1.0,0.9,0.9
          check_pgsql:
            script: check_pgsql
            # NOTE(review): the monitoring password is interpolated straight
            # into the command line; consider a credentials file instead.
            option: "-H localhost -l nagios -p {{ secret_postgresql_monitoring_password }} -w 2 -c 5"
          check_swap:
            script: check_swap
            option: -w 20% -c 10%
          check_users:
            script: check_users
            option: -w 3 -c 5
        # Checks given by absolute path into the monitoring-scripts checkout.
        nrpe_command_extra:
          check_reboot_required:
            script: /usr/local/bin/monitoring-scripts/check_reboot_required
            # Quoted so it stays a string like every other option value.
            # 604800 equals 7 days in seconds; presumably a grace period,
            # confirm against the script.
            option: "604800"
          check_systemd_ansiblepull_service:
            script: /usr/local/bin/monitoring-scripts/check_systemd_unit
            option: ansible-pull.service
          check_systemd_ansiblepull_timer:
            script: /usr/local/bin/monitoring-scripts/check_systemd_unit
            option: ansible-pull.timer
          check_systemd_backup_service:
            script: /usr/local/bin/monitoring-scripts/check_systemd_unit
            option: backup.service
          check_systemd_backup_timer:
            script: /usr/local/bin/monitoring-scripts/check_systemd_unit
            option: backup.timer
          check_systemd_docker_service:
            script: /usr/local/bin/monitoring-scripts/check_systemd_unit
            option: docker.service
      tags: [ nrpe ]
# Play: on every host, stop and disable the NRPE daemon unless the host is a
# member of the tags_nagios-nrpe group.
- hosts: all
  tasks:
    - name: disable nrped when not tagged
      systemd:
        name: "{{ item }}"
        state: stopped
        enabled: false
      with_items:
        - nagios-nrpe-server.service
      # Both conditions must hold (list entries are ANDed).
      # NOTE(review): `services` is not set anywhere in this play; presumably
      # it comes from an earlier service_facts run. Confirm it is always
      # populated before this task executes.
      when:
        - "'tags_nagios-nrpe' not in group_names"
        - item in services
      # NOTE(review): the `zerotier` tag looks like a copy-paste leftover;
      # kept so that existing `--tags zerotier` runs behave as before.
      tags: [ nrpe, zerotier ]

View File

@ -1,30 +0,0 @@
#!/usr/bin/env ansible-playbook
# vim:ft=ansible:
---
# Play: for every host in the tags_nagios group, check out the monitoring
# scripts, install the plugin packages and create the nagios-checker account
# with its authorized SSH key.
- hosts: tags_nagios
  roles:
    - role: git
      vars:
        git_repos:
          - repo: https://git.desu.ltd/salt/monitoring-scripts
            dest: /usr/local/bin/monitoring-scripts
      tags: [ nagios ]
  tasks:
    - name: assure nagios plugin packages
      apt:
        name:
          - monitoring-plugins
          - nagios-plugins-contrib
      tags: [ nagios ]
    - name: assure nagios user
      user:
        name: nagios-checker
        state: present
        system: true
      tags: [ nagios ]
    # Public key only; the matching private key is not part of this play.
    - name: assure nagios user ssh key
      authorized_key:
        user: nagios-checker
        state: present
        key: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKNavw28C0mKIQVRLQDW2aoovliU1XCGaenDhIMwumK/ Nagios monitoring"
      tags: [ nagios ]
# Play: on every host that is not in the tags_nagios group, delete the
# nagios-checker account (remove: true is passed to the user module).
- hosts: all
  tasks:
    - name: disable nagios user when not tagged
      user:
        name: nagios-checker
        state: absent
        remove: true
      when: "'tags_nagios' not in group_names"
      tags: [ nagios ]

View File

@ -1,9 +1,5 @@
#
# STOP
#
# This file is managed via Ansible; any changes made WILL be overwritten
# If you need to add site-specific configuration, do it in another file!
#
# This file is managed via Ansible
# Any changes made WILL be overwritten
# Templates
define host {
@ -41,27 +37,24 @@ define contact {
email rehashedsalt@cock.li
}
# Default hostgroup
# Default hostgroup and its checks
# Declares the host group "ansible" with the display alias
# "Ansible-managed Hosts".
define hostgroup {
hostgroup_name ansible
alias Ansible-managed Hosts
}
# Service "SSH": runs check_ssh for the "ansible" host group.
# ansible-generic-service is referenced via `use`; it is defined outside
# this view (presumably a template in the Templates section -- confirm).
define service {
use ansible-generic-service
service_description SSH
check_command check_ssh
hostgroup_name ansible
}
# Commands
# Everything here is defined in nagios_commands
{% if nagios_commands is defined %}
{% for command in nagios_commands %}
define command {
command_name {{ command.name }}
command_line {{ command.command }}
{% if command.extra is defined %}
{% for kvp in command.extra %}
{{ kvp.key }} {{ kvp.value }}
{% endfor %}
{% endif %}
# This command is included in the container image
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
{% endfor %}
{% endif %}
# Services
# Everything here is defined in nagios_services
@ -71,7 +64,7 @@ define service {
use ansible-generic-service
service_description {{ service.name }}
check_command {{ service.command }}
hostgroup_name {{ service.hostgroup | default('ansible', true) }}
hostgroup_name {{ service.hostgroup }}
{% if service.extra is defined %}
{% for kvp in service.extra %}
{{ kvp.key }} {{ kvp.value }}

View File

@ -5,12 +5,6 @@
- name: template out config for nagios
template: src=nagios-ansible.cfg.j2 dest=/data/nagios/etc/objects/ansible.cfg owner=root group=root mode=0644
vars:
nagios_commands:
# This command is included in the container image
- name: check_nrpe
command: "$USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$"
- name: check_by_ssh
command: "$USER1$/check_by_ssh -H $HOSTADDRESS$ -F /opt/nagios/etc/ssh_config -q -i /opt/nagios/etc/id_ed25519 -l nagios-checker -C \"$ARG1$\""
nagios_services:
# Agentless checks
- name: HTTP
@ -19,38 +13,43 @@
- name: HTTPS
command: check_http!--ssl
hostgroup: nagios-checkhttp
- name: SSH
command: check_ssh
# check_by_ssh checks
# Agented checks
- name: CPU Load
command: check_by_ssh!/usr/lib/nagios/plugins/check_load -r -w 0.8,0.8,0.8 -c 1.0,0.9,0.9
command: check_nrpe!check_load
hostgroup: nagios-nrpe
- name: Disk Usage
command: check_by_ssh!/usr/lib/nagios/plugins/check_disk -M -u GB -X nfs -X tracefs -X cgroup -X tmpfs -X overlay -X shm -w 15% -c 10% -W 15% -K 10% -A -I '^/run/' -I '^udev$' -I '^/var/lib/kubelet/'
command: check_nrpe!check_disk_all
hostgroup: nagios-nrpe
- name: Reboot Required
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_reboot_required
command: check_nrpe!check_reboot_required
hostgroup: nagios-nrpe
- name: Unit backup.service
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit backup.service
command: check_nrpe!check_systemd_backup_service
hostgroup: nagios-nrpe
- name: Unit backup.timer
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit backup.timer
command: check_nrpe!check_systemd_backup_timer
hostgroup: nagios-nrpe
- name: Unit docker.service
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit docker.service
command: check_nrpe!check_systemd_docker_service
hostgroup: nagios-nrpe
- name: Users
command: check_by_ssh!/usr/lib/nagios/plugins/check_users -w 3 -c 5
command: check_nrpe!check_users
hostgroup: nagios-nrpe
# Tag-specific checks
# ansible-pull
- name: Unit ansible-pull.service
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit ansible-pull.service
command: check_nrpe!check_systemd_ansiblepull_service
hostgroup: ansible-pull
- name: Unit ansible-pull.timer
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit ansible-pull.timer
command: check_nrpe!check_systemd_ansiblepull_timer
hostgroup: ansible-pull
# nagios-checkpgsql
- name: PostgreSQL
command: "check_by_ssh!/usr/lib/nagios/plugins/check_pgsql -H localhost -l nagios -p {{ secret_postgresql_monitoring_password }} -w 2 -c 5"
command: check_nrpe!check_pgsql
hostgroup: nagios-checkpgsql
# nagios-nrpeswap
- name: Swap Usage
command: check_by_ssh!/usr/lib/nagios/plugins/check_swap -w 20% -c 10%
command: check_nrpe!check_swap
hostgroup: nagios-nrpeswap
register: config
tags: [ nagios, template ]

View File

@ -9,7 +9,7 @@
# Tags for fundamental services
- import_playbook: playbooks/tags_zerotier.yml
- import_playbook: playbooks/tags_snmp.yml
- import_playbook: playbooks/tags_nagios.yml
- import_playbook: playbooks/tags_nagios-nrpe.yml
# Device roles
- import_playbook: playbooks/device_roles_pik8s-storage.yml
- import_playbook: playbooks/device_roles_pik8s.yml