From 8517e842b288661ac846d7d63b6229e73df48150 Mon Sep 17 00:00:00 2001
From: Salt
Date: Mon, 10 Jan 2022 22:14:07 -0600
Subject: [PATCH] Add checks for the R720's thermal monitors, also add those thermal monitors

---
Review notes (text between the "---" line and the first "diff --git" is
ignored by git am / git apply):

* This patch was reconstructed from a copy whose line breaks and indentation
  had been collapsed. Line boundaries were re-derived from the hunk headers
  and the diffstat. The indentation of the context and removed/added lines in
  the playbooks/prod_web.yml and playbooks/tags_nagios.yml hunks is a best
  guess - verify it against the target files, or apply with
  `git apply --ignore-whitespace`.
* playbooks/device_types_r720.yml now sets lm_sensors_force_detection to the
  canonical boolean `true` instead of the YAML 1.1 truthy `yes`. The blob id
  on that file's "index" line is kept from the original patch and therefore
  no longer matches the new content; it is informational for a new file.
* NOTE(review): the two CPU temperature checks carry no `hostgroup:` key,
  while an earlier check in the same hunk does. Presumably they should be
  limited to the R720 hosts - confirm against the Nagios role's defaults.

 .gitmodules                     |  3 +++
 playbooks/device_types_r720.yml |  9 +++++++++
 playbooks/prod_web.yml          | 12 +++++++++---
 playbooks/tags_nagios.yml       |  1 +
 roles/ansible-role-lm-sensors   |  1 +
 5 files changed, 23 insertions(+), 3 deletions(-)
 create mode 100644 .gitmodules
 create mode 100755 playbooks/device_types_r720.yml
 create mode 160000 roles/ansible-role-lm-sensors

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..0463f85
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule ".role-submodules/ansible-role-lm-sensors"]
+	path = roles/ansible-role-lm-sensors
+	url = https://github.com/aisbergg/ansible-role-lm-sensors
diff --git a/playbooks/device_types_r720.yml b/playbooks/device_types_r720.yml
new file mode 100755
index 0000000..2c689cc
--- /dev/null
+++ b/playbooks/device_types_r720.yml
@@ -0,0 +1,9 @@
+#!/usr/bin/env ansible-playbook
+# vim:ft=ansible:
+---
+- hosts: device_types_r720
+  roles:
+    - role: ansible-role-lm-sensors
+      vars:
+        lm_sensors_force_detection: true
+      tags: [ lm-sensors ]
diff --git a/playbooks/prod_web.yml b/playbooks/prod_web.yml
index a8e2b55..60e02c3 100755
--- a/playbooks/prod_web.yml
+++ b/playbooks/prod_web.yml
@@ -218,11 +218,17 @@
             hostgroup: "ansible,!role-hypervisor"
           - name: Users
             command: check_by_ssh!/usr/lib/nagios/plugins/check_users -w 3 -c 5
-            # Privileged checks
-            # Required because check_disk may attempt to get the free space of
-            # restricted mountpoints
+          # Privileged checks
+          # Required because check_disk may attempt to get the free space of
+          # restricted mountpoints
           - name: Disk Usage
             command: check_by_ssh!/usr/bin/sudo /usr/lib/nagios/plugins/check_disk -M -u GB -X nfs -X tracefs -X cgroup -X tmpfs -X overlay -X shm -w 15% -c 10% -W 15% -K 10% -A -I '^/run/' -I '^udev$' -I '^/var/lib/kubelet/' -I'^/tmp/.mount_'
+          # Device type checks
+          # R720
+          - name: CPU0 Temperature
+            command: check_by_ssh!/usr/bin/sudo /usr/local/bin/monitoring-scripts/check_temp -w 65 -c 75 --sensor coretemp-isa-0000
+          - name: CPU1 Temperature
+            command: check_by_ssh!/usr/bin/sudo /usr/local/bin/monitoring-scripts/check_temp -w 65 -c 75 --sensor coretemp-isa-0001
           # Device role checks
           # hypervisor (which is assumed to be Proxmox)
           - name: PVE Unit pve-firewall.service
diff --git a/playbooks/tags_nagios.yml b/playbooks/tags_nagios.yml
index c45a98a..b1e7058 100755
--- a/playbooks/tags_nagios.yml
+++ b/playbooks/tags_nagios.yml
@@ -31,6 +31,7 @@
         line: "nagios-checker ALL = (root) NOPASSWD: {{ item }}"
       with_items:
         - /usr/lib/nagios/plugins/check_disk
+        - /usr/local/bin/monitoring-scripts/check_temp
       tags: [ nagios, sudo ]
 - hosts: all
   tasks:
diff --git a/roles/ansible-role-lm-sensors b/roles/ansible-role-lm-sensors
new file mode 160000
index 0000000..1a332f6
--- /dev/null
+++ b/roles/ansible-role-lm-sensors
@@ -0,0 +1 @@
+Subproject commit 1a332f6788d4ae24b52948850965358790861432