#!/usr/bin/env ansible-playbook
# vim:ft=ansible:
# Webservers
---
# Play: web1 -- ensures the "web" docker network, includes the per-application
# task files listed below, and applies the backup, git and ingress roles.
- hosts: web1.dallas.mgmt.desu.ltd
  # Defaults applied to every docker_container task run within this play.
  module_defaults:
    docker_container:
      state: started
      restart_policy: unless-stopped
      pull: true
  # NOTE(review): Ansible executes `roles` before `tasks` regardless of key
  # order, so the network and application tasks here run after the roles
  # below. The web2 play uses `pre_tasks` instead -- confirm which ordering
  # is intended before changing it.
  tasks:
    - name: ensure docker network
      docker_network:
        name: web
      tags: [docker]
    - name: include tasks for applications
      include_tasks: "tasks/{{ item }}"
      loop:
        - app/redis.yml
        - web/9iron.yml
        - web/desultd.yml
        - web/gitea.yml
        - web/nextcloud.yml
        - web/srv.yml
      tags: [always]
  roles:
    - role: backup
      vars:
        # Extra paths handed to the backup role, and paths to exclude.
        backup_s3backup_list_extra:
          - /app/gitea/gitea
          - /data
          - /srv/desu.ltd
        backup_s3backup_exclude_list_extra:
          - /var/lib/gitea/log
          - /data/gitea/data/gitea/log
      tags: [backup]
    - role: git
      vars:
        git_repos:
          - repo: https://git.desu.ltd/salt/gitea-custom
            dest: /data/gitea/data/gitea/custom
      tags: [web, git]
    - role: ingress
      vars:
        # One entry per server name; `contents` and `directives` hold raw
        # nginx configuration text passed to the ingress role.
        ingress_servers:
          # desu.ltd
          - name: desu.ltd
            proxy_pass: http://desultd:80
            locations:
              # Static JSON answers for Matrix .well-known discovery.
              - location: /.well-known/matrix/server
                contents: |
                  default_type application/json;
                  return 200 '{"m.server":"matrix.desu.ltd:443"}';
              - location: /.well-known/matrix/client
                contents: |
                  default_type application/json;
                  return 200 '{"m.homeserver":{"base_url":"https://matrix.desu.ltd"}}';
          - name: git.desu.ltd
            proxy_pass: http://gitea:3000
          - name: nc.desu.ltd
            directives:
              - 'add_header Strict-Transport-Security "max-age=31536000"'
              - "client_max_body_size 0"
            proxy_pass: http://nextcloud:80
            locations:
              # Redirect CalDAV/CardDAV discovery to the DAV endpoint and
              # other .well-known requests to index.php.
              - location: "^~ /.well-known"
                contents: |
                  location = /.well-known/carddav { return 301 /remote.php/dav/; }
                  location = /.well-known/caldav { return 301 /remote.php/dav/; }
                  location ^~ /.well-known { return 301 /index.php$uri; }
                  try_files $uri $uri/ =404;
          # 9iron
          - name: www.9iron.club
            directives:
              # Redirect the www host to the bare domain.
              - "return 301 $scheme://9iron.club$request_uri"
          - name: 9iron.club
            proxy_pass: http://9iron:80
          - name: srv.9iron.club
            proxy_pass: http://srv:80
      tags: [web, docker, ingress]
# Play: web2 -- ensures the "web" docker network, includes the per-application
# task files listed below, and applies the backup and ingress roles.
- hosts: web2.dallas.mgmt.desu.ltd
  # Defaults applied to every docker_container task run within this play.
  module_defaults:
    docker_container:
      state: started
      restart_policy: unless-stopped
      pull: true
  # `pre_tasks` run before `roles`, so the network and application tasks
  # execute ahead of the roles below.
  # NOTE(review): the web1 and web3 plays use `tasks` (which run after roles)
  # for the same steps -- confirm which ordering is intended.
  pre_tasks:
    - name: ensure docker network
      docker_network:
        name: web
      tags: [docker]
    - name: include tasks for applications
      include_tasks: "tasks/{{ item }}"
      loop:
        - app/redis.yml
        - web/jenkins.yml
        - web/libreddit.yml
        - web/pleroma.yml
      tags: [always]
  roles:
    - role: backup
      vars:
        backup_s3backup_list_extra:
          - /data
      tags: [backup]
    - role: ingress
      vars:
        # One entry per server name; `contents` and `directives` hold raw
        # nginx configuration text passed to the ingress role.
        ingress_servers:
          - name: cowfee.moe
            proxy_pass: http://pleroma:4000
          - name: lr.cowfee.moe
            directives:
              # Send both logs for this server to /dev/null.
              - "access_log /dev/null"
              - "error_log /dev/null"
            proxy_pass: http://libreddit:8080
          - name: jenkins.desu.ltd
            locations:
              - location: "/"
                contents: |
                  proxy_set_header Host $host;
                  proxy_set_header X-Real-IP $remote_addr;
                  proxy_set_header X-Forwarded-Proto https;
                  proxy_set_header X-Forwarded-Port 443;
                  proxy_pass http://jenkins:8080;
              # Strip the 8-hex-digit segment from /static/<hex>/... URLs.
              - location: |
                  ~ "^/static/[0-9a-fA-F]{8}\/(.*)$"
                contents: |
                  rewrite "^/static/[0-9a-fA-F]{8}\/(.*)" /$1 last;
              # Use files under /data/jenkins/home/ when they exist;
              # otherwise rewrite the request.
              - location: "/userContent"
                contents: |
                  root /data/jenkins/home/;
                  if (!-f $request_filename) {
                    rewrite (.*) /$1 last;
                    break;
                  }
      tags: [web, docker, ingress]
# Play: web3 -- ensures the "web" docker network, includes the per-application
# task files listed below, and applies the backup, nagios and ingress roles.
- hosts: web3.dallas.mgmt.desu.ltd
  # Defaults applied to every docker_container task run within this play.
  module_defaults:
    docker_container:
      state: started
      restart_policy: unless-stopped
      pull: true
  # NOTE(review): Ansible executes `roles` before `tasks` regardless of key
  # order, so the network and application tasks here run after the roles
  # below. The web2 play uses `pre_tasks` instead -- confirm which ordering
  # is intended before changing it.
  tasks:
    - name: ensure docker network
      docker_network:
        name: web
      tags: [docker]
    - name: include tasks for applications
      include_tasks: "tasks/{{ item }}"
      loop:
        - app/redis.yml
        - web/movienight.yml
        - web/netbox.yml
      tags: [always]
  roles:
    - role: backup
      vars:
        backup_s3backup_list_extra:
          - /data
      tags: [backup]
    # TODO: Replace this with Naemon(?)
    - role: nagios
      vars:
        nagios_matrix_server: "https://matrix.desu.ltd"
        nagios_matrix_room: "!NWNCKlNmOTcarMcMIh:desu.ltd"
        nagios_matrix_token: "{{ secret_nagios_matrix_token }}"
        nagios_data_dir: /data/nagios
        nagios_admin_pass: "{{ secret_nagios_admin_pass }}"
        # Notification targets: a Matrix contact and an email contact.
        nagios_contacts:
          - name: matrix
            host_notification_commands: notify-host-by-matrix
            service_notification_commands: notify-service-by-matrix
            host_notification_period: ansible-not-late-at-night
            service_notification_period: ansible-not-late-at-night
            extra:
              - key: contactgroups
                value: ansible
          - name: salt
            host_notification_commands: notify-host-by-email
            service_notification_commands: notify-service-by-email
            extra:
              - key: email
                value: alerts@babor.tech
        # Command definitions; $...$ tokens are Nagios macros and are kept
        # verbatim.
        nagios_commands:
          # This command is included in the container image
          - name: check_nrpe
            command: "$USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$"
          - name: check_by_ssh
            command: "$USER1$/check_by_ssh -H $HOSTADDRESS$ -F /opt/nagios/etc/ssh_config -t 30 -q -i /opt/nagios/etc/id_ed25519 -l nagios-checker -C \"$ARG1$\""
          - name: notify-host-by-matrix
            command: "/usr/bin/printf \"%b\" \"$NOTIFICATIONTYPE$ - $HOSTNAME$ is $HOSTSTATE$\\nAddress: $HOSTADDRESS$\\nInfo: $HOSTOUTPUT$\\nDate/Time: $LONGDATETIME$\" | /opt/Custom-Nagios-Plugins/notify-by-matrix"
          - name: notify-service-by-matrix
            command: "/usr/bin/printf \"%b\" \"$NOTIFICATIONTYPE$ - Service $HOSTALIAS$ - $SERVICEDESC$ is $SERVICESTATE$\\nInfo: $SERVICEOUTPUT$\\nDate/Time: $LONGDATETIME$\" | /opt/Custom-Nagios-Plugins/notify-by-matrix"
        # Service checks. Entries without a `hostgroup` key rely on whatever
        # default the nagios role applies (not visible in this file).
        nagios_services:
          # Agentless checks
          - name: HTTP
            command: check_http
            hostgroup: tag-nagios-checkhttp
          - name: HTTPS
            command: check_http!--ssl
            hostgroup: tag-nagios-checkhttp
          - name: SSH
            command: check_ssh
          # check_by_ssh checks
          - name: CPU Utilization
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_cpu_stats -w 75 -c 90
          - name: DNS Resolution
            command: check_by_ssh!/usr/lib/nagios/plugins/check_etc_resolv
          # Thresholds are presumably seconds (176400 = 49h, 216000 = 60h)
          # -- confirm against the check_file_age script.
          - name: Last Ansible Play
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_file_age /var/lib/ansible-last-run -w 176400 -c 216000
          - name: Memory Usage
            command: check_by_ssh!/usr/lib/nagios/plugins/check_memory -w 10% -c 5%
            hostgroup: "ansible,!tag-prov-zfs"
          - name: Ping Self over DNS
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_ping_by_hostname
          - name: Reboot Required
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_reboot_required
          - name: Unit atd.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit atd.service
          - name: Unit backup.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit backup.service
            hostgroup: "ansible,!role-hypervisor"
          - name: Unit backup.timer
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit backup.timer
            hostgroup: "ansible,!role-hypervisor"
          - name: Unit cron.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit cron.service
          - name: Unit dbus.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit dbus.service
          - name: Unit ssh.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit ssh.service
          - name: Unit systemd-resolved.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit systemd-resolved.service
            hostgroup: "ansible,!role-hypervisor"
          - name: Users
            command: check_by_ssh!/usr/lib/nagios/plugins/check_users -w 3 -c 5
          # Privileged checks
          # Required because check_disk may attempt to get the free space of
          # restricted mountpoints
          - name: Disk Usage
            command: check_by_ssh!/usr/bin/sudo /usr/lib/nagios/plugins/check_disk -M -u GB -X nfs -X tracefs -X cgroup -X tmpfs -X overlay -X shm -w 15% -c 10% -W 15% -K 10% -A -I '^/run/' -I '^udev$' -I '^/var/lib/kubelet/' -I'^/tmp/.mount_'
          # Device type checks
          # R720
          - name: CPU0 Temperature
            command: check_by_ssh!/usr/bin/sudo /usr/local/bin/monitoring-scripts/check_temp -n -w 65 -c 75 --sensor coretemp-isa-0000
            hostgroup: device-type-r720
          - name: CPU1 Temperature
            command: check_by_ssh!/usr/bin/sudo /usr/local/bin/monitoring-scripts/check_temp -n -w 65 -c 75 --sensor coretemp-isa-0001
            hostgroup: device-type-r720
          # Pi 4 (2G, 4G and 4G-storage device types)
          - name: CPU Temperature
            command: check_by_ssh!/usr/bin/sudo /usr/local/bin/monitoring-scripts/check_temp -n -w 65 -c 75 --sensor cpu_thermal-virtual-0
            hostgroup: device-type-pi4b-2g,device-type-pi4b-4g,device-type-pi4b-4g-storage
          # Device role checks
          # hypervisor (which is assumed to be Proxmox)
          - name: PVE Unit pve-firewall.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit pve-firewall.service
            hostgroup: role-hypervisor
          - name: PVE Unit spiceproxy.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit spiceproxy.service
            hostgroup: role-hypervisor
          - name: PVE Unit pve-ha-crm.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit pve-ha-crm.service
            hostgroup: role-hypervisor
          - name: PVE Unit pvedaemon.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit pvedaemon.service
            hostgroup: role-hypervisor
          - name: PVE Unit pvefw-logger.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit pvefw-logger.service
            hostgroup: role-hypervisor
          - name: PVE Unit pveproxy.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit pveproxy.service
            hostgroup: role-hypervisor
          - name: PVE Unit pve-cluster.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit pve-cluster.service
            hostgroup: role-hypervisor
          - name: PVE Unit pvestatd.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit pvestatd.service
            hostgroup: role-hypervisor
          # Tag-specific checks
          # ansible-pull
          - name: Unit ansible-pull.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit ansible-pull.service
            hostgroup: tag-ansible-pull
          - name: Unit ansible-pull.timer
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit ansible-pull.timer
            hostgroup: tag-ansible-pull
          # docker
          - name: Unit docker.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit docker.service
            hostgroup: "ansible,!tag-no-docker"
          - name: Docker Status
            command: check_by_ssh!/usr/bin/sudo /usr/local/bin/monitoring-scripts/check_docker --no-ok --status running
            hostgroup: tag-nagios-checkdocker
          - name: Docker CPU Usage
            command: check_by_ssh!/usr/bin/sudo /usr/local/bin/monitoring-scripts/check_docker --no-ok --cpu 85:90
            hostgroup: tag-nagios-checkdocker
          - name: Docker Memory Usage
            command: check_by_ssh!/usr/bin/sudo /usr/local/bin/monitoring-scripts/check_docker --no-ok --memory 90:95:%
            hostgroup: tag-nagios-checkdocker
          # nagios-checkpgsql
          # NOTE(review): the monitoring password is interpolated directly
          # into these command lines, so it is presumably visible in the
          # rendered Nagios config and in process listings -- consider a
          # password file instead.
          - name: PSQL
            command: "check_by_ssh!/usr/lib/nagios/plugins/check_pgsql -H localhost -l nagios -p {{ secret_postgresql_monitoring_password }} -w 2 -c 5"
            hostgroup: tag-nagios-checkpgsql
          - name: PSQL Connections
            command: "check_by_ssh!/usr/lib/nagios/plugins/check_pgsql -H localhost -l nagios -p {{ secret_postgresql_monitoring_password }} -w 2 -c 5 -q 'select (select count(*)::float used from pg_stat_activity) / (select setting::int max_conn from pg_settings where name=\\$\\$max_connections\\$\\$)' -W 0.7-0.8 -C 0.8-1.0"
            hostgroup: tag-nagios-checkpgsql
          # https://rhaas.blogspot.com/2020/02/useless-vacuuming.html
          - name: PSQL Old Xacts
            command: "check_by_ssh!/usr/lib/nagios/plugins/check_pgsql -H localhost -l nagios -p {{ secret_postgresql_monitoring_password }} -w 2 -c 5 -q 'select count(*)::float from pg_prepared_xacts where age(transaction) > 5000000' -W 500-1000 -C 1000-1000000"
            hostgroup: tag-nagios-checkpgsql
          - name: Unit postgresql.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit postgresql.service
            hostgroup: tag-nagios-checkpgsql
          # nagios-checkswap
          - name: Swap Usage
            command: check_by_ssh!/usr/lib/nagios/plugins/check_swap -w 20% -c 10%
            hostgroup: tag-nagios-checkswap
          # zerotier
          - name: Unit zerotier-one.service
            command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit zerotier-one.service
            hostgroup: tag-zt-personal
      tags: [nagios]
    - role: ingress
      vars:
        ingress_servers:
          - name: netbox.desu.ltd
            proxy_pass: http://netbox:8080
          - name: nagios.desu.ltd
            proxy_pass: http://nagios:80
          - name: movie.desu.ltd
            proxy_pass: http://movienight:8089
      tags: [web, docker, ingress]