Monit セットアップ

node/1187

Monit によるサービス監視を行う。

1 . インストール

$ sudo apt-get install monit

2 . 監視のための情報確認

CPU コア数

$ grep ^cpu\\scores /proc/cpuinfo | uniq
cpu cores       : 8
$ lscpu | grep -i core
Thread(s) per core:  2
Core(s) per socket:  8

3 . 設定

$ sudo vi /etc/monit/monitrc
# Global Monit settings: mail delivery, alert recipient, and the HTTP interface.
# Alert mail is handed to the mail server on localhost.
set mailserver localhost
# Alert recipient is "root"; "not on { instance, action }" excludes those two event types.
set alert root not on { instance, action }  #Set email address to receive alerts.
# to issue Monit commands from the command line
# The HTTP interface listens on port 2812, uses the localhost address, and allows localhost.
set httpd port 2812 and
        use address localhost
        allow localhost
# System-wide resource checks for this host; every rule here only raises an alert.
check system $HOST
    # 15-minute load average threshold.
    # NOTE(review): 8 presumably matches the core count found in step 2 - confirm per host.
    if loadavg (15min) > 8 then alert
    # CPU must stay above 95% for 10 consecutive cycles before alerting.
    if cpu usage > 95% for 10 cycles then alert
    # Memory and swap thresholds alert as soon as they are exceeded.
    if memory usage > 75% then alert
    if swap usage > 25% then alert

# Filesystem check for the device /dev/md3, registered under the name "datafs".
check filesystem datafs with path /dev/md3
    # Start/stop methods mount and unmount /data.
    start program  = "/bin/mount /data"
    stop program  = "/bin/umount /data"
    # If mode, owner, or group differ from 660 / root / disk, stop monitoring this service.
    if failed permission 660 then unmonitor
    if failed uid "root" then unmonitor
    if failed gid "disk" then unmonitor
    # Space and inode usage: alert at the lower threshold, run the stop action at 95%.
    if space usage > 80% for 5 times within 15 cycles then alert
    if space usage > 95% then stop
    if inode usage > 90% then alert
    if inode usage > 95% then stop
    # I/O thresholds (throughput and operations per second), sustained for 5 cycles.
    if read rate > 1 MB/s for 5 cycles then alert
    if read rate > 500 operations/s for 5 cycles then alert
    if write rate > 1 MB/s for 5 cycles then alert
    if write rate > 500 operations/s for 5 cycles then alert
    # Service time above 10 ms in 3 of the last 5 cycles.
    if service time > 10 milliseconds for 3 times within 5 cycles then alert
    # Place this service in the "server" group.
    group server
# Restarting Unhealthy LEMP Services

# nginx: monitored through its pidfile; restarted when the HTTP probe fails.
check process nginx with pidfile /var/run/nginx.pid
    group www-data
    start program = "/bin/systemctl start nginx" with timeout 60 seconds
    # Must be "stop program": the restart action needs a stop method as well as a start method.
    stop program = "/bin/systemctl stop nginx"
    # Probe HTTP on port 80 of octaviadata.com; restart nginx on failure.
    if failed host octaviadata.com port 80 protocol http then restart
    # 5 restarts within 5 cycles trigger the timeout action instead of another restart.
    if 5 restarts within 5 cycles then timeout

# MariaDB: monitored through the mysqld pidfile; restarted when the unix socket check fails.
check process mariadb with pidfile /var/run/mysqld/mysqld.pid
    start program = "/bin/systemctl start mysql" with timeout 60 seconds
    # Must be "stop program": the restart action needs a stop method as well as a start method.
    stop program = "/bin/systemctl stop mysql"
    # Connect to the server's unix socket; restart on failure.
    if failed unixsocket /var/run/mysqld/mysqld.sock then restart
    # 5 restarts within 5 cycles trigger the timeout action instead of another restart.
    if 5 restarts within 5 cycles then timeout

# PHP-FPM 7.2: monitored through its pidfile; restarted when the unix socket check fails.
check process php-fpm with pidfile /var/run/php/php7.2-fpm.pid
    start program = "/bin/systemctl start php7.2-fpm" with timeout 60 seconds
    # Must be "stop program": the restart action needs a stop method as well as a start method.
    stop program = "/bin/systemctl stop php7.2-fpm"
    # Connect to the FPM unix socket; restart on failure.
    if failed unixsocket /var/run/php/php7.2-fpm.sock then restart
    # 5 restarts within 5 cycles trigger the timeout action instead of another restart.
    if 5 restarts within 5 cycles then timeout

# Redis: monitored through its pidfile; restarted when the TCP port check fails.
check process redis with pidfile /var/run/redis/redis-server.pid
    start program = "/bin/systemctl start redis-server" with timeout 60 seconds
    # Must be "stop program": the restart action needs a stop method as well as a start method.
    stop program = "/bin/systemctl stop redis-server"
    # Connect to 127.0.0.1:6379; restart on failure.
    if failed host 127.0.0.1 port 6379 then restart
    # 5 restarts within 5 cycles trigger the timeout action instead of another restart.
    if 5 restarts within 5 cycles then timeout
    # Alert only (no restart) when total memory stays above 16000 MB for 5 cycles.
    if totalmem > 16000.0 MB for 5 cycles then alert

4 . 監視サーバ設定

監視サーバからリモートサーバを監視する場合の設定

# Remote host check for remote-example.com, configured on the monitoring server.
# Requests "/healthcheck" with the HTTP protocol test over an SSL connection to port 443,
# and alerts once the check has failed 5 times within 5 cycles.
check host remote-example.com with address remote-example.com
    if failed
          port 443 type tcpSSL protocol http
       request "/healthcheck"
       for 5 times within 5 cycles
       then alert

5 . 設定確認とリロード

$ sudo monit -t && sudo monit reload
Control file syntax OK
Reinitializing monit daemon

6 . ログ確認、監視設定

$ sudo tail -f /var/log/monit.log
$ sudo monit monitor all

7 . 状態確認

$ sudo monit status
$ sudo monit summary