Monit によるサービス監視

  • インストール
$ sudo apt-get install monit

 

  • 設定
$ sudo vim /etc/monit/monitrc
# Global Monit settings.
# Deliver alert mail through the mail server running on this host.
set mailserver localhost
# Send alerts to root@localhost, except for "instance" and "action" events.
set alert root@localhost not on { instance, action }  #Set email address to receive alerts.
# Enable the embedded HTTP interface so Monit commands can be issued from the
# command line; it listens on port 2812, bound to and reachable from localhost only.
set httpd port 2812 and
        use address localhost
        allow localhost
# Host-level resource checks for the system named example.com.
# Every rule below only raises an alert; none of them takes corrective action.
check system example.com
    # Load average thresholds (1-minute and 5-minute).
    if loadavg (1min) > 4 then alert
    if loadavg (5min) > 2 then alert
    # Memory and swap utilisation thresholds.
    if memory usage > 75% then alert
    if swap usage > 25% then alert
    # CPU time thresholds, split by user, system and I/O wait.
    if cpu usage (user) > 70% then alert
    if cpu usage (system) > 30% then alert
    if cpu usage (wait) > 20% then alert

# Watch the filesystem mounted at "/" (service name: rootfs).
check filesystem rootfs with path / #Alert if low on disk space.
    # Alert once more than 90% of the space is in use.
    if space usage > 90% then alert
# Restarting Unhealthy LEMP Services

# Supervise nginx, identified by its pidfile.
check process nginx with pidfile /var/run/nginx.pid
    # Monit service group name (used to address several services at once).
    group www-data
    # Both actions are required: a "restart" runs the stop program, then the start program.
    start program = "/bin/systemctl start nginx"
    stop program = "/bin/systemctl stop nginx"
    # Restart nginx when an HTTP request to port 80 of the site fails.
    if failed host octaviadata.com port 80 protocol http then restart
    # Stop monitoring after 5 restarts within 5 cycles to avoid a restart loop.
    if 5 restarts within 5 cycles then timeout

# Supervise the MySQL server, identified by its pidfile.
check process mysql with pidfile /var/run/mysqld/mysqld.pid
    # Both actions are required: a "restart" runs the stop program, then the start program.
    start program = "/bin/systemctl start mysql"
    stop program = "/bin/systemctl stop mysql"
    # Restart MySQL when a connection to its Unix socket fails.
    if failed unixsocket /var/run/mysqld/mysqld.sock then restart
    # Stop monitoring after 5 restarts within 5 cycles to avoid a restart loop.
    if 5 restarts within 5 cycles then timeout

# Supervise PHP-FPM 7.0, identified by its pidfile.
check process php-fpm with pidfile /var/run/php/php7.0-fpm.pid
    # Both actions are required: a "restart" runs the stop program, then the start program.
    start program = "/bin/systemctl start php7.0-fpm"
    stop program = "/bin/systemctl stop php7.0-fpm"
    # Restart PHP-FPM when a connection to its Unix socket fails.
    if failed unixsocket /var/run/php/php7.0-fpm.sock then restart
    # Stop monitoring after 5 restarts within 5 cycles to avoid a restart loop.
    if 5 restarts within 5 cycles then timeout

# Supervise the Redis server, identified by its pidfile.
check process redis with pidfile  /var/run/redis/redis-server.pid
    # Both actions are required: a "restart" runs the stop program, then the start program.
    start program = "/bin/systemctl start redis-server"
    stop program = "/bin/systemctl stop redis-server"
    # Restart Redis when a TCP connection to 127.0.0.1:6379 fails.
    if failed host 127.0.0.1 port 6379 then restart
    # Stop monitoring after 5 restarts within 5 cycles to avoid a restart loop.
    if 5 restarts within 5 cycles then timeout
    # Alert (without restarting) when total memory stays above 4000 MB for 5 cycles.
    if totalmem > 4000.0 MB for 5 cycles then alert

# Supervise memcached; the process is located by matching "memcached"
# against the process list rather than by reading a pidfile.
check process memcached with match memcached
    start program = "/bin/systemctl start memcached"
    stop program = "/bin/systemctl stop memcached"
    # Restart memcached when a TCP connection to 127.0.0.1:11211 fails.
    if failed host 127.0.0.1 port 11211 then restart
    # Sustained CPU use: alert above 70% for 2 cycles, restart above 98% for 5 cycles.
    if cpu > 70% for 2 cycles then alert
    if cpu > 98% for 5 cycles then restart
    # Stop monitoring after 2 restarts within 3 cycles to avoid a restart loop.
    if 2 restarts within 3 cycles then timeout

  • 監視サーバからリモートサーバを監視する場合の設定

# Remote health check, run from the monitoring server against remote-example.com.
# Requests "/healthcheck" over HTTP on an SSL-wrapped TCP connection to port 443
# and alerts once the test has failed 5 times within 5 cycles.
check host remote-example.com with address remote-example.com
    if failed
        port 443 type tcpSSL protocol http
        request "/healthcheck"
        for 5 times within 5 cycles
    then alert

 

  • 設定のテスト
$ sudo monit -t
Control file syntax OK

 

  • 起動
$ sudo service monit restart
$ sudo tail -f /var/log/monit.log
$ sudo monit status
$ sudo monit monitor all