Hello, I am setting up Monit to monitor our servers. Please check my monitrc configuration file below. For some reason, it starts failing at the "check filesystem" rule, which I have highlighted below. If I comment out the whole "check filesystem" section, the error simply moves on to the next rule down. I am not sure how to troubleshoot this error, and it has been really frustrating. Any help is appreciated.
#### set daemon 120 # check services at 2-minute intervals set logfile /var/log/monit.log set idfile /var/lib/monit/id set statefile /var/lib/monit/state set mailserver smtp.gmail.com port 587 using tlsv1 with timeout 30 seconds username "[email protected]" password "~RollingStone2014!" set mail-format { from: [email protected] } set alert [email protected] # Email to send notifications set eventqueue basedir /var/lib/monit/events slots 100 set mail-format { subject: monit alert -- $EVENT $SERVICE message: $EVENT Service $SERVICE Received: Date: $DATE Action: $ACTION Host: $HOST Description: $DESCRIPTION } set httpd port 8181 and # Port used for the WEB access, listen on LOCALHOST use address localhost allow 0.0.0.0/0.0.0.0 allow admin:monit # Username/password ############################################################################### ## Services ############################################################################### check system localhost if loadavg (1min) > 4 then alert if loadavg (5min) > 2 then alert if memory usage > 75% then alert if swap usage > 25% then alert if cpu usage (user) > 70% then alert if cpu usage (system) > 30% then alert if cpu usage (wait) > 20% then alert # Check the binary Apache file (MD5 checksum + permission + UID + GUID) check file apache_bin with path /usr/lib/apache2/mpm-prefork/apache2 if failed permission 755 then unmonitor if failed uid root then unmonitor if failed gid root then unmonitor group web # Check apache service (service alive + resource consumption) check process apache with pidfile /var/run/apache2.pid start program = "/etc/init.d/apache2 start" with timeout 20 seconds stop program = "/etc/init.d/apache2 stop" if children > 150 then alert if children > 250 then restart if loadavg(5min) greater than 15 for 8 cycles then stop if totalcpu > 50% for 2 cycles then alert if totalcpu > 80% for 5 cycles then restart if failed host 127.0.0.1 port 80 protocol http then restart if 5 restarts within 5 cycles then 
timeout and alert group web # Check ROOT filesystem space usage--------------------------------> Syntax error starts from the check filesystem rule below. check filesystem rootfs with path / if space usage > 80% for 5 times within 15 cycles then alert group server # Check DATA filesystem space usage check filesystem datafs with path /dev/xvda1 if space usage > 80% for 5 times within 15 cycles then alert group server # Check mysqld service (depends mysqld binary + init.d startup script) check process mysqld with pidfile /var/run/mysqld/mysqld.pid group database start program = "/etc/init.d/mysql start" with timeout 20 seconds stop program = "/etc/init.d/mysql stop" if failed host 127.0.0.1 port 3306 protocol mysql then restart if 5 restarts within 5 cycles then timeout and alert # Check Tomcat7 check process tomcat7 with pidfile /var/run/tomcat7.pid start program = "/etc/init.d/tomcat7 start" with timeout 60 seconds stop program = "/etc/init.d/tomcat7 stop" if failed host 127.0.0.1 port 8080 protocol http then restart if 5 restarts within 5 cycles then timeout and alert group web # Check mongodb process check process mongodb with pidfile /var/lib/mongodb/mongod.lock group database start program = "/etc/init.d/mongodb start" with timeout 20 seconds stop program = "/etc/init.d/mongodb stop" if failed host 127.0.0.1 port 27017 protocol http for 3 times within 5 cycles then restart if 5 restarts within 5 cycles then timeout and alert
-- To unsubscribe: https://lists.nongnu.org/mailman/listinfo/monit-general
