#!/usr/bin/perl -w

# WebWatchDaemon
# Copyright 2003 John M. Grohol
# December 23, 2003
# v1.0

# -------------------------------------
# WebWatchDaemon allows you to not only
# check to see if a Web site is functioning, but to ensure
# that the correct page is online and contains a specific
# piece of content (called the keyphrase). This allows you
# to check not only that your site is operational, but 
# that your application is functioning as expected.
#
# Run this as a cron job on a server that has Internet connectivity, at
# whatever interval you want your site checked. Sample crontab line:
#
# 05 * * * * perl /home/user/johnsmith/webwatch.pl
#
# This line would run the script every hour at :05. If your
# watch page is very important to your site,
# you might run it every minute or every 5 minutes.
#
# Please note: You can run this script on your Web server.
# But for more robust applications, you should consider
# running it on a Web server external to your network. 
# This allows the script to test not only for content
# changes in the target page, but also network connectivity.
#
# -------------------------------------
# License: This software is copyright (C) 2003 John M. Grohol. It is distributed 
# under the terms of the GNU General Public License (GPL). Because it is licensed 
# free of charge, there is NO WARRANTY, it is provided AS IS. The author can not 
# be held liable for any damage that might arise from the use of this software. 
# Use it at your own risk. See http://www.gnu.org/ for details and more information.
# -------------------------------------
#

# Requires the following perl modules to be installed:

# Core modules (ship with perl)
use Fcntl qw(:flock);
use POSIX ();

# CPAN modules (libwww-perl)
use HTTP::Request;
use HTTP::Response;
use LWP::UserAgent;

# Set your watch URL here

  my $watch_url = "http://www.yourdomain.com/specific_page_to_be_watched.html";

# Set your keyphrase here. It is matched as literal text (not as a regular
# expression) anywhere in the body of the watched page.

  my $keyphrase = "This is text that appears on the target watch URL";

# Alert and logging options

  my $alert     = 1;        # Set to 1 to turn email alerts on; 0 to turn them off
  my $email     = "your\@contact_email_address.com";
  my $mailprog  = '/usr/lib/sendmail';

  my $logging   = 0;        # Set to 1 to turn logging on; 0 to turn it off
  my $logfile   = "/home/www/logs/www_watch.txt"; # Set to your logfile

# Other variables that may need changing

  $ENV{"TZ"}    = "EST5EDT";  # Change to reflect your time zone if necessary
  POSIX::tzset();             # Make the C library pick up the TZ set above
  my $t         = time;       # Time of this check; stamped on log entries


# -------------- End Configuration --------------------------

# -----------------------------------------------------------
# Main
# -----------------------------------------------------------

  # Pretend to be one of three MSIE 5.x releases, picked at random:
  #   "5" -> MSIE 5.5,  "01" -> MSIE 5.01,  "0" -> MSIE 5.0
  # The comparisons are numeric on purpose: with the string operators
  # gt/lt a value such as 4 sorts after 30 and lands in the wrong bucket.
  # No explicit srand() is needed; perl seeds rand() on first use.

  my $rand_num = int(rand(50));
  my $br_ver   = $rand_num > 30 ? "5"
               : $rand_num < 20 ? "01"
               :                  "0";

  # Go get the watch content page. The & call form skips prototype
  # checking, so the call compiles without warnings even though the
  # subroutine is defined further down the file.

  my $content = &get_watch_page($watch_url, $br_ver);

  # Look for the keyphrase as a plain substring of the whole body. index()
  # treats regex metacharacters in the keyphrase literally and also finds a
  # phrase regardless of where line breaks fall in the page.

  my $found = defined($content) && index($content, $keyphrase) >= 0;

  # We only alert/log failures

  if (!$found) {

    # If logging is enabled, write the event to the logfile. This happens
    # before the alert so that a mail failure cannot lose the log entry,
    # and a logging problem only warns so that the alert still goes out.

    if ($logging) {

      my $stamp = POSIX::strftime("%Y-%m-%d %H:%M:%S %Z", localtime($t));

      if (open(my $log, '>>', $logfile)) {
        flock($log, LOCK_EX) or warn "Can't lock $logfile: $!\n";
        seek($log, 0, 2);   # 2 = SEEK_END: re-seek in case another writer appended while we waited for the lock
        print {$log} "$stamp|$watch_url|Failure\n";
        close($log) or warn "Can't close $logfile: $!\n";
      } else {
        warn "Can't append to $logfile: $!\n";
      }
    }

    # If alerts are enabled, send an email to the administrator's contact
    # address. $mailprog is handed to perl as a command string so that any
    # extra flags configured in it keep working.

    if ($alert) {

      open(my $mail, '|-', "$mailprog -t") or die "Can't open $mailprog: $!\n";
      print {$mail} "To: $email\n";
      print {$mail} "From: WebWatchDaemon <$email>\n";
      print {$mail} "X-Priority: High\n";
      print {$mail} "Subject: Alert! Content is unreachable\n\n";

      print {$mail} qq~

   Hi! I'm the WebWatchDaemon for your Website. 
   On my most recent watch, I noticed that the following page did not
   have the required text returned, suggesting it may not be available:

	$watch_url

   I suggest you look into it at your earliest convenience, thank you!

   - WebWatchDaemon

   ~;

      # The mail program's exit status only becomes visible at close().
      close($mail)
        or warn "$mailprog did not finish cleanly: " . ($! || "exit status $?") . "\n";
    }

  } # end if

exit(0);

# ---------------------------------------------
# Subroutine: Get the watched content page
# --------------------------------------------- 

# get_watch_page($url, $br_ver)
#
# Fetches $url with an HTTP GET while identifying as an MSIE 5.x browser.
#
# Arguments:
#   $url    - URL of the page to fetch
#   $br_ver - text appended to "MSIE 5." in the User-Agent string
#
# Returns:
#   The response body when the request succeeded. When it did not (HTTP
#   error status, or LWP could not reach the server) an empty string is
#   returned, so the text of an error page can never be mistaken for the
#   watched content.
#
# Declared without a prototype: the sub takes two arguments, so the former
# empty prototype "()" was wrong and only worked because callers used &.
sub get_watch_page
{
  my ($url, $br_ver) = @_;

  my $ua = LWP::UserAgent->new();
  $ua->agent("Mozilla/4.0 (compatible; MSIE 5." . $br_ver . "; Windows 98)");

  my $response = $ua->request(HTTP::Request->new(GET => $url));

  # Anything other than a successful final response counts as "no content".
  return '' unless $response->is_success();

  return $response->content();
}


1;


