#use base qw/OpenILS::Application/;
use strict;
use warnings;
use DBI;
use LWP::UserAgent;
use LWP::Simple;
use HTML::LinkExtor;
use URI::URL;
use XML::LibXML;


###################################
###  Configuration  ###############
###################################
# PostgreSQL connection settings used to build the DBI connect call.
my $database = 'evergreen';
my $username = 'evergreen';
my $password = 'evergreen';
my $hostname = 'localhost';

# Base name of the CSV report; ".csv" is appended where the file is opened.
my $outputfile = 'report_dead_links';

# Value for the SQL LIMIT clause; 0 means no LIMIT clause is added.
my $limit = 500;

# Number of seconds to wait for a page before a link check times out.
my $timeout_limit = 5;



# Connect to the database.
# RaiseError makes connect() and every later DBI call on this handle
# (prepare, execute, fetch) die with the driver's error message instead of
# returning undef and failing later with an unrelated error.
my $db = DBI->connect(
  "DBI:Pg:dbname=$database;host=$hostname",
  $username,
  $password,
  { RaiseError => 1, PrintError => 0, AutoCommit => 1 },
);

# $limit is interpolated straight into the SQL text, so insist that it is a
# plain non-negative integer before using it.
if ($limit !~ /\A\d+\z/) {
  die "Invalid \$limit '$limit': expected a non-negative integer\n";
}

# Impose a limit on the number of rows if one was configured (0 = unlimited)
my $sql_limit = "";
if ($limit != 0) {
  $sql_limit = "LIMIT $limit";
}

print "Building query...\n";

# SQL query: for every non-deleted record whose MARC XML contains an 856
# datafield, return the record id together with the text captured from the
# subfield with code "u" (the link), newest record ids first.
# qq{} is used so the double quotes inside the SQL need no escaping.
my $links = $db->prepare(qq{
  SELECT id,
         regexp_matches(
             marc,
             '.+?<datafield tag="856".+?><subfield code="u">([^<]+).*</subfield>',
             'g'
             ) as URL
         FROM biblio.record_entry
         WHERE marc LIKE '%<datafield tag="856"%' and deleted = 'False' ORDER BY id DESC $sql_limit
});

$links->execute();


# Create the user agent object used to test whether a link is valid.
# Each request is abandoned after $timeout_limit seconds.
my $ua = LWP::UserAgent->new;
$ua->timeout($timeout_limit);

# Start every run with a fresh report that contains only the CSV header.
# The bare block keeps the helper variables out of file scope.
{
  my $report_path = "$outputfile.csv";

  # Delete the old content. A failure here is only worth a warning because
  # the '>' open below truncates whatever is left behind.
  if (-e $report_path) {
    unless (unlink $report_path) {
      warn "Could not remove old report '$report_path': $!\n";
    }
  }

  # Three-argument open with a lexical handle; '>' guarantees an empty file.
  open my $header_fh, '>', $report_path
    or die "Cannot open '$report_path' for writing: $!";

  # Print the CSV header
  print {$header_fh} "Record ID, Link, Error \n"
    or die "Cannot write header to '$report_path': $!";

  # Buffered write errors only surface at close, so check it.
  close $header_fh
    or die "Cannot close '$report_path': $!";
}

# The progress dots below carry no newline, so turn on autoflush for STDOUT;
# otherwise they sit in the output buffer instead of appearing as we go.
$| = 1;

print "Looping through all links, this may take a while, please be patient...\n";

# Format one value as a CSV field. Values containing a comma, double quote or
# line break are wrapped in double quotes with embedded quotes doubled
# (RFC 4180); every other value is returned unchanged, so rows without
# special characters look exactly as they did before.
my $csv_field = sub {
  my ($value) = @_;
  $value = '' unless defined $value;
  if ($value =~ /[",\r\n]/) {
    (my $escaped = $value) =~ s/"/""/g;
    return qq{"$escaped"};
  }
  return $value;
};

my $dead_links_csv = "$outputfile.csv";

# Each fetched row holds a record id and an array reference of URLs.
while (my $record = $links->fetchrow_hashref()) {

  for my $url (@{ $record->{'url'} }) {

    my $req = HTTP::Request->new('GET' => $url);
    my $res = $ua->request($req);

    # Only links that failed are reported.
    next if $res->is_success;

    # Open and close the file for every failure so the report can be read
    # while the script is still running, and is only opened when needed.
    open my $report_fh, '>>', $dead_links_csv
      or die "Cannot open '$dead_links_csv' for appending: $!";

    my $row = join ',',
      map { $csv_field->($_) } $record->{'id'}, $url, $res->status_line;

    print {$report_fh} $row, "\n"
      or die "Cannot write to '$dead_links_csv': $!";

    # Close the handle; buffered write errors surface here.
    close $report_fh
      or die "Cannot close '$dead_links_csv': $!";
  }

  # One dot per fetched row as a progress indicator.
  print ".";
}

print "\n\nSuccessful, results in $outputfile.csv";

# Close the database connection
$db->disconnect();
