# -*- mode: Perl; -*-

package NewsClipper::Handler::Acquisition::dilbert;

use vars qw( @ISA $VERSION %handlerInfo );

# Descriptive metadata for this handler, stored in the package-level
# %handlerInfo hash: author and maintainer contact details, a short
# description, category, home URL, license, target News Clipper version,
# language, notes, and the input-file syntax.
#
# Multi-line values use single-quoted here-documents, so their text is taken
# literally (no interpolation) and each value ends with a newline.
$handlerInfo{'Author_Name'}              = 'John Tunison';
$handlerInfo{'Author_Email'}             = 'nosinut@cmu.edu';
$handlerInfo{'Maintainer_Name'}          = 'John Tunison';
$handlerInfo{'Maintainer_Email'}         = 'nosinut@cmu.edu';
$handlerInfo{'Description'}              = <<'EOF';
Dilbert comic snarf; fetches archives, too.
EOF
$handlerInfo{'Category'}                 = 'Comics';
$handlerInfo{'URL'}                      = <<'EOF';
http://www.dilbert.com/
EOF
$handlerInfo{'License'}                  = '';
$handlerInfo{'For_News_Clipper_Version'} = '1.18';
$handlerInfo{'Language'}                 = 'English';
$handlerInfo{'Notes'}                    = <<'EOF';
Modified from the original handler written by David Coppit
EOF
$handlerInfo{'Syntax'}                   = <<'EOF';
<input name=dilbert days=X>
  Returns an array of image links
  X: number of days of comics to fetch, including the current day. If
     omitted, defaults to 1
EOF

use strict;
use NewsClipper::Handler;
@ISA = qw(NewsClipper::Handler);

# - The first number should be incremented when a change is made to the
#   handler that will break people's input files.
# - The second number should be incremented when a change is made that won't
#   break people's input files, but changes the functionality.
# - The third number should be incremented when only a bugfix is applied.

# Convert the dotted version string into a single number-like string: the
# first component is kept as-is and every later component is zero-padded to
# two digits, so '1.1.3' becomes "1.0103".
$VERSION = do {my @r=('1.1.3'=~/\d+/g);sprintf "%d."."%02d"x$#r,@r};

# ------------------------------------------------------------------------------

# Returns the address of the page this handler reads by default.
#
#   $self       - the handler object (not used here)
#   $attributes - hash reference of handler attributes (not used here)
#
# The result is always the fixed string 'http://www.dilbert.com/'.
sub ComputeURL
{
  my ($self, $attributes) = @_;

  return 'http://www.dilbert.com/';
}

# ------------------------------------------------------------------------------

# This subroutine checks the handler's attributes to make sure they are valid,
# and sets any default attributes if necessary.

sub ProcessAttributes
{
  my ($self, $attributes, $handlerRole) = @_;

  # "days" is optional in the input file. When no value was supplied, fall
  # back to fetching a single comic. A defined value (even 0) is left alone.
  if (!defined $attributes->{days})
  {
    $attributes->{days} = 1;
  }

  # No further validation is done: the same hash reference that was passed in
  # is handed back, with the default filled in if it was needed.
  return $attributes;
}

# ------------------------------------------------------------------------------

# This function is used to get the raw data from the URL.
sub Get
{
  # Arguments:
  #   $self       - the handler object; its ComputeURL method supplies the
  #                 front-page address used for a single-day request.
  #   $attributes - hash reference; $attributes->{days} selects how many
  #                 comics to fetch and is also passed on to "cacheimages".
  #
  # Returns a reference to an array holding the result of running the
  # "cacheimages" filter on each comic image link that was found, or undef
  # when a required page or image link could not be obtained.
  my ($self, $attributes) = @_;

  if ($attributes->{days} > 1)
  {
    my $howmany = $attributes->{days} - 1;

    # First get the URLs of the comics pages from the main archive page: take
    # the text between "choose date" and the closing </SELECT>.
    my $archive_cruft =
      &GetHtml("http://www.dilbert.com/comics/dilbert/archive/",
               '(?i)>choose date', '(?i)</SELECT>');

    return undef unless defined $archive_cruft;

    # Reduce every <OPTION> line to its VALUE and turn it into an absolute
    # URL. Lines that become empty (or otherwise false) are dropped.
    my @archive_pages;
    foreach my $line (split /\n/, $$archive_cruft)
    {
      (my $path = $line) =~ s/<OPTION VALUE="(.*)">(.*)/$1/;
      push @archive_pages, "http://www.dilbert.com$path" if $path;
    }

    # The final entry of the list is discarded before any page is visited.
    # NOTE(review): presumably it is trailing non-option text or an entry
    # that must not be fetched -- confirm against the archive page markup.
    pop @archive_pages;

    # Now visit the archive pages, last entry first, and collect one comic
    # image from each until the requested number has been gathered.
    my @images;
    my $fetched = 0;

    # Pages that cannot be read, or that carry no comic image, are skipped
    # without being counted. The "@archive_pages" guard therefore matters:
    # without it the loop would never end once the list has been used up.
    while ($fetched <= $howmany && @archive_pages)
    {
      my $page = pop @archive_pages;

      my $links = &GetLinks($page, '^','$');
      next unless defined $links;

      # Each link is a scalar reference; keep those that name a comic image.
      my @comics = grep { $$_ =~ /\d{6}\.gif/ } @$links;

      # Do not hand an undefined link to the cache handler.
      next unless @comics;

      # Cache the image locally
      my $cachedimage =
        RunHandler('cacheimages','filter',$comics[0],$attributes);

      push @images, $cachedimage;
      $fetched++;
    }

    # Report failure the same way as the other error paths when not a single
    # comic could be collected.
    return undef unless @images;

    return \@images;
  }

  # Single-day request: today's comic is linked from the front page.
  my $links = &GetLinks($self->ComputeURL($attributes), '^','$');
  return undef unless defined $links;

  my @comics = grep { $$_ =~ /\d{13}\.gif/ } @$links;
  return undef unless @comics;

  # Cache the image locally
  my $cachedimage =
    RunHandler('cacheimages','filter',$comics[0],$attributes);

  return [ $cachedimage ];
}

# ------------------------------------------------------------------------------

# Returns, as literal text, the default handler tags for this handler: a
# "limit" filter (number=10) followed by an "array" output handler that puts
# one item per row separated by '<br>'.
#
#   $self            - the handler object (not used here)
#   $inputAttributes - attributes of the input tag (not used here)
#
# NOTE(review): presumably these tags are applied when the input file does not
# name its own filter/output handlers -- confirm against the framework.
sub GetDefaultHandlers
{
  my ($self, $inputAttributes) = @_;

  return <<'  EOF';
    <filter name=limit number=10>
    <output name=array numcols=1 prefix='' suffix='' separator='<br>'>
  EOF
}

# ------------------------------------------------------------------------------

# Returns a reference to a one-element array containing the string '2,5 EST'.
# NOTE(review): presumably an update schedule (hours plus time zone) read by
# the framework -- confirm the format against the News Clipper documentation.
sub GetUpdateTimes
{
  my @updateTimes = ('2,5 EST');

  return \@updateTimes;
}

1;
