# This script downloads players' games from the KGS archives.
# It requires a file containing a list of KGS usernames, one per line;
# lines beginning with '#' are ignored.
# The name of this file is stored in $playerfile.
# The directory that holds this file, and where the archives
# will be saved, is stored in $localbaseurl.
# The index page of the player currently being processed is also
# stored there as user.html.
# Written by D.Gilder 2007

use strict;
use warnings;
use HTML::TokeParser;
use LWP::UserAgent;

# User configuration: edit the two values below to suit your machine.
#------------------------------------------
# Directory that holds the player list and receives user.html and the
# downloaded archives.  File names are appended to it directly, so it
# must end with a slash.
my $localbaseurl = '/home/dan/more/KGS/tar/';
# Name of the player list file inside $localbaseurl.
my $playerfile = 'kgsplayers.txt';
#------------------------------------------

# Root of the KGS web site; request paths are appended to it directly.
my $webbaseurl = 'http://www.gokgs.com/';
# One user agent shared by every page request made through myconnect.
my $ua = LWP::UserAgent->new();

# Main loop: read the player list and fetch each player's archives in turn.
open(my $input, "<", $localbaseurl.$playerfile)
  or die "Couldn't open $playerfile: $!\n";
while (my $player = <$input>) {
  chomp $player;
  # Lines starting with '#' are comments; blank lines carry no username
  # and would otherwise trigger a pointless request for an empty user.
  next if $player =~ /^#/ || $player !~ /\S/;
  my $starttime = time;
  print $player,"\n";
  getuserpage($player);
  # wait at least 4 seconds between calls to getuserpage
  # as requested by William Shubert
  # Do not delete the following two lines
  my $elapsed = time - $starttime;
  sleep 4 - $elapsed if $elapsed < 4;
}
close($input) or die "Couldn't close $playerfile: $!\n";

# Fetch a page from the KGS web site.
#   $url - request path relative to $webbaseurl
# Returns the response object produced by $ua->get when the request
# succeeded.
# Dies, naming the path and the HTTP status line, when it did not.
sub myconnect {
  my $url = shift;
  my $reply = $ua->get($webbaseurl.$url);
  # Check the outcome of the response.  LWP reports failures through the
  # response object, not through $!, so the status line (for example
  # "404 Not Found") is the meaningful reason to show.
  $reply->is_success
    or die 'Couldn\'t connect to '.$url.' Stopped '.$reply->status_line;
  return $reply;
}

# Download the game archives linked from a player's KGS index page.
#   $user - KGS username, appended unescaped to the request path
# The index page is fetched through myconnect and saved as user.html in
# $localbaseurl, replacing any previous copy.  The saved page is then
# scanned for archive links, and each archive that is not already present
# in $localbaseurl is fetched with the external lwp-download program.
# Dies if the page cannot be fetched, saved or opened for parsing; a failed
# lwp-download only produces a warning so the remaining archives are tried.
sub getuserpage {
  my $user = shift;
  my $url = 'gameArchives.jsp?user='.$user;
  my $mylocalurl = $localbaseurl.'user.html';

  # Fetch first, so a failed request does not leave a truncated user.html.
  my $reply = myconnect($url);
  open(my $outfile, ">", $mylocalurl)
    or die 'Can\'t open '.$mylocalurl.": $!";
  print {$outfile} $reply->content
    or die 'Can\'t write '.$mylocalurl.": $!";
  close($outfile) or die 'Can\'t close '.$mylocalurl.": $!";

  # new() returns undef when the file cannot be opened.
  my $p = HTML::TokeParser->new($mylocalurl)
    or die 'Can\'t parse '.$mylocalurl.": $!";

  # Skip to start of user data (the second <table> on the page)
  $p->get_tag("table");
  $p->get_tag("table");

  # get_tag returns undef once the document is exhausted.
  while (my $a_token = $p->get_tag("a")) {
    my $str = $a_token->[1]{href};
    # Stop at the first anchor that is not an archive link (or has no href).
    # NOTE(review): this presumes the archive links are contiguous - confirm
    # against the current page layout.
    last unless defined $str && $str =~ /y=(.+)&m=(.+)/;
    my $file = $user.'-'.$1.'-'.$2.'.tar.gz';
    my $target = $webbaseurl.'servlet/archives/en_US/'.$file;
    next if -e $localbaseurl.$file;
    # List form bypasses the shell: $user comes from the player file and
    # the captures come from a remote page, so neither may be interpreted
    # as shell syntax.
    system('lwp-download', $target, $localbaseurl) == 0
      or warn "lwp-download failed for $target (status $?)\n";
  }
}
