Author: sparky
Date: Mon Jan  5 17:30:06 2009
New Revision: 10049

Modified:
   toys/fun/rsget.pl
Log:
- parrallel download
- added netload support (needs gocr and ocrad for captchas)


Modified: toys/fun/rsget.pl
==============================================================================
--- toys/fun/rsget.pl   (original)
+++ toys/fun/rsget.pl   Mon Jan  5 17:30:06 2009
@@ -5,9 +5,67 @@
 #
 use strict;
 use warnings;
-use WWW::Curl::Easy;
+use Time::HiRes;
+
+my $checklist = 1;
+my %gotlist;
+
+package Line; # {{{
 
 $| = 1;
+my $actual_line = 0;
+my $max_line = 0;
+my $columns = $ENV{COLUMNS} || 124; # XXX
+
+sub new
+{
+       my $proto = shift;
+    my $class = ref( $proto ) || $proto;
+
+       my $steps = $max_line - $actual_line;
+       $actual_line = $max_line;
+       my $move = "";
+
+       if ( $steps < 0 ) {
+               return undef;
+       } elsif ( $steps > 0 ) {
+               $move = "\033[" . $steps . "B";
+       }
+
+       print $move . "\n\r\033[K";
+
+       my $line = $max_line++;
+       my $self = \$line;
+       return bless $self, $class;
+}
+
+sub print
+{
+       my $self = shift;
+       my $text = shift;
+       my $line = $$self;
+
+       return undef if $line >= $max_line;
+       my $steps = $line - $actual_line;
+       $actual_line = $line;
+       my $move = "";
+
+       if ( $steps < 0 ) {
+               $move = "\033[" . (-$steps) . "A";
+       } elsif ( $steps > 0 ) {
+               $move = "\033[" . $steps . "B";
+       }
+       if ( length $text > $columns ) {
+               $text = (substr $text, 0, $columns - 4) . '...';
+       }
+       
+       print $move . "\r\033[K" . $text;
+}
+
+# }}}
+package Curl; # {{{
+use WWW::Curl::Easy;
+use WWW::Curl::Multi;
 
 my $curl_headers = [
        'User-Agent: Mozilla/5.0 (X11; U; Linux ppc; ca-AD; rv:1.8.1.17) 
Gecko/20080926 PLD/3.0 (Th) Iceape/1.1.12',
@@ -16,61 +74,106 @@
        'Accept-Language: ca,en-us;q=0.7,en;q=0.3',
        ];
 
-sub body_file {
-       my ($chunk, $self) = @_;
+sub file_init
+{
+       my $self = shift;
+       my $curl = $self->{curl};
 
-       unless ( exists $self->{total} ) {
-               my $info = $self->{curl}->getinfo( 
CURLINFO_CONTENT_LENGTH_DOWNLOAD );
-               $self->{total} = $info || '?';
-               $self->{start} = time;
+       {
+               my $f_len = $curl->getinfo( CURLINFO_CONTENT_LENGTH_DOWNLOAD );
+               $self->{size_total} = $f_len || 0;
+               $self->{size_got} = 0;
        }
-       my $len = length $chunk;
-       $self->{got} += $len;
 
-       my $file = $self->{file};
-       print $file $chunk;
+       $self->{time_start} = time;
 
-       my $size = "$self->{got} / $self->{total}";
-       my $eta = "";
-       my $time = time;
-       if ( int $self->{total} > 0 ) {
-               $size .= sprintf " [%.2f%%]", $self->{got} * 100 / 
$self->{total};
-               if ( $time > $self->{start} ) {
-                       my $tleft = ($self->{total} - $self->{got}) / 
$self->{got} * ($time - $self->{start});
-                       $eta = sprintf "%2d:%.2d", $tleft / 60, $tleft % 60;
+       if ( $self->{head} =~ 
/^Content-Disposition:\s*attachment;\s*filename\s*=\s*"?(.*?)"?\s*$/im ) {
+               $self->{file_name} = $1;
+       } else {
+               my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
+               $eurl =~ s#^.*/##;
+               $self->{file_name} = $eurl;
+       }
+
+       {
+               my $fn = $self->{file_name};
+               if ( length $fn >= $columns - 30 ) {
+                       my $l = int ( ( $columns - 34 ) / 2 );
+                       $fn =~ s/^(.{$l}).*(.{$l})$/$1...$2/;
                }
+
+               my $net = $self->{obj}->{net};
+               $self->{obj}->{netname} = "[$net] $fn: ";
        }
-       my $speed = "???";
-       $speed = sprintf "%.2f", $self->{got} / ($time - $self->{start}) / 1024
-               if $time > $self->{start};
 
-       print "\r$self->{fn}: $size; ${speed}KB/s $eta  ";
+       {
+               open my $f_out, '>', $self->{file_name};
+               $self->{file} = $f_out;
+       }
+}
+
+sub body_file
+{
+       my ($chunk, $self) = @_;
+
+       file_init( $self ) unless exists $self->{size_total};
+
+       my $len = length $chunk;
+       $self->{size_got} += $len;
+
+       my $file = $self->{file};
+       print $file $chunk;
 
        return $len;
 }
 
-sub body_scalar {
+sub body_scalar
+{
        my ($chunk, $scalar) = @_;
        $$scalar .= $chunk;
        return length $chunk;
 }
 
-open NULL, '>', '/dev/null';
-sub curl
+
+my $mcurl = new WWW::Curl::Multi;
+my %curling;
+
+sub start
 {
        my $url = shift;
+       my $obj = shift;
        my %opts = @_;
 
        my $curl = new WWW::Curl::Easy;
 
-       $curl->setopt( CURLOPT_WRITEHEADER, \*NULL);
-       $curl->setopt( CURLOPT_MAXREDIRS, 10);
-       $curl->setopt( CURLOPT_FOLLOWLOCATION, 1);
-       $curl->setopt( CURLOPT_HTTPHEADER, $curl_headers);
-       $curl->setopt( CURLOPT_URL, $url);
-       $curl->setopt( CURLOPT_REFERER, $opts{referer})
+       my $id = 1;
+       ++$id while exists $curling{ $id };
+
+       my $ecurl = {
+               curl => $curl,
+               id => $id,
+               got => 0,
+               head => "",
+               body => "",
+               obj => $obj,
+       };
+
+    $curl->setopt( CURLOPT_PRIVATE, $id );
+
+       if ( $opts{cookies} ) {
+               $curl->setopt( CURLOPT_COOKIEJAR, $opts{cookies} );
+               $curl->setopt( CURLOPT_COOKIEFILE, $opts{cookies} )
+                       if -r $opts{cookies};
+       }
+       $curl->setopt( CURLOPT_HEADERFUNCTION, \&body_scalar );
+       $curl->setopt( CURLOPT_WRITEHEADER, \$ecurl->{head} );
+       $curl->setopt( CURLOPT_MAXREDIRS, 10 );
+       $curl->setopt( CURLOPT_FOLLOWLOCATION, 1 );
+       $curl->setopt( CURLOPT_HTTPHEADER, $curl_headers );
+       $curl->setopt( CURLOPT_URL, $url );
+       $curl->setopt( CURLOPT_REFERER, $opts{referer} )
                if $opts{referer};
-       $curl->setopt( CURLOPT_ENCODING, 'gzip,deflate');
+       $curl->setopt( CURLOPT_ENCODING, 'gzip,deflate' );
 
        if ( $opts{post} ) {
                my $post = $opts{post};
@@ -78,103 +181,654 @@
                $curl->setopt( CURLOPT_POSTFIELDS, $post );
        }
 
-       my $body;
-       if ( $opts{file} ) {
-               $curl->setopt( CURLOPT_WRITEFUNCTION, \&body_file);
-               $body = {
-                       curl => $curl,
-                       got => 0,
-                       fn => ($opts{net} || "").$opts{file}
-               };
-               open my $f_out, '>', $opts{file};
-               $body->{file} = $f_out;
-               $curl->setopt( CURLOPT_FILE, $body );
+       if ( $opts{save} ) {
+               $curl->setopt( CURLOPT_WRITEFUNCTION, \&body_file );
+               $curl->setopt( CURLOPT_WRITEDATA, $ecurl );
        } else {
-               $curl->setopt( CURLOPT_WRITEFUNCTION, \&body_scalar);
-               $curl->setopt( CURLOPT_FILE, \$body);
+               $curl->setopt( CURLOPT_WRITEFUNCTION, \&body_scalar );
+               $curl->setopt( CURLOPT_WRITEDATA, \$ecurl->{body} );
        }
 
-       if ( $curl->perform != 0 ) {
-               my $err = $curl->errbuf;
-               print "error: $err\n";
+       $curling{ $id } = $ecurl;
+    $mcurl->add_handle( $curl );
+}
+
+sub finish
+{
+       my $id = shift;
+       my $err = shift;
+
+       my $ecurl = $curling{ $id };
+       delete $curling{ $id };
+
+       my $curl = $ecurl->{curl};
+       delete $ecurl->{curl}; # remove circular dep
+
+       my $obj = $ecurl->{obj};
+       delete $ecurl->{obj};
+
+       if ( $ecurl->{file} ) {
+               close $ecurl->{file};
+               $obj->print( donemsg( $ecurl ) );
+       }
+
+       if ( $err ) {
+               my $error = $curl->errbuf;
+               $obj->print( "error: $err" );
+               $obj->stage1();
                return undef;
        }
 
-       if ( $opts{file} ) {
-               close $body->{file};
-               delete $body->{curl}; # remove circular dep
-               return $curl->getinfo( CURLINFO_CONTENT_LENGTH_DOWNLOAD );
+       if ( $obj->{curl_next} ) {
+               my $func = $obj->{curl_next};
+               my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
+               my $body = $ecurl->{body};
+               
+               &$func( $obj, $body, $eurl );
        }
-       return $body;
 }
 
-sub rsget
+sub perform
 {
-       my $file = shift;
-       (my $fn = $file) =~ s#^.*/##;
-       my $try = 10;
+       my $running = scalar keys %curling;
+       return unless $running;
+       my $act = $mcurl->perform();
+       return if $act == $running;
+
+       while ( my ($id, $rv) = $mcurl->info_read() ) {
+               next unless $id;
 
-rsget_restart:
-       $try -= 1;
-       if ( $try <= 0 ) {
-               return "problems";
+               finish( $id, $rv );
        }
+}
+
+sub print_status
+{
+       my $time = time;
+
+       foreach my $ecurl ( values %curling ) {
+               next unless exists $ecurl->{size_total};
+               my $size_got = $ecurl->{size_got};
+               my $size_total = $ecurl->{size_total};
+
+               my $size = "$size_got / $size_total";
+               my $eta = "";
+               my $time_diff = $time - $ecurl->{time_start};
+               if ( $size_total > 0 ) {
+                       $size .= sprintf " [%.2f%%]", $size_got * 100 / 
$size_total;
+                       if ( $time_diff > 0 ) {
+                               my $tleft = ($size_total - $size_got) * 
$time_diff / $size_got;
+                               $eta = main::s2string( $tleft );
+                       }
+               }
+               my $speed = "???";
+               $speed = sprintf "%.2f", $size_got / ( $time_diff * 1024 )
+                       if $time_diff > 0;
+
+               $ecurl->{obj}->print( "$size; ${speed}KB/s $eta" );
+       }
+}
+
+sub donemsg
+{
+       my $ecurl = shift;
+
+       my $size_total = $ecurl->{size_total};
+
+       my $time_diff = time() - $ecurl->{time_start};
+       my $eta = main::s2string( $time_diff );
+       my $speed = sprintf "%.2f", $size_total / ( $time_diff * 1024 );
+
+       return "DONE $size_total; ${speed}KB/s $eta";
+}
+
+
+# }}}
+package Wait; # {{{
+
+my %waiting;
+
+sub start
+{
+       my $obj = shift;
+       my $time = shift;
+       $obj->{wait_until} = time + $time;
+
+       my $id = 0;
+       ++$id while exists $waiting{ $id };
+       $waiting{ $id } = $obj;
+}
+
+sub finish
+{
+       my $id = shift;
+
+       my $obj = $waiting{ $id };
+       delete $waiting{ $id };
+
+       my $func = $obj->{wait_next};
+               
+       &$func( $obj );
+}
+
+sub perform
+{
+       my $time = time;
+
+       foreach my $id ( keys %waiting ) {
+               my $obj = $waiting{ $id };
+               my $left = $obj->{wait_until} - $time;
+               if ( $left <= 0 ) {
+                       finish( $id );
+               } else {
+                       $obj->print( $obj->{wait_msg} . main::s2string( $left ) 
);
+               }
+       }
+}
+
+# }}}
+package Get; # {{{
+
+my $slots = 1;
+my %running;
+sub makenew
+{
+       my $net = shift;
+       my $class = shift;
+       my $url = shift;
+
+       return {} if $gotlist{ $url };
+       $running{ $net } = {} unless exists $running{ $net };
+       my $rn = $running{ $net };
+       return {} if $slots <= scalar keys %$rn;
+
+       my $id = 1;
+       ++$id while exists $rn->{ $id };
+
+       my $line = new Line;
+
+       ( my $fn = $url ) =~ s#/+$##; ## vim
+       $fn =~ s#^.*/##;
+
+       my $self = {
+               @_,
+               url => $url,
+               id => $id,
+               try => 10,
+               line => $line,
+               net => $net,
+               netname => "[$net] $fn: ",
+       };
+
+       $rn->{ $id } = bless $self, $class;
+
+       $self->stage1();
+       return $self;
+}
+
+sub print
+{
+       my $self = shift;
+       my $text = shift;
+       my $line = $self->{line};
+       $line->print( $self->{netname} . $text );
+}
+
+sub curl
+{
+       my $self = shift;
+       my $url = shift;
+       my $next_stage = shift;
+
+       $self->{curl_next} = $next_stage;
+       Curl::start( $url, $self, @_ );
+}
+
+sub wait
+{
+       my $self = shift;
+       my $time = shift;
+       my $next_stage = shift;
+       my $msg = shift || "waiting";
+
+       $self->{wait_next} = $next_stage;
+       $self->{wait_msg} = $msg . " ";
+       Wait::start( $self, $time );
+}
+
+sub finish
+{
+       my $self = shift;
+
+       my $url = $self->{url};
+       $gotlist{ $url } = 1;
+
+       my $net = $self->{net};
+       my $id = $self->{id};
+       delete $running{ $net }->{ $id };
+
+       $checklist = 1;
+}
+
+sub download
+{
+       my $self = shift;
+
+       $self->{curl_next} = \&finish;
+       $self->print("starting download");
+       Curl::start( $self->{file_url}, $self, save => 1, @_ );
+}
+
+sub error
+{
+       my $self = shift;
+       my $msg = shift;
+
+       $self->print( $msg );
+}
+
+# }}}
+package Get::RapidShare; # {{{
+
+BEGIN {
+       our @ISA;
+       @ISA = qw(Get);
+}
+
+sub new
+{
+       my $proto = shift;
+       my $class = ref $proto || $proto;
+       my $url = shift;
<<diff output has been trimmed to 500 lines, 509 line(s) remained.>>
_______________________________________________
pld-cvs-commit mailing list
[email protected]
http://lists.pld-linux.org/mailman/listinfo/pld-cvs-commit

Reply via email to