Author: sparky
Date: Thu Oct 29 03:48:11 2009
New Revision: 10874

Added:
   toys/rsget.pl/RSGet/Plugin.pm
      - copied, changed from rev 10863, toys/rsget.pl/RSGet/Processor.pm
   toys/rsget.pl/RSGet/Processor.pm
Modified:
   toys/rsget.pl/RSGet/Captcha.pm
   toys/rsget.pl/RSGet/Dispatch.pm
   toys/rsget.pl/RSGet/FileList.pm
   toys/rsget.pl/RSGet/Get.pm
   toys/rsget.pl/RSGet/HTTPRequest.pm
   toys/rsget.pl/RSGet/ListAdder.pm
   toys/rsget.pl/RSGet/Main.pm
   toys/rsget.pl/RSGet/Tools.pm
Log:
- compile plugins on demand, lowers memory usage in typical cases


Modified: toys/rsget.pl/RSGet/Captcha.pm
==============================================================================
--- toys/rsget.pl/RSGet/Captcha.pm      (original)
+++ toys/rsget.pl/RSGet/Captcha.pm      Thu Oct 29 03:48:11 2009
@@ -109,7 +109,7 @@
                return;
        }
 
-       my $getter = $getters{ $self->{_pkg} };
+       my $getter = RSGet::Plugin::from_pkg( $self->{_pkg} );
        my $dir = "captcha/$getter->{short}/$subdir";
        mkpath( $dir ) unless -d $dir;
 

Modified: toys/rsget.pl/RSGet/Dispatch.pm
==============================================================================
--- toys/rsget.pl/RSGet/Dispatch.pm     (original)
+++ toys/rsget.pl/RSGet/Dispatch.pm     Thu Oct 29 03:48:11 2009
@@ -161,7 +161,11 @@
        my $outif = find_free_if( $pkg, $working, get_slots( $cmd, 
$getter->{slots} ) );
        return unless defined $outif;
 
-       my $obj = RSGet::Get::new( $pkg, $cmd, $uri, $options, $outif );
+       my $obj = $getter->start( $cmd, $uri, $options, $outif );
+       if ( not $obj and $getter->{error} ) {
+               $options->{error} = $getter->{error};
+               return;
+       }
        $working->{ $uri } = $obj if $obj;
        
        return $obj;
@@ -275,31 +279,6 @@
        }
 }
 
-sub getter
-{
-       my $uri = shift;
-       foreach my $getter ( values %getters ) {
-               foreach my $re ( @{ $getter->{uri} } ) {
-                       return $getter
-                               if $uri =~ m{^http://(?:www\.)?$re};
-               }
-       }
-       return undef;
-}
-
-sub unigetter
-{
-       my $uri = shift;
-       my $getter = getter( $uri );
-       if ( $getter ) {
-               my $unify = $getter->{unify};
-               $uri = &$unify( $uri );
-               return $getter, $uri;
-       }
-       return undef, $uri;
-}
-
-
 1;
 
 # vim: ts=4:sw=4

Modified: toys/rsget.pl/RSGet/FileList.pm
==============================================================================
--- toys/rsget.pl/RSGet/FileList.pm     (original)
+++ toys/rsget.pl/RSGet/FileList.pm     Thu Oct 29 03:48:11 2009
@@ -165,8 +165,10 @@
                                next;
                        } elsif ( m{^(http://)?(.*?)$} ) {
                                my $proto = $1 || "http://";;
-                               my ( $getter, $uri ) = 
RSGet::Dispatch::unigetter( $proto . $2 );
+                               my $uri = $proto . $2;
+                               my $getter = RSGet::Plugin::from_uri( $uri );
                                if ( $getter ) {
+                                       $uri = $getter->unify( $uri );
                                        $options = {};
                                        $decoded{ $uri } = [ $getter, $options 
];
                                        next;

Modified: toys/rsget.pl/RSGet/Get.pm
==============================================================================
--- toys/rsget.pl/RSGet/Get.pm  (original)
+++ toys/rsget.pl/RSGet/Get.pm  Thu Oct 29 03:48:11 2009
@@ -48,21 +48,20 @@
 
 sub new
 {
-       my ( $pkg, $cmd, $uri, $options, $outif ) = @_;
-       my $getter = $getters{ $pkg };
+       my ( $getter, $cmd, $uri, $options, $outif ) = @_;
 
        my $self = {
                _uri => $uri,
                _opts => $options,
                _try => 0,
                _cmd => $cmd,
-               _pkg => $pkg,
+               _pkg => $getter->{pkg},
                _outif => $outif,
                _id => (sprintf "%.6x", int rand 1 << 24),
                _last_dump => 0,
                make_cookie( $getter->{cookie}, $cmd ),
        };
-       bless $self, $pkg;
+       bless $self, $getter->{pkg};
        $self->bestinfo();
 
        if ( verbose( 2 ) or $cmd eq "get" ) {
@@ -94,7 +93,7 @@
        return unless $line;
 
        my $outifstr = $self->{_outif} ? "[$self->{_outif}]" :  "";
-       my $getter = $getters{ $self->{_pkg} };
+       my $getter = RSGet::Plugin::from_pkg( $self->{_pkg} );
        new RSGet::Line( "[$getter->{short}]$outifstr ", $self->{_name} . ": " 
. $text );
 }
 

Modified: toys/rsget.pl/RSGet/HTTPRequest.pm
==============================================================================
--- toys/rsget.pl/RSGet/HTTPRequest.pm  (original)
+++ toys/rsget.pl/RSGet/HTTPRequest.pm  Thu Oct 29 03:48:11 2009
@@ -567,7 +567,7 @@
        my $r = '<fieldset id="f_listask"><legend>Select clone</legend>'
                . '<ul class="flist">';
        my ( $uri, $options, $clones ) = @$ask;
-       my $getter = RSGet::Dispatch::getter( $uri );
+       my $getter = RSGet::Plugin::from_uri( $uri );
 
        my $list_ids = $list->{ids};
        $list_ids->{addclone} = { uri => $uri };
@@ -581,7 +581,7 @@
                                name => $ucd->[1],
                                size => $ucd->[3],
                        };
-                       my $getter = RSGet::Dispatch::getter( $uri );
+                       my $getter = RSGet::Plugin::from_uri( $uri );
                        $r .= file_info( $list_ids, "addclone", $uri, $getter, 
$options, ['SELECT'] );
                }
        }

Modified: toys/rsget.pl/RSGet/ListAdder.pm
==============================================================================
--- toys/rsget.pl/RSGet/ListAdder.pm    (original)
+++ toys/rsget.pl/RSGet/ListAdder.pm    Thu Oct 29 03:48:11 2009
@@ -73,8 +73,9 @@
                        }
                }
 
-               (my $getter, $uri) = RSGet::Dispatch::unigetter( $uri );
+               my $getter = RSGet::Plugin::from_uri( $uri );
                next unless $getter;
+               $uri = $getter->unify( $uri );
                next if exists $all_uris{ $uri };
                $all_uris{ $uri } = 1;
                my $options = {};
@@ -132,8 +133,9 @@
                                if ( my $links = $save->{links} ) {
                                        my @new;
                                        foreach my $luri ( @$links ) {
-                                               my ($getter, $uri) = 
RSGet::Dispatch::unigetter( $luri );
+                                               my $getter = 
RSGet::Plugin::from_uri( $luri );
                                                if ( $getter ) {
+                                                       my $uri = 
$getter->unify( $luri );
                                                        push @new, { cmd => 
"ADD", globals => {}, uris => { $uri => [ $getter, {} ] } };
                                                } else {
                                                        push @new, "# 
unsupported uri: $uri";

Modified: toys/rsget.pl/RSGet/Main.pm
==============================================================================
--- toys/rsget.pl/RSGet/Main.pm (original)
+++ toys/rsget.pl/RSGet/Main.pm Thu Oct 29 03:48:11 2009
@@ -16,7 +16,7 @@
 use RSGet::MortalObject;
 use RSGet::Line;
 use RSGet::ListManager;
-use RSGet::Processor;
+use RSGet::Plugin;
 use RSGet::Tools;
 use RSGet::Wait;
 use Time::HiRes;
@@ -239,32 +239,16 @@
                foreach my $type ( qw(Get Link Video) ) {
                        my $dir = "$path/$type";
                        next unless -d $dir;
+                       my $count = 0;
                        foreach ( sort glob "$path/$type/*" ) {
-                               add_getter( $type, $_ );
+                               $count += RSGet::Plugin::add( $type, $_ );
                        }
+                       new RSGet::Line( "INIT: ", "$dir: found $count new 
plugins\n" )
+                               if $count;
                }
        }
 }
 
-sub add_getter
-{
-       my $type = shift;
-       local $_ = shift;
-       return if /~$/;
-       return if m{/\.[^/]*$};
-       ( my $file = $_ ) =~ s#.*/##;
-       return if exists $getters{ $type . "::" . $file };
-       my ( $pkg, $getter ) = RSGet::Processor::read_file( $type, $_ );
-       my $msg = "${type}/$file: failed";
-       if ( $pkg and $getter ) {
-               $getters{ $pkg } = $getter;
-               $msg = "$pkg: added\n";
-               new RSGet::Line( "INIT: ", $msg );
-       } else {
-               warn "$msg\n";
-       }
-}
-
 sub loop
 {
        # main loop

Copied: toys/rsget.pl/RSGet/Plugin.pm (from rev 10863, 
toys/rsget.pl/RSGet/Processor.pm)
==============================================================================
--- toys/rsget.pl/RSGet/Processor.pm    (original)
+++ toys/rsget.pl/RSGet/Plugin.pm       Thu Oct 29 03:48:11 2009
@@ -1,4 +1,4 @@
-package RSGet::Processor;
+package RSGet::Plugin;
 # This file is an integral part of rsget.pl downloader.
 #
 # 2009 (c) Przemysław Iskra <[email protected]>
@@ -7,271 +7,274 @@
 
 use strict;
 use warnings;
+use RSGet::Processor;
 use RSGet::Tools;
 set_rev qq$Id$;
 
-my $options = "name|short|slots|cookie|status|min_ver";
+my %getters;
 
-my $processed = "";
-sub pr(@)
+sub read_file($)
 {
-       my $line = join "", @_;
-       $processed .= $line;
-       return length $line;
-}
-
-my $is_sub = 0;
-my $last_cmd = undef;
-sub p_sub
-{
-       my $sub = shift;
-       pr "sub $sub {\n";
-       pr "\tmy \$self = shift;\n";
-       foreach ( @_ ) {
-               pr "\t$_;\n";
-       }
-       $is_sub++;
-}
-sub p_subend
-{
-       return unless $is_sub;
-       $is_sub--;
-
-       my $error = 'unexpected end of script';
-       if ( $last_cmd and $last_cmd =~ /(?:click_)?download/ ) {
-               $error = 'download is a HTML page';
-       }
-       $last_cmd = undef;
-       pr "\treturn \${self}->error( '$error' );\n}\n";
-}
-
-my $space;
-sub p_ret
-{
-       my $ret = shift;
-       my @opts = @_;
-       pr $space . "return \${self}->${ret}( ";
-       pr join( ", ", @opts ) . ", " if @opts;
-}
-
-sub p_func
-{
-       my $f = shift;
-       pr $space . "\${self}->$f(";
-}
-
-sub p_line
-{
-       s/\$-{/\$self->{/g;
-       pr $_ . "\n";
-}
-
-
-sub read_file
-{
-       my $class = shift;
-       my $file = shift;
+       my $self = shift;
+       my $file = $self->{file};
 
-       open F_IN, '<', $file;
+       open F_IN, '<', $file or return;
 
        my %opts = (
                uri => [],
+               map { $_ => undef } qw(name short slots cookie status),
        );
+       my $opts = join "|", keys %opts;
+
        my %parts = (
-               unify => [],
-               pre => [],
-               start => [],
-               perl => [],
+               map { $_ => [] } qw(unify pre start perl),
        );
        my $parts = join "|", keys %parts;
 
-       my $part = undef;
+       my $part;
        while ( <F_IN> ) {
                chomp;
-               next unless length;
                next if /^\s*#/;
+               next if /^\s*$/;
 
-               if ( $part ) {
-                       unless ( /^\S+/ ) {
-                               push @{$parts{$part}}, $_;
-                               next;
-                       }
-                       if ( $part eq "perl" ) {
-                               push @{$parts{perl}}, $_."\n", <F_IN>;
-                               last;
-                       } elsif ( $part eq "start" and /^stage_.*?:/ ) {
-                               push @{$parts{start}}, $_;
-                               next;
-                       }
-                       $part = undef;
+               if ( /^($parts)\s*:/ ) {
+                       $part = $1;
+                       last;
+               }
+
+               my ( $key, $value );
+               unless ( ($key, $value) = /^($opts)\s*:\s+(.*)$/ ) {
+                       warn "$file: unrecognized line: $_\n";
+                       next;
                }
 
+               if ( ref $opts{ $key } ) {
+                       push @{ $opts{ $key } }, $value;
+               } else {
+                       warn "$file: $key overwritten (changed from '$opts{ 
$key }' to '$value')\n"
+                               if defined $opts{ $key };
+                       $opts{ $key } = $value;
+               }
+       }
+
+       while ( <F_IN> ) {
+               chomp;
+               next if /^\s*#/;
+               next if /^\s*$/;
+
                if ( /^($parts)\s*:/ ) {
                        $part = $1;
-               } elsif ( /^uri\s*:\s+(.*)$/ ) {
-                       push @{$opts{uri}}, $1;
-               } elsif ( /^($options)\s*:\s+(.*)$/ ) {
-                       $opts{$1} = $2;
+                       if ( $part eq "perl" ) {
+                               my @perl = <F_IN>;
+                               $parts{perl} = \...@perl;
+                       }
+                       next;
                }
+
+               push @{ $parts{ $part } }, $_;
        }
 
        close F_IN;
-       unless ( scalar @{$parts{start}} ) {
-               p "Can't find 'start:'\n";
-               return undef;
-       }
-       unless ( @{$opts{uri}} ) {
-               p "Can't find 'uri:'\n";
-               return undef;
+
+       foreach my $k ( keys %opts ) {
+               $self->{ $k } = $opts{ $k };
+       }
+
+       return \%parts;
+}
+
+sub check_opts
+{
+       my $self = shift;
+       my $file = shift;
+       my $plugin_class = shift;
+
+       unless ( @{$self->{uri}} ) {
+               return "Can't find 'uri:'\n";
        }
+
        foreach ( qw(name short) ) {
-               next if $opts{$_};
-               p "Can't find '$_:'\n";
-               return undef;
+               next if $self->{$_};
+               return "Can't find '$_:'\n";
        }
+
        $file =~ m{.*/(.*?)$};
        my $fname = $1;
-       if ( $fname ne $opts{name} ) {
-               p "Name field: '$opts{name}' differs from file name: 
'$fname'\n";
-               return undef;
-       }
-       if ( $opts{status} and $opts{status} !~ /^OK(\s+.*)?$/ ) {
-               p "Marked as '$opts{status}'\n";
-               return undef;
-       }
-
-       $processed = "";
-       $space = "";
-       $last_cmd = undef;
-       $is_sub = 0;
-
-       $opts{uri} = [ map { eval $_ } @{$opts{uri}} ];
-       $opts{class} = ${class};
-       $opts{pkg} = "${class}::$opts{name}";
-       $opts{unify} = join "\n", @{ $parts{unify} };
-       $opts{unify} ||= 's/#.*//; s{/$}{};';
-
-       pr "package $opts{pkg};\n\n";
-       pr <<'EOF';
-       use strict;
-       use warnings;
-       use RSGet::Get;
-       use RSGet::Tools;
-       use URI::Escape;
-
-       BEGIN {
-               our @ISA;
-               @ISA = qw(RSGet::Get);
-       }
-
-       my $STDSIZE = qr/\d+(?:\.\d+)?\s*[kmg]?b/i;
-EOF
-
-       pr join "\n", @{$parts{pre}}, "\n";
-
-       my $stage = 0;
-       p_sub( "stage0" );
-       my @machine = @{$parts{start}};
-       while ( $_ = shift @machine ) {
-               $space = "";
-               $space = $1 if s/^(\s+)//;
-
-               if ( s/^(GET|WAIT|CAPTCHA|(?:CLICK_)?DOWNLOAD|CLICK)\s*\(// ) {
-                       my $cmd = lc $1;
-                       my $next_stage = "stage" . ++$stage;
-                       my @skip;
-                       push @skip, $_;
-                       until ( /;\s*$/ ) {
-                               $_ = shift @machine;
-                               push @skip, $_;
-                       }
-                       p_ret( $cmd, "\\&$next_stage" );
-                       foreach ( @skip ) {
-                               p_line();
-                       }
-                       p_subend();
-                       $last_cmd = $cmd;
-                       p_sub( $next_stage );
-               } elsif ( s/^(GET|WAIT|CAPTCHA|CLICK)_NEXT\s*\(\s*(.*?)\s*,// ) 
{
-                       my $cmd = lc $1;
-                       my $next_stage = $2;
-                       p_ret( $cmd, "\\&$next_stage" );
-                       p_line();
-               } elsif ( s/^GOTO\s+(stage_[a-z0-9_]+)// ) {
-                       p_ret( $1 );
-                       pr ')';
-                       p_line();
-               } elsif ( s/^(stage_[a-z0-9_]+)\s*:\s*(.*)$// ) {
-                       my $next_stage = $1;
-                       my $left = $_;
-                       p_ret( $next_stage );
-                       pr ');';
-                       p_subend();
-                       p_sub( $next_stage );
-                       $_ = $left;
-                       redo if /\S/;
-               } elsif ( s/^(ERROR|RESTART|LINK|MULTI)\s*\(// ) {
-                       p_ret( lc $1 );
-                       p_line();
-               } elsif ( s/^INFO\s*\(// ) {
-                       pr $space . 'return "info" if $self->info( ';
-                       p_line();
-               } elsif ( s/^SEARCH\s*\(// ) {
-                       pr $space . 'return if $self->search( ';
-                       p_line();
-               } elsif ( s/^(PRINT|LOG|COOKIE|CAPTCHA_RESULT)\s*\(// ) {
-                       p_func( lc $1 );
-                       p_line();
-               } elsif ( s/^!\s+// ) {
-                       my $line = quotemeta $_;
-                       pr $space . 'return $self->problem( "'. $line .'" ) 
unless ';
-                       p_line();
+       if ( $fname eq $self->{name} ) {
+               $self->{pkg} = $plugin_class."::". $self->{name};
+       } else {
+               return "Name field: '$self->{name}' differs from file name\n";
+       }
+
+       if ( $self->{status} and $self->{status} =~ /^OK(\s+.*)?$/ ) {
+               return "";
+       }
+
+       return "Incorrect status\n";
+}
+
+sub check_parts
+{
+       my $class = shift;
+       my $parts = shift;
+
+       unless ( @{ $parts->{start} } ) {
+               return "Can't find start\n";
+       }
+
+       return "";
+}
+
+sub eval_uris
+{
+       my $self = shift;
+       my $in = $self->{uri};
+       my @out;
+
+       local $SIG{__DIE__};
+       delete $SIG{__DIE__};
+
+       foreach my $uri_text ( @$in ) {
+               my $re = eval $uri_text;
+               if ( $@ ) {
+                       warn "Problem with uri $uri_text: $...@\n";
+               } elsif ( not $re ) {
+                       warn "Problem with uri $uri_text\n";
+               } elsif ( not ref $re or ref $re ne "Regexp" ) {
+                       warn "URI $uri_text is not a regular expression\n";
                } else {
-                       pr $space;
-                       p_line();
+                       push @out, $re;
                }
        }
-       p_subend();
 
-       pr @{$parts{perl}};
+       $self->{uri} = \...@out;
+}
 
-       pr "\npackage $opts{pkg};\n";
-       pr "sub unify { local \$_ = shift; $opts{unify};\nreturn \$_;\n};\n";
-       pr '\&unify;';
-
-       my $unify = eval_it( $processed );
-
-       if ( $@ ) {
-               p "Error(s): $@";
-               return undef unless verbose( 1 );
-               my $err = $@;
-               return undef unless $err =~ /line \d+/;
-               my @p = split /\n/, $processed;
-               for ( my $i = 0; $i < scalar @p; $i++ ) {
-                       my $n = $i + 1;
-                       p sprintf "%s%4d: %s\n",
-                               ($err =~ /line $n[^\d]/ ? "!" : " "),
-                               $n,
-                               $p[ $i ];
-               }
-               return undef;
+sub new
+{
+       my $class = shift;
+       my $type = shift;
+       my $file = shift;
+
+       my $self = {
+               file => $file,
+               class => $type,
+       };
+       bless $self, $class;
+
+       my $parts = $self->read_file();
+       return undef unless $parts;
+       my $error = "";
+       $error .= $self->check_opts( $file, $type );
+       $error .= $self->check_parts( $parts );
+
+       $self->eval_uris();
+       return undef unless @{ $self->{uri} };
+
+       $self->{error} = "$self->{pkg} plugin error: $error" if $error;
+       p $file . ": " . $self->{error} if $error;
+
+       return $self;
+}
+
+sub compile
+{
+       my $self = shift;
+       $self->{compiled} = 1;
+       return if $self->{error};
+       p "Compiling $self->{pkg} plugin";
+
+       my $parts = $self->read_file();
+       unless ( $parts ) {
+               $self->{error} = "$self->{pkg} compilation error: cannot read 
file $self->{file}";
+               p "Compilation failed";
+       }
+
+       my $unify = RSGet::Processor::compile( $self, $parts );
+
+       if ( ref $unify and ref $unify eq "CODE" ) {
+               $self->{unify} = $unify;
+               p "Compilation successful";
+       } else {
+               $self->{error} = "$self->{pkg} compilation error";
+               p "Compilation failed";
        }
-       if ( not $unify or not ref $unify or ref $unify ne "CODE" ) {
-               my $ru = ref $unify || "undef";
-               p "Error: invalid, unify returned '$ru'";
-               return undef;
+}
+
+sub can_do
+{
+       my $self = shift;
+       my $uri = shift;
+
+       foreach my $re ( @{ $self->{uri} } ) {
+               return 1 if $uri =~ m{^http://(?:www\.)?$re};
        }
-       $opts{unify} = $unify;
+       return 0;
+}
+
+sub unify
+{
+       my $self = shift;
+       my $uri = shift;
 
-       return $opts{pkg} => \%opts;
-       return ();
+       $self->compile() unless $self->{compiled};
+       return $uri if $self->{error};
+
+       my $func = $self->{unify};
+       return $uri unless $func;
+
+       return &$func( $uri );
 }
 
-sub eval_it
+sub start
 {
-       local $SIG{__DIE__};
-       delete $SIG{__DIE__};
-       return eval shift;
+       my $self = shift;
+       my @args = @_;
+
+       $self->compile() unless $self->{compiled};
+       return undef if $self->{error};
+
+       return RSGet::Get::new( $self, @args );
+}
+
+
+sub add
+{
+       my $type = shift;
+       local $_ = shift;
+       return 0 if /~$/;
+       return 0 if m{/\.[^/]*$};
+       ( my $file = $_ ) =~ s#.*/##;
+       return 0 if exists $getters{ $type . "::" . $file };
+       my $plugin = new RSGet::Plugin( $type, $_ );
+       if ( $plugin ) {
+               my $pkg = $plugin->{pkg};
+               $getters{ $pkg } = $plugin;
+               new RSGet::Line( "INIT: ", "$pkg: added" )
+                       if verbose( 1 );
+               return 1;
+       } else {
+               warn "${type}/$file: failed\n";
+               return 0;
+       }
+}
+
+
+
+sub from_uri
+{
+       my $uri = shift;
+       foreach my $getter ( values %getters ) {
+               return $getter if $getter->can_do( $uri );
+       }
+       return undef;
+}
+
+sub from_pkg
+{
+       my $pkg = shift;
+
<<diff output has been trimmed to 500 lines, 4 line(s) remained.>>

Added: toys/rsget.pl/RSGet/Processor.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/Processor.pm    Thu Oct 29 03:48:11 2009
@@ -0,0 +1,202 @@
+package RSGet::Processor;
+# This file is an integral part of rsget.pl downloader.
+#
+# 2009 (c) Przemysław Iskra <[email protected]>
+#              This program is free software,
+# you may distribute it under GPL v2 or newer.
+
+use strict;
+use warnings;
+use RSGet::Tools;
+set_rev qq$Id$;
+
+my $processed = "";
+sub pr(@)
+{
+       my $line = join "", @_;
+       $processed .= $line;
+       return length $line;
+}
+
+my $is_sub = 0;
+my $last_cmd = undef;
+sub p_sub
+{
+       my $sub = shift;
+       pr "sub $sub {\n";
+       pr "\tmy \$self = shift;\n";
+       foreach ( @_ ) {
+               pr "\t$_;\n";
+       }
+       $is_sub++;
+}
+sub p_subend
+{
+       return unless $is_sub;
+       $is_sub--;
+
+       my $error = 'unexpected end of script';
+       if ( $last_cmd and $last_cmd =~ /(?:click_)?download/ ) {
+               $error = 'download is a HTML page';
+       }
+       $last_cmd = undef;
+       pr "\treturn \${self}->error( '$error' );\n}\n";
+}
+
+my $space;
+sub p_ret
+{
+       my $ret = shift;
+       my @opts = @_;
+       pr $space . "return \${self}->${ret}( ";
+       pr join( ", ", @opts ) . ", " if @opts;
+}
+
+sub p_func
+{
+       my $f = shift;
+       pr $space . "\${self}->$f(";
+}
+
+sub p_line
+{
+       s/\$-{/\$self->{/g;
+       pr $_ . "\n";
+}
+
+
+sub compile
+{
+       my $opts = shift;
+       my $parts = shift;
+
+       $processed = "";
+       $space = "";
+       $last_cmd = undef;
+       $is_sub = 0;
+
+       my $unify_body = ( join "\n", @{ $parts->{unify} } ) || 's/#.*//; 
s{/$}{};';
+
+       pr "package $opts->{pkg};\n\n";
+       pr <<'EOF';
+       use strict;
+       use warnings;
+       use RSGet::Get;
+       use RSGet::Tools;
+       use URI::Escape;
+
+       BEGIN {
+               our @ISA;
+               @ISA = qw(RSGet::Get);
+       }
+
+       my $STDSIZE = qr/\d+(?:\.\d+)?\s*[kmg]?b/i;
+EOF
+
+       pr join "\n", @{$parts->{pre}}, "\n";
+
+       my $stage = 0;
+       p_sub( "stage0" );
+       my @machine = @{ $parts->{start} };
+       while ( $_ = shift @machine ) {
+               $space = "";
+               $space = $1 if s/^(\s+)//;
+
+               if ( s/^(GET|WAIT|CAPTCHA|(?:CLICK_)?DOWNLOAD|CLICK)\s*\(// ) {
+                       my $cmd = lc $1;
+                       my $next_stage = "stage" . ++$stage;
+                       my @skip;
+                       push @skip, $_;
+                       until ( /;\s*$/ ) {
+                               $_ = shift @machine;
+                               push @skip, $_;
+                       }
+                       p_ret( $cmd, "\\&$next_stage" );
+                       foreach ( @skip ) {
+                               p_line();
+                       }
+                       p_subend();
+                       $last_cmd = $cmd;
+                       p_sub( $next_stage );
+               } elsif ( s/^(GET|WAIT|CAPTCHA|CLICK)_NEXT\s*\(\s*(.*?)\s*,// ) 
{
+                       my $cmd = lc $1;
+                       my $next_stage = $2;
+                       p_ret( $cmd, "\\&$next_stage" );
+                       p_line();
+               } elsif ( s/^GOTO\s+(stage_[a-z0-9_]+)// ) {
+                       p_ret( $1 );
+                       pr ')';
+                       p_line();
+               } elsif ( s/^(stage_[a-z0-9_]+)\s*:\s*(.*)$// ) {
+                       my $next_stage = $1;
+                       my $left = $_;
+                       p_ret( $next_stage );
+                       pr ');';
+                       p_subend();
+                       p_sub( $next_stage );
+                       $_ = $left;
+                       redo if /\S/;
+               } elsif ( s/^(ERROR|RESTART|LINK|MULTI)\s*\(// ) {
+                       p_ret( lc $1 );
+                       p_line();
+               } elsif ( s/^INFO\s*\(// ) {
+                       pr $space . 'return "info" if ${self}->info( ';
+                       p_line();
+               } elsif ( s/^SEARCH\s*\(// ) {
+                       pr $space . 'return if ${self}->search( ';
+                       p_line();
+               } elsif ( s/^(PRINT|LOG|COOKIE|CAPTCHA_RESULT)\s*\(// ) {
+                       p_func( lc $1 );
+                       p_line();
+               } elsif ( s/^!\s+// ) {
+                       my $line = quotemeta $_;
+                       pr $space . 'return ${self}->problem( "'. $line .'" ) 
unless ';
+                       p_line();
+               } else {
+                       pr $space;
+                       p_line();
+               }
+       }
+       p_subend();
+
+       pr @{$parts->{perl}};
+
+       pr "\npackage $opts->{pkg};\n";
+       pr "sub unify { local \$_ = shift; $unify_body;\nreturn \$_;\n};\n";
+       pr '\&unify;';
+
+       my $unify = eval_it( $processed );
+
+       if ( $@ ) {
+               p "Error(s): $@";
+               return undef unless verbose( 1 );
+               my $err = $@;
+               return undef unless $err =~ /line \d+/;
+               my @p = split /\n/, $processed;
+               for ( my $i = 0; $i < scalar @p; $i++ ) {
+                       my $n = $i + 1;
+                       p sprintf "%s%4d: %s\n",
+                               ($err =~ /line $n[^\d]/ ? "!" : " "),
+                               $n,
+                               $p[ $i ];
+               }
+               return undef;
+       }
+       if ( not $unify or not ref $unify or ref $unify ne "CODE" ) {
+               my $ru = ref $unify || "undef";
+               p "Error: invalid, unify returned '$ru'";
+               return undef;
+       }
+       return $unify;
+}
+
+sub eval_it
+{
+       local $SIG{__DIE__};
+       delete $SIG{__DIE__};
+       return eval shift;
+}
+
+1;
+
+# vim: ts=4:sw=4

Modified: toys/rsget.pl/RSGet/Tools.pm
==============================================================================
--- toys/rsget.pl/RSGet/Tools.pm        (original)
+++ toys/rsget.pl/RSGet/Tools.pm        Thu Oct 29 03:48:11 2009
@@ -16,10 +16,9 @@
 @ISA = qw(Exporter);
 @EXPORT = qw(set_rev s2string bignum de_ml hadd hprint p isotime require_prog
        irand jstime def_settings setting verbose
-       data_file dump_to_file randomize %getters);
+       data_file dump_to_file randomize);
 @EXPORT_OK = qw();
 
-our %getters;
 our %revisions;
 
 sub set_rev($)
_______________________________________________
pld-cvs-commit mailing list
[email protected]
http://lists.pld-linux.org/mailman/listinfo/pld-cvs-commit

Reply via email to