I've got a script designed to do mass HTTP grabs, but after ~2000 requests it
runs into this check in POE::Component::Client::HTTP's connect-error handler
sub poco_weeble_connect_error {
my ($kernel, $heap, $operation, $errnum, $errstr, $wheel_id) =
@_[KERNEL, HEAP, ARG0..ARG3];
DEBUG and
warn "wheel $wheel_id encountered $operation error $errnum: $errstr\n";
# Drop the wheel and its cross-references.
my $request_id = delete $heap->{wheel_to_request}->{$wheel_id};
die "expected a request ID, but there is none" unless defined $request_id;
and dies. However, I can't figure out where the request ID is being lost. My
code follows:
#!/usr/local/bin/perl
use strict;
$| = 1;
use POE qw/Component::Client::HTTP/;
use HTTP::Request;
use HTML::HeadParser;
# Command-line contract:
#   $ARGV[0] - how many requests handler_start() launches up front
#   $ARGV[1] - file with one host name per line; also names the output
#              directory "out.<file>"
@ARGV >= 2
    or die "usage: $0 <concurrent-requests> <host-list-file>\n";

# IN stays a package filehandle because start_next_request() reads from it.
# Three-argument open keeps odd characters in the file name from being
# interpreted as an open mode, and a missing file is now a hard error
# instead of a run that silently fetches nothing.
open IN, '<', $ARGV[1]
    or die "cannot open host list '$ARGV[1]': $!\n";

# Use the mkdir builtin instead of system("mkdir ..."): no shell is involved,
# so metacharacters in the argument cannot run commands, and failure is
# reported.  An already existing directory is accepted, as before.
my $out_dir = "out.$ARGV[1]";
-d $out_dir
    or mkdir $out_dir
    or die "cannot create output directory '$out_dir': $!\n";

# Start the HTTP client component; sessions reach it through the alias 'ua'.
POE::Component::Client::HTTP->spawn(
    Timeout => 5,
    Agent   => 'Mozilla/4.0 (compatible; MSIE 5.5; Windows 98)',
    Alias   => 'ua',
);
# _start handler: primes the pipeline by launching $ARGV[0] requests.
# handler_response() starts one replacement request for every response it
# receives, so this initial count is also the number kept in flight.
sub handler_start {
    # $_[KERNEL] rather than the one-element slice @_[KERNEL]: the value is
    # the same, but the scalar form is what is meant and does not trigger
    # Perl's "Scalar value ... better written as" warning.
    my $kernel = $_[KERNEL];

    start_next_request($kernel) for 1 .. $ARGV[0];
    return;
}
# Read the next host name from the IN filehandle and post a GET request for
# "http://<host>/" to the 'ua' HTTP client session; the reply is delivered
# to this session's 'response' state.
#
#   $kernel - the POE kernel used to post the request
#
# Does nothing once IN is exhausted.
sub start_next_request {
    my ($kernel) = @_;

    # Test definedness, not truth: a final line of "0" with no trailing
    # newline is false but is still input, not end-of-file.
    while ( defined( my $line = <IN> ) ) {

        # Strip surrounding whitespace, including the "\r" of CRLF files.
        $line =~ s/\A\s+|\s+\z//g;

        # A blank line would otherwise turn into a request for "http:///".
        next if $line eq '';

        my $req = HTTP::Request->new( GET => "http://$line/" );
        $kernel->post( 'ua', 'request', 'response', $req );
        return;
    }

    return;
}
# 'response' handler: ARG0 and ARG1 are array references whose first
# elements are the original HTTP::Request and the resulting HTTP::Response.
# A successful body is written to "out.$ARGV[1]/<host>".  Whatever the
# outcome, one new request is started so the pipeline keeps flowing.
sub handler_response {
    my ( $kernel, $req_p, $res_p ) = @_[ KERNEL, ARG0, ARG1 ];
    my ( $req, $res ) = ( $req_p->[0], $res_p->[0] );

    if ( $res->is_success ) {
        my $url = $req->uri;

        # Capture only the host part.  The match is checked so a stale $1
        # from an earlier match is never used, and [^/]+ stops at the first
        # slash so the file name cannot contain a path separator.
        if ( $url =~ m{\Ahttp://([^/]+)/} ) {
            my $path = "out.$ARGV[1]/$1";

            # Failures are reported but not fatal: one bad file must not
            # abort the whole crawl.
            if ( open my $out, '>', $path ) {
                binmode $out;    # response bodies are raw bytes
                print {$out} $res->content
                    or warn "cannot write '$path': $!\n";
                close $out
                    or warn "cannot close '$path': $!\n";
            }
            else {
                warn "cannot open '$path' for writing: $!\n";
            }
        }
        else {
            warn "cannot derive an output file name from '$url'\n";
        }
    }

    start_next_request($kernel);
}
# Map event names to their handlers, create the one session that drives the
# crawl, then hand control to the POE event loop.
my %state_for = (
    _start   => \&handler_start,
    response => \&handler_response,
);

POE::Session->create( inline_states => \%state_for );

$poe_kernel->run();
exit;
--
Bring me my etherkiller; Oh clouds unfold! / Bring me the magic smoke of desire
I shall not cease from mental fight / Nor shall my LART rest in my hand
Till we have buried the bodies / Of all the lusers in all this land
-- rpg, ASR [ My homepage is http://www.trout.me.uk/ ]