Does anybody know how to extract links from an FTP site?
The script below works well on HTTP sites, but
it has rarely or never worked for me on FTP sites.
#
#
# This script extracts all links from the page fetched into $html
# and writes them to links.txt.
#
#
use strict;
use warnings;
use LWP::Simple;
use HTML::TreeBuilder;
# Fetch one page and write every link found in it to links.txt, one
# reconstructed tag per line, e.g. <a href="...">.
#
# Alternative HTTP test page:
#my $url = 'http://www.census.gov/geo/www/cob/co2000.html';
my $url = 'ftp://mcmcftp.er.usgs.gov/Katrina/508dpi/';

# LWP returns an FTP directory as a raw "text/ftp-dir-listing" unless the
# request says it accepts HTML; only then is the listing converted into an
# HTML page with one <a href> per entry.  Without this header there are no
# tags for extract_links to find, which is why FTP sites yielded nothing.
# $LWP::Simple::ua is the user agent object that get() uses internally.
$LWP::Simple::ua->default_header( Accept => 'text/html' )
    if $url =~ m{\A ftp: }xi;

# get() returns undef on any failure; stop before links.txt is truncated.
my $html = get($url);
die "$0: could not fetch $url\n" unless defined $html;

my $tree = HTML::TreeBuilder->new_from_content($html);

open my $out, '>', 'links.txt' or die "$0: open links.txt: $!";

# extract_links returns a reference to an array whose entries are
# [ $link, $element, $attribute_name, $tag_name ]; the element object
# itself is not needed for the report.
for my $found ( @{ $tree->extract_links } ) {
    my ( $link, undef, $attr, $tag ) = @{$found};
    print {$out} qq(<$tag $attr="$link">\n);
}

# Buffered write errors only surface at close, so check it.
close $out or die "$0: close links.txt: $!";

# Release the parse tree explicitly, as the HTML::TreeBuilder docs advise.
$tree->delete;
__END__
_______________________________________________
Houston mailing list
[email protected]
http://mail.pm.org/mailman/listinfo/houston
Website: http://houston.pm.org/