Does anybody know how to extract the links from an FTP site?
The script below works well on HTTP sites, but it
has rarely or never worked for me on FTP sites.

#
#
# This script extracts all links from the fetched page and writes them to links.txt.
#
#
use strict;
use warnings;

use LWP::Simple;
use HTML::TreeBuilder;


# Fetch a page (an HTTP page or an FTP directory listing), parse it as HTML,
# and write one line per link found to links.txt in the form
#   <tag attr="link">
# Dies with a message naming the failing step if the fetch, the open, a
# write, or the close fails.
use LWP::UserAgent;

#my $url = 'http://www.census.gov/geo/www/cob/co2000.html';
my $url = 'ftp://mcmcftp.er.usgs.gov/Katrina/508dpi/';

# Why FTP needs special care: for a directory, LWP's FTP handler negotiates
# between an HTML rendering (one <a href> per entry) and the raw
# "text/ftp-dir-listing" output.  When the request carries no Accept header
# the raw listing wins, and a raw listing contains no tags for
# extract_links to find.  LWP::Simple::get sends no Accept header, so ask
# for text/html explicitly via LWP::UserAgent instead.
my $ua = LWP::UserAgent->new;
$ua->env_proxy;    # honour *_proxy environment variables, as LWP::Simple does

my $response = $ua->get($url, Accept => 'text/html');
die "$0: GET $url: " . $response->status_line . "\n"
  unless $response->is_success;

my $html = $response->decoded_content;
$html = $response->content unless defined $html;   # decoding failed: use raw body

my $tree = HTML::TreeBuilder->new_from_content($html);

open my $out, '>', 'links.txt' or die "$0: open links.txt: $!";

# extract_links returns a reference to an array of
# [ $link, $element, $attr, $tag ] records; the element object itself is
# not needed for the output line.
for my $record (@{ $tree->extract_links }) {
  my ($link, undef, $attr, $tag) = @$record;
  print {$out} qq(<$tag $attr="$link">\n)
    or die "$0: write links.txt: $!";
}

# Buffered write errors only surface at close, so check it.
close $out or die "$0: close links.txt: $!";

# HTML::TreeBuilder trees should be deleted explicitly when no longer needed.
$tree->delete;

__END__

_______________________________________________
Houston mailing list
[email protected]
http://mail.pm.org/mailman/listinfo/houston
Website: http://houston.pm.org/

Reply via email to