Would anyone like to translate the following Perl script to Python or Scheme (scsh)?
# The script takes an input path and reports all HTML files in it above a
# certain size (counting inline images). It also prints a sorted report of
# the HTML files and their sizes.
# (A copy of the script is here: http://xahlee.org/_scripts/check_file_size.pl )
#
#   Xah
#   [EMAIL PROTECTED]
#   ∑ http://xahlee.org/

# perl
# Tue Oct 4 14:36:48 PDT 2005
# given a dir, report all html file's size. (counting inline images)
# XahLee.org
#
# NOTE(review): the mailing-list archive replaced four '@'-containing
# expressions (three regexes and the argument to Dumper) with an
# "[EMAIL PROTECTED]" placeholder.  They are reconstructed below from the
# surrounding comments and context; verify against the original copy of the
# script linked above.

use strict;
use warnings;

use Data::Dumper;
use File::Find;
use File::Basename;

# Directory to scan.
# Should give a full path; else the $File::Find::dir won't give full path.
my $inpath = '/Users/t/web/mydirectory/';
# my $inpath = $ARGV[0];

# Files whose total size (HTML + inline images) exceeds this many bytes are
# reported as problems.
my $sizeLimit = 800 * 1000;

# Accumulates [total_size, html_file_full_path] pairs, filled by checkLink().
my @result;

# Get rid of trailing slash(es), but leave a lone "/" untouched.
$inpath =~ s{(?<=.)/+\z}{};

die "dir $inpath doesn't exist! $!" unless -e $inpath;

##################################################
# subroutines

# getInlineImg($file_full_path) returns an array that is a list of inline
# images, exactly as written in the file's src attributes.
# For example, it may return ('xx.jpg', '../image.png').
# Dies if the file cannot be opened.
sub getInlineImg {
    my ($full_file_name) = @_;
    my @linx = ();

    open(my $fh, '<', $full_file_name)
        or die "error: can not open $full_file_name $!";

    while (my $line = <$fh>) {
        # Everything after each "img" is a candidate tag body.  The split is
        # case-insensitive so that <IMG ...> is found too, matching the
        # case-insensitive src test below.
        my @txt_segs = split /img/i, $line;
        shift @txt_segs;    # text before the first "img" is not a tag body

        for my $seg (@txt_segs) {
            if ($seg =~ m{ src\s*=\s*"([^"]+)"}i) {
                push @linx, $1;
            }
        }
    }

    close $fh;
    return @linx;
}

# linkFullPath($dir, $locallink) returns a string that is the full path to
# the local link.  $dir is expected to end with a slash (as returned by
# fileparse).  For example,
#   linkFullPath('/Users/t/public_html/a/b/', '../image/t.png')
# returns '/Users/t/public_html/a/image/t.png'.
# The returned result will not contain double slash or '../' string.
sub linkFullPath {
    my ($dir, $locallink) = @_;
    my $result = $dir . $locallink;

    # Collapse runs of slashes into one.
    $result =~ s{//+}{/}g;

    # Repeatedly drop "/segment/.." pairs.  The lookahead makes sure the ".."
    # is a whole path segment, so a name such as "..foo" is left alone.
    while ($result =~ s{/[^/]+/\.\.(?=/|\z)}{}) { }

    return $result;
}

# listLocalLinks($html_file_full_path) returns an array where each element
# is a full path of local links in the html.
# NOTE(review): getlinks() is not defined anywhere in this script, and this
# sub is never called by it; calling it as-is dies at run time.  Presumably
# getlinks() lived in a companion script -- confirm before using.
sub listLocalLinks {
    my ($htmlfile) = @_;
    my (undef, $dir) = fileparse($htmlfile, '\.html');

    # Keep only local links: no fragments, no mail links, no remote URLs.
    my @local = grep { !m/\#/ && !m/^mailto:/ && !m/^https?:/ }
                getlinks($htmlfile);

    my @linkedFiles = map { linkFullPath($dir, $_) } @local;
    return @linkedFiles;
}

# listInlineImg($html_file_full_path) returns an array where each element is
# a full path to inline images in the html.
sub listInlineImg {
    my ($htmlfile) = @_;
    my (undef, $dir) = fileparse($htmlfile, '\.html');

    my @paths = map { linkFullPath($dir, $_) } getInlineImg($htmlfile);
    return @paths;
}

##################################################

# File::Find callback.  For every text file whose name ends in ".html",
# adds up the size of the file and of its inline images, prints a "problem"
# line when the total exceeds $sizeLimit, and records the total in @result.
sub checkLink {
    my $path = $File::Find::name;
    return unless -T $path and $path =~ m{\.html\z};

    # -s yields a false, non-numeric value for empty or missing files
    # (e.g. images referenced by a remote URL); count those as 0 bytes.
    my $total = (-s $path) || 0;
    for my $img (listInlineImg($path)) {
        $total += (-s $img) || 0;
    }

    if ($total > $sizeLimit) {
        print "problem: file: $path, size: $total\n";
    }
    push @result, [$total, $path];
    return;
}

find(\&checkLink, $inpath);

# Largest files first.
@result = sort { $b->[0] <=> $a->[0] } @result;

print Dumper(\@result);
print "done reporting. (any file above size are printed above.)";

__END__

-- 
http://mail.python.org/mailman/listinfo/python-list