unable to parse html

batch m Wed, 14 Aug 2002 15:11:16 -0700


--- batch m <[EMAIL PROTECTED]> wrote:
> Date: Wed, 14 Aug 2002 15:23:50 -0700 (PDT)
> From: batch m <[EMAIL PROTECTED]>
> Subject: unable to parse html
> To: [EMAIL PROTECTED]
> 
> Hail,
> I'm stuck with this parsing script, (attached) I
> found
> on perlmonks.org.
> 
> I've got Lemay's SAMS 21 day and O'reily's 2rd
> edition. 
> 
> My Perl experience is just modifying a config file
> from the plethora of free scripts to now finally
> trying to modify a script to accomplish the
> following
> task.
> 
> I'd like to import the contents of over 3k html
> files,
> between the tags <body> to </body> after I've
> grepped
> out the header and footer <!-includes-> into a table
> cell in the new templates I'm creating for the site.
> 
> I'm stuck on the error: 'unable to parse' 
> 
> line:              $content = $header_html . $1 .
> $footer_html;


============
total script
===========

# opens up original HTML and inserts
# into template html. and mirrors the other
# files that it doesn't parse....

#!/usr/bin/perl -w

use strict ;
use warnings ;
$|++ ;


#Define variables
##################
    #Directory to parse (with trailing slash)
my $open_dir = 'C:/My
Documents/carlist/carlist_test/';
    #Directory to save parsed documents to (with
trailing slash)
my $save_dir = 'C:/My
Documents/carlist/carlist_test/parsed/';
    #Location of template HTML file
my $html_file = 'C:/My
Documents/carlist/carlist_test/template/newtemplate.html';
    #Extensions allowed to parse
my @exts = ('html','htm','shtml','shtm');
my $exts;
my $ext;
my $header_html= 'C:/My
Documents/carlist/carlist_test/template/ccheader.html';
my $footer_html= 'C:/My
Documents/carlist/carlist_test/template/ccfooter.html';
my $title;

#Used to find html files to replace
use File::Find;
#Used to copy files to new location
use File::Copy;
#Assists in copying path directory structure
use File::Path;

#Starts the actual code
&main();

exit; #Just in case of any accidents

sub eachFile {
    my $filename = $_;
    my $fullpath = $File::Find::name;
    #remember that File::Find changes your CWD, 
    #so you can call open with just $_
    my $found = 0;
    foreach $ext (@exts) {
        if($filename=~/\.$ext$/) {
            print "\tOpening file $filename - ";
            my $content = &open_file("$filename");
            print "Completed\n";
            if($content=~m|<TITLE>(.*)</TITLE>|si) {
                $title = $1;
            } else{
                $title = "www.carlist.com - List your
used car for sale for FREE with the longest running
used car database in the world.";
            }
            print "\t\tParsing Document - ";
            if($content=~m|<BODY.*?>(.*?)</BODY>|si) {
                $content = $header_html . $1 .
$footer_html;
                $content =~ s|%title%|$title|;
                &save_file("$fullpath",$content);
                print "Completed\n";
            } else{
                print "Couldn't parse\n";
            }
            $found = 1;
            last; #So it doesn't reopen it with
similar extension
        }
    }
    if($found==0) {
        my $dir = $fullpath;
        $dir=~s/\Q$open_dir\E/$save_dir/i; #Removes
current root dir, and replaces it with new one
        copy("$fullpath","$dir");
    }
}

#Returns the contents of a filename specified...
sub open_file{
    my($file) = @_;
    my($file_contents) = "";
    open(DATA,"$file") || die "Not Completed\n";
        while(<DATA>) {
            $file_contents .= $_;
        }
    close(DATA);
    return($file_contents);
}

#Saves the new file to its new location
sub save_file{
    my($file,$file_contents) = @_;
    $file=~s/$open_dir/$save_dir/i; #Removes current
root dir, and replaces it with new one
    my($dir) = $file;
    if($dir =~ /(.*)\/.*/) {$dir = $1;}
    if(!(-e "$dir/")) {
        mkdir("$dir/",0755);
    }
    open(DATA,">$file") || die "Cannot saved parse
document\n";
        print DATA "$file_contents";
    close(DATA);
}

sub main{
    #Retrieves HTML for template
    print "Opening Template - ";
    open(FILE,"$html_file") || die "Cannot open HTML
because $!";
        my $data = join('',<FILE>);
    close(FILE);
    #Please let them be global variables
    ($header_html,$footer_html) =
split(/\%content\%/,$data);
    print "Successful\n";

    #Copys directory structure of old to new one...
    print "Copying Directory Path - ";
    mkpath([$open_dir, $save_dir], 1, 0711); 
    print "Successful\n";

    #Starts the actual File Search & Replace
    print "Starting Search and Replace - \n";
    find (\&eachFile, "$open_dir");
    print "Successful\n";
}

 

===============

> 
> I defined $header_html and $footer_html. Pointing to
> actual html header and footer files.
> 
> obliged,
> Stretch
> 



__________________________________________________
Do You Yahoo!?
HotJobs - Search Thousands of New Jobs
http://www.hotjobs.com

-- 
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

unable to parse html

Reply via email to