The problem is most likely in your algorithm. Show us the code.

(Oops. Replied only to Charles by accident. Reposting to the list:)

Sorry. I posted only part of the real code to avoid posting an overly long listing. It seemed to me that the problem was more likely some limitation outside Perl, since the code ran fine on files up to a certain size. However, I'll post the whole code if you'd like:

(The script creates a file that can be loaded into a MySQL database.)


#!/usr/bin/perl

# Convert an HTML export of published stories into a file of SQL statements
# that can be loaded into MySQL.
#
# The input is consumed in fixed groups of 15 lines per story:
#   line 1      - starts a new INSERT statement (its content is not inspected)
#   line 2      - issue date written as M/D/YYYY
#   lines 3-14  - one <TD>...</TD> cell per column, in the order given by
#                 @cell_columns below
#   line 15     - ignored; closes the group
#
# The output starts with the table definition, followed by one
# "INSERT INTO story SET ..." statement per story.

use warnings;
use strict;

my $source_file = "/users/jamesmarks/desktop/published_stories.htm";
my $destination_file = "/users/jamesmarks/desktop/published_stories.tb";
my $line_count = 0;          # position inside the current 15-line record
my $total_line_count = 0;    # position inside the whole file (diagnostics)

# Number of input lines that make up one story record.
my $LINES_PER_RECORD = 15;

# Columns filled from record lines 3 through 14, in that order.
my @cell_columns = qw(
    section byline staff_writer headline subhead body_copy
    caption_1 caption_2 caption_3 caption_4 caption_5 caption_6
);

# Columns whose cell has every <BR> tag removed before extraction.
my %strip_breaks = map { $_ => 1 } qw(headline subhead);

# Three-argument open with lexical handles: the mode can never be taken from
# the file name, and the handles are scoped instead of being package globals.
open my $in, '<', $source_file
    or die "Cannot open source file: $!";
open my $out, '>', $destination_file
    or die "Cannot open destination file: $!";

print {$out} <<'DEFINE_TABLE';
USE trib_stories;

DROP TABLE IF EXISTS story;

CREATE TABLE story (
story_id INT AUTO_INCREMENT,
issue_date DATE,
section VARCHAR(10),
byline VARCHAR(25),
staff_writer INT,
headline VARCHAR(50),
subhead VARCHAR(255),
body_copy TEXT,
caption_1 VARCHAR(255),
caption_2 VARCHAR(255),
caption_3 VARCHAR(255),
caption_4 VARCHAR(255),
caption_5 VARCHAR(255),
caption_6 VARCHAR(255),
PRIMARY KEY (story_id),
INDEX index1 (issue_date, section, byline),
FULLTEXT (headline),
FULLTEXT (body_copy)
);

DEFINE_TABLE

# Escape a value for use inside a double-quoted SQL string literal.
# Backslashes and double quotes are prefixed with a backslash; without this
# a quote inside a story would terminate the literal early and corrupt the
# statement.
sub escape_sql_string {
    my ($text) = @_;
    $text =~ s/(["\\])/\\$1/g;
    return $text;
}

while (my $line = <$in>) {
    chomp $line;
    $line_count++;
    $total_line_count++;

    if ($line_count == 1) {
        print {$out} "INSERT INTO story\nSET story_id = NULL,\n";
    } elsif ($line_count == 2) {
        # Only use the captures when the match succeeded; a line without a
        # date yields an empty value instead of the malformed "--".
        my $issue_date = "";
        if ($line =~ m{(\d\d?)/(\d\d?)/(\d\d\d\d)}) {
            my ($month, $day, $year) = ($1, $2, $3);
            $issue_date = "$year-$month-$day";
        } else {
            warn "Line $total_line_count: no M/D/YYYY date found\n";
        }
        print {$out} "issue_date = \"$issue_date\",\n";
    } elsif ($line_count <= 14) {
        my $column = $cell_columns[$line_count - 3];

        $line =~ s/<BR>//g if $strip_breaks{$column};

        my $value = "";
        if ($line =~ m{<TD>(.*)</TD>}) {
            $value = $1;
        } else {
            warn "Line $total_line_count: no <TD> cell found for $column\n";
        }

        # A cell holding only <BR> represents an empty value.
        $value = "" if $value eq "<BR>";

        # The last column ends the statement; all others continue the list.
        my $terminator = $line_count == 14 ? ";\n\n" : ",\n";
        print {$out} $column, ' = "', escape_sql_string($value), '"',
            $terminator;
    }

    # Line 15 carries no data; it only ends the record.
    $line_count = 0 if $line_count == $LINES_PER_RECORD;
}

# A file that stops in the middle of a record leaves an unterminated
# statement in the output; report it rather than failing silently.
if ($line_count != 0) {
    warn "Input ended after line $line_count of a $LINES_PER_RECORD-line "
        . "record; the last statement is incomplete\n";
}

close $in;
# Buffered write errors only surface at close, so check it on the output.
close $out or die "Cannot close destination file: $!";



--
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]
<http://learn.perl.org/> <http://learn.perl.org/first-response>




Reply via email to