Jim Schueler wrote: > The code below represents 3 packages that are used to parse a > csv file. > > The code would be invoked as: > open $f, 'test.csv' ; > @records = csv::qqlines \',', <$f> ; > close $f ; > > @records represents a 2 dim array of array refs. > > The qqlines function takes advantage of the class 'quoted'. 'quoted' > protects quoted strings in a text block, for example, during a split > operation. > > The only problem with this code is that it runs very slow. About 200 > lines per second on a 600M processor. Is there an obvious bottleneck > that I am overlooking? > > -Jim > > -------------------------- code --------------------------- > > package binlib; > > use 5.006; > use strict; > use warnings; > > require Exporter; > > our @ISA = qw(Exporter); > > our @EXPORT = qw( tobinary frombinary ) ; > our %EXPORT_TAGS = ( 'all' => [ qw( tobinarychr frombinarychr ), > @EXPORT ] ) ; our @EXPORT_OK = ( @{ $EXPORT_TAGS{all} } ) ; > > sub tobinary { > my $in = shift ; > my $out = "" ; > > while ( $in ) { > $out = unpack( 'B8', chr( $in & 0xff ) ) .$out ; > $in >>= 8 ; > } > > return $out ; > } > > sub tobinarychr { > return pack 'C*', split //, tobinary( @_ ) ; > } > > sub frombinary { > my $in = shift ; > my @in = ref $in? @$in: split //, $in ; > my $out = 0 ; > > while ( @in ) { > $out <<= 8 ; > $out += unpack 'C', pack 'B8', join '', splice( > @in, 0, 8 ) ; > } > > return $out ; > } > > sub frombinarychr { > my $in = shift ; > return frombinary [ unpack 'C*', $in ] ; > } > > > > package quoted ; > > use binlib ':all' ; > > sub new { > my $package = shift ; > my $orig = shift ; > my $self = {} ; > > my $ct ; > while ( $orig =~ s/(.*?)(['"])(.*)//s ) { > my $sep = $2 ; > $self->{string} .= $1 . $sep ; > > my $segm = $3 ; > my @foo ; > > while ( $segm ) { > @foo = split /$sep/, $segm, 2 ; > > last unless $foo[0] =~ /\\$/ ; > > $self->{string} .= $foo[0] . $sep ; > $segm = $foo[1] ; > } > > $orig = $foo[-1] ; > next if @foo == 1 ; > > $self->{string} .= tobinarychr( ++$ct ) ; > $self->{string} .= $sep ; > push @{ $self->{tokens} }, $foo[0] ; > } > > $self->{string} .= $orig ; > return bless $self, $package ; > } > > > sub dequote { > my $self = shift ; > > $self->{string} =~ s/'([\000\001]+)'/$1/g ; > $self->{string} =~ s/"([\000\001]+)"/$1/g ; > } > > > sub do { > my $self = shift ; > my $code = shift ; > my $buff = shift if @_ ; > > return map { s/([\000\001]+)/$self->{tokens}->[ > frombinarychr( $1 ) -1 ]/sge ; $_ } > &$code( defined $buff? $buff: > $self->{string} ) ; > } > > > > package csv ; > > sub qqlines { > my $arg = shift ; > my $buff = shift ; > > ## If first arg is a ref, return empty elements > my $delim = $arg unless ref $arg ; > $delim = $$arg if ref $arg eq 'SCALAR' ; > $delim = $arg->[0] if ref $arg eq 'ARRAY' ; > > unless ( defined $delim ) { > bless $arg ; > $delim = $$arg if $arg->isa( 'SCALAR' ) ; > $delim = $arg->[0] if $arg->isa( 'ARRAY' ) ; > } > > ## Do the parsing > my $qq = new quoted( $buff ) ; > $qq->dequote() ; > > my @qq = split /\s*[\r\n]+/s, $qq->{string} ; > my $code = fun_bydelim( $delim ) ; > > return map { [ $qq->do( $code, $_ ) ] } @qq if ref $arg ; > return map { [ grep $_, $qq->do( $code, $_ ) ] } @qq ; } > > sub fun_bydelim { > my $delim = shift ; > return sub { my $buff = shift ; return split > /[$delim]/, $buff ; } ; > } > > 1
After a brief perusal of your code, you do seem to be jumpng through some weird hoops, without any comments in the code explaining why. However, my primary question is what does your code do that could not be done easier and more reliably with Text::CSV (or Text::CSV_XS if speed is important) ? -- Brian Raven ----------------------------------------------------------------------- The information contained in this e-mail is confidential and solely for the intended addressee(s). Unauthorised reproduction, disclosure, modification, and/or distribution of this email may be unlawful. If you have received this email in error, please notify the sender immediately and delete it from your system. The views expressed in this message do not necessarily reflect those of LIFFE Holdings Plc or any of its subsidiary companies. ----------------------------------------------------------------------- _______________________________________________ ActivePerl mailing list [EMAIL PROTECTED] To unsubscribe: http://listserv.ActiveState.com/mailman/mysubs