From: Sven Dowideit <svendowid...@fosiki.com> --- AnyData/Format/Weblog.pm | 24 +++++++++++------------- Changes | 2 ++ t/fixed.tbl | 8 +++++++- t/weblog.t | 42 ++++++++++++++++++++++++++++++++++++++++++ t/weblog.tbl | 1 + 5 files changed, 63 insertions(+), 14 deletions(-) create mode 100644 t/weblog.t create mode 100644 t/weblog.tbl
diff --git a/AnyData/Format/Weblog.pm b/AnyData/Format/Weblog.pm index 0c89da8..339623f 100644 --- a/AnyData/Format/Weblog.pm +++ b/AnyData/Format/Weblog.pm @@ -61,12 +61,11 @@ all rights reserved =cut - use strict; use AnyData::Format::Base; use vars qw( @ISA $DEBUG $VERSION); @AnyData::Format::Weblog::ISA = qw( AnyData::Format::Base ); -$DEBUG = 0; +$DEBUG = 0; $VERSION = '0.06'; @@ -74,9 +73,9 @@ sub new { my $class = shift; my $self = shift || {}; $self->{col_names} = - 'remotehost,username,authuser,date,request,status,bytes,client,referer'; - $self->{record_sep} = "\n"; - $self->{key} = 'datestamp'; + 'remotehost,username,authuser,date,request,status,bytes,referer,client'; + $self->{record_sep} = "\n"; + $self->{key} = 'datestamp'; $self->{keep_first_line} = 1; return bless $self, $class; } @@ -84,21 +83,20 @@ sub new { sub read_fields { print "PARSE RECORD\n" if $DEBUG; my $self = shift; - my $str = shift || return undef; + my $str = shift || return undef; $str =~ s/^\s+//; $str =~ s/\s+$//; return undef unless $str; - my(@row) = $str =~ - /^(\S*) (\S*) (\S*) \[([^\]]*)\] "(.*)" (\S*) (\S*)\s*(.*)$/; + my (@row) = + $str =~ /^(\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/; return undef unless defined $row[0]; - my($client,$referer) = $row[7] =~ /^(.*) (\S*)$/; + my ( $referer, $client ) = $row[7] =~ /^(.*?)\s(.*)$/; $client ||= ''; $referer ||= ''; - ($row[7],$row[8])=($client,$referer); + ( $row[7], $row[8] ) = ( $referer, $client ); + # $row[3] =~ s/\s*-\s*(\S*)$//; # hide GMT offset on datestamp - return @row + return @row; } 1; - - diff --git a/Changes b/Changes index 9593473..e09929f 100644 --- a/Changes +++ b/Changes @@ -11,6 +11,8 @@ version 0.11, released Aug 2012 * Fix spelling errors (debian) Ansgar Burchardt <ans...@43-1.org> * adColumn $distinct_flag not handled (RT#6248 & RT#6251) John D. Lima * writing fields containing 0 with AnyData::Format::Fixed (RT#8671) <elodie+cpan [...] pasteur.fr> + * weblog request and referer regexs were too greedy, and the referer and client regex's where in the wrong order (RT#34063) and (RT#72334) Wes Brown <wes [...] smellycat.com> and pawal [...] blipp.com + * version 0.10, released 19 April 2004 diff --git a/t/fixed.tbl b/t/fixed.tbl index aeac94d..02ffb63 100644 --- a/t/fixed.tbl +++ b/t/fixed.tbl @@ -1 +1,7 @@ -country, code australia au germany de france fr switzerlandch broken 0 broken2 0 +country,code +australia au +germany de +france fr +switzerlandch +broken 0 +broken2 0 diff --git a/t/weblog.t b/t/weblog.t new file mode 100644 index 0000000..2d494e6 --- /dev/null +++ b/t/weblog.t @@ -0,0 +1,42 @@ +#!/usr/local/bin/perl -wT +use strict; +use warnings; + +use Test::More; +plan tests => 10; + +use AnyData; + +my $table = adTie( 'Weblog', 't/weblog.tbl', 'r', {} ); + +ok( 1 == adRows($table), "Failed rows" ); + +#remotehost,username,authuser,date,request,status,bytes,client,referer +#12.34.56.78 - - [13/Mar/2008:07:38:53 +0100] "GET /creeper/image HTTP/1.1" 200 252 "http://www.example.com/" "Mozilla/5.0 (Windows; U; Windows NT 6.0; sv-SE; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12" + +my $row = each %$table; +ok( '12.34.56.78' eq $row->{remotehost}, 'remotehost' ); +ok( '-' eq $row->{username}, 'username' ); +ok( '-' eq $row->{authuser}, 'authuser' ); +ok( '13/Mar/2008:07:38:53 +0100' eq $row->{date}, 'date' ); +ok( 'GET /creeper/image HTTP/1.1' eq $row->{request}, 'request' ); +ok( '200' eq $row->{status}, 'status' ); +ok( '252' eq $row->{bytes}, 'bytes' ); +ok( +'"Mozilla/5.0 (Windows; U; Windows NT 6.0; sv-SE; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12"' + eq $row->{client}, + 'client ' . $row->{client} +); +ok( '"http://www.example.com/"' eq $row->{referer}, + 'referer: ' . $row->{referer} ); + +#write test +#TODO: looks like writing a weblog is broken +#print STDERR "\n---\n"; +#print STDERR adExport( $table, 'Weblog', undef, { } ); +#print STDERR "\n---\n"; +#ok( +# <<'HERE' eq adExport( $table, 'Weblog', undef, { } ), 'export weblog format' ); +#HERE + +__END__ diff --git a/t/weblog.tbl b/t/weblog.tbl new file mode 100644 index 0000000..54e379f --- /dev/null +++ b/t/weblog.tbl @@ -0,0 +1 @@ +12.34.56.78 - - [13/Mar/2008:07:38:53 +0100] "GET /creeper/image HTTP/1.1" 200 252 "http://www.example.com/" "Mozilla/5.0 (Windows; U; Windows NT 6.0; sv-SE; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12" -- 1.7.10.4