From: Sven Dowideit <svendowid...@fosiki.com>

---
 AnyData/Format/Weblog.pm |   24 +++++++++++-------------
 Changes                  |    2 ++
 t/fixed.tbl              |    8 +++++++-
 t/weblog.t               |   42 ++++++++++++++++++++++++++++++++++++++++++
 t/weblog.tbl             |    1 +
 5 files changed, 63 insertions(+), 14 deletions(-)
 create mode 100644 t/weblog.t
 create mode 100644 t/weblog.tbl

diff --git a/AnyData/Format/Weblog.pm b/AnyData/Format/Weblog.pm
index 0c89da8..339623f 100644
--- a/AnyData/Format/Weblog.pm
+++ b/AnyData/Format/Weblog.pm
@@ -61,12 +61,11 @@ all rights reserved
 
 =cut
 
-
 use strict;
 use AnyData::Format::Base;
 use vars qw( @ISA $DEBUG $VERSION);
 @AnyData::Format::Weblog::ISA = qw( AnyData::Format::Base );
-$DEBUG = 0;
+$DEBUG                        = 0;
 
 $VERSION = '0.06';
 
@@ -74,9 +73,9 @@ sub new {
     my $class = shift;
     my $self = shift || {};
     $self->{col_names} =
-        
'remotehost,username,authuser,date,request,status,bytes,client,referer';
-    $self->{record_sep} = "\n";
-    $self->{key} = 'datestamp';
+      'remotehost,username,authuser,date,request,status,bytes,referer,client';
+    $self->{record_sep}      = "\n";
+    $self->{key}             = 'datestamp';
     $self->{keep_first_line} = 1;
     return bless $self, $class;
 }
@@ -84,21 +83,20 @@ sub new {
 sub read_fields {
     print "PARSE RECORD\n" if $DEBUG;
     my $self = shift;
-    my $str  = shift || return undef;
+    my $str = shift || return undef;
     $str =~ s/^\s+//;
     $str =~ s/\s+$//;
     return undef unless $str;
-    my(@row) = $str =~
-        /^(\S*) (\S*) (\S*) \[([^\]]*)\] "(.*)" (\S*) (\S*)\s*(.*)$/;
+    my (@row) =
+      $str =~ /^(\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/;
     return undef unless defined $row[0];
-    my($client,$referer) = $row[7] =~ /^(.*) (\S*)$/;
+    my ( $referer, $client ) = $row[7] =~ /^(.*?)\s(.*)$/;
     $client  ||= '';
     $referer ||= '';
-    ($row[7],$row[8])=($client,$referer);
+    ( $row[7], $row[8] ) = ( $referer, $client );
+
     # $row[3] =~ s/\s*-\s*(\S*)$//; # hide GMT offset on datestamp
-    return @row
+    return @row;
 }
 1;
 
-
-
diff --git a/Changes b/Changes
index 9593473..e09929f 100644
--- a/Changes
+++ b/Changes
@@ -11,6 +11,8 @@ version 0.11, released  Aug 2012
  * Fix spelling errors (debian) Ansgar Burchardt <ans...@43-1.org>
  * adColumn $distinct_flag not handled (RT#6248 & RT#6251) John D. Lima
  * writing fields containing 0 with AnyData::Format::Fixed (RT#8671) 
<elodie+cpan [...] pasteur.fr>
+ * weblog request and referer regexs were too greedy, and the referer and 
client regex's where in the wrong order (RT#34063) and (RT#72334) Wes Brown 
<wes [...] smellycat.com> and pawal [...] blipp.com
+ * 
 
 version 0.10, released 19 April 2004
 
diff --git a/t/fixed.tbl b/t/fixed.tbl
index aeac94d..02ffb63 100644
--- a/t/fixed.tbl
+++ b/t/fixed.tbl
@@ -1 +1,7 @@
-country, code australia au germany de france fr switzerlandch broken 0 broken2 0
+country,code
+australia  au
+germany    de
+france     fr
+switzerlandch
+broken     0
+broken2     0
diff --git a/t/weblog.t b/t/weblog.t
new file mode 100644
index 0000000..2d494e6
--- /dev/null
+++ b/t/weblog.t
@@ -0,0 +1,42 @@
+#!/usr/local/bin/perl -wT
+use strict;
+use warnings;
+
+use Test::More;
+plan tests => 10;
+
+use AnyData;
+
+my $table = adTie( 'Weblog', 't/weblog.tbl', 'r', {} );
+
+ok( 1 == adRows($table), "Failed rows" );
+
+#remotehost,username,authuser,date,request,status,bytes,client,referer
+#12.34.56.78 - - [13/Mar/2008:07:38:53 +0100] "GET /creeper/image HTTP/1.1" 
200 252 "http://www.example.com/"; "Mozilla/5.0 (Windows; U; Windows NT 6.0; 
sv-SE; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12"
+
+my $row = each %$table;
+ok( '12.34.56.78'                 eq $row->{remotehost}, 'remotehost' );
+ok( '-'                           eq $row->{username},   'username' );
+ok( '-'                           eq $row->{authuser},   'authuser' );
+ok( '13/Mar/2008:07:38:53 +0100'  eq $row->{date},       'date' );
+ok( 'GET /creeper/image HTTP/1.1' eq $row->{request},    'request' );
+ok( '200'                         eq $row->{status},     'status' );
+ok( '252'                         eq $row->{bytes},      'bytes' );
+ok(
+'"Mozilla/5.0 (Windows; U; Windows NT 6.0; sv-SE; rv:1.8.1.12) Gecko/20080201 
Firefox/2.0.0.12"'
+      eq $row->{client},
+    'client ' . $row->{client}
+);
+ok( '"http://www.example.com/";' eq $row->{referer},
+    'referer: ' . $row->{referer} );
+
+#write test
+#TODO: looks like writing a weblog is broken
+#print STDERR "\n---\n";
+#print STDERR adExport( $table, 'Weblog', undef, {  } );
+#print STDERR "\n---\n";
+#ok(
+#    <<'HERE' eq adExport( $table, 'Weblog', undef, {  } ), 'export weblog 
format' );
+#HERE
+
+__END__
diff --git a/t/weblog.tbl b/t/weblog.tbl
new file mode 100644
index 0000000..54e379f
--- /dev/null
+++ b/t/weblog.tbl
@@ -0,0 +1 @@
+12.34.56.78 - - [13/Mar/2008:07:38:53 +0100] "GET /creeper/image HTTP/1.1" 200 
252 "http://www.example.com/"; "Mozilla/5.0 (Windows; U; Windows NT 6.0; sv-SE; 
rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12"
-- 
1.7.10.4

Reply via email to