This is an automated email from the git hooks/post-receive script. js pushed a commit to branch master in repository libmarc-parser-raw-perl.
commit 799d2bcadc4ec8f1e4cebc8d29987d08a3e375ad Author: Johann Rolschewski <[email protected]> Date: Mon May 11 18:18:11 2015 +0200 major refactoring --- LICENSE | 6 ++--- README.md | 13 ++++++++++ lib/MARC/Parser/RAW.pm | 69 ++++++++++++++++++++++++++++++++------------------ t/01-parser.t | 48 ++++++++++++++++++++++------------- t/camel.mrc | 2 +- 5 files changed, 92 insertions(+), 46 deletions(-) diff --git a/LICENSE b/LICENSE index 48678db..932b9cf 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -This software is copyright (c) 2015 by Johann Rolschewski. +This software is copyright (c) 2014- by Johann Rolschewski <[email protected]>. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. @@ -12,7 +12,7 @@ b) the "Artistic License" --- The GNU General Public License, Version 1, February 1989 --- -This software is Copyright (c) 2015 by Johann Rolschewski. +This software is Copyright (c) 2014- by Johann Rolschewski <[email protected]>. This is free software, licensed under: @@ -272,7 +272,7 @@ That's all there is to it! --- The Artistic License 1.0 --- -This software is Copyright (c) 2015 by Johann Rolschewski. +This software is Copyright (c) 2014- by Johann Rolschewski <[email protected]>. This is free software, licensed under: diff --git a/README.md b/README.md index a9e67ee..d275960 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,19 @@ Deserialize a raw MARC record to an ARRAY of ARRAYs. Split MARC field string in individual components. +# AUTHOR + +Johann Rolschewski <[email protected]> + +# COPYRIGHT + +Copyright 2014- Johann Rolschewski + +# LICENSE + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + # SEEALSO [Catmandu](https://metacpan.org/pod/Catmandu), [Catmandu::Importer::MARC](https://metacpan.org/pod/Catmandu::Importer::MARC). diff --git a/lib/MARC/Parser/RAW.pm b/lib/MARC/Parser/RAW.pm index f20e6dc..718489f 100644 --- a/lib/MARC/Parser/RAW.pm +++ b/lib/MARC/Parser/RAW.pm @@ -7,6 +7,7 @@ use warnings; use charnames qw< :full >; use Carp qw(croak carp); use Encode qw(find_encoding); +use English; use Readonly; Readonly my $LEADER_LEN => 24; @@ -93,8 +94,7 @@ sub new { $file or croak "first argument must be a file or filehandle"; if ($encoding) { - find_encoding($encoding) - or croak "encoding \"$_[0]\" is not a valid encoding"; + find_encoding($encoding) or croak "encoding \"$_[0]\" not found"; } my $self = { @@ -130,25 +130,24 @@ Reads the next record from MARC input stream. Returns a Perl hash. sub next { my $self = shift; my $fh = $self->{fh}; - local $/ = $END_OF_RECORD; - if ( my $record = <$fh> ) { + local $INPUT_RECORD_SEPARATOR = $END_OF_RECORD; + if ( defined (my $raw = <$fh>) ) { $self->{rec_number}++; # remove illegal garbage that sometimes occurs between records - $record + $raw =~ s/^[\N{SPACE}\N{NUL}\N{LINE FEED}\N{CARRIAGE RETURN}\N{SUB}]+//; - return unless $record; + return unless $raw; - my $record = _decode($record); - if ( scalar @{$record} > 1 ) { - return $record; + + if ( my $marc = $self->_decode($raw) ) { + return $marc; + } + else { + return $self->next(); } - carp $record->[0] . $self->{rec_number}; - $self->next(); - } - else { - return; } + return; } =head2 _decode($record) @@ -158,34 +157,41 @@ Deserialize a raw MARC record to an ARRAY of ARRAYs. =cut sub _decode { - my $raw = shift; + my ( $self, $raw ) = @_; chop $raw; my ( $head, @fields ) = split $END_OF_FIELD, $raw; if ( !@fields ) { - return ["no fields found in record "]; + carp "no fields found in record " . $self->{rec_number}; + return; } # ToDO: better RegEX for leader - if ( $head !~ /(.{$LEADER_LEN})/cg ) { - return ["no record leader found in record "]; + my $leader; + if ( $head =~ /(.{$LEADER_LEN})/cg ) { + $leader = $1; + } + else { + carp "no valid record leader found in record " . $self->{rec_number}; + return; } - my $leader = $1; - my @tags = $head =~ /\G(\d{3})\d{9}/cg; + my @tags = $head =~ /\G(\d{3})\d{9}/cg; if ( scalar @tags != scalar @fields ) { - return ["different number of tags and fields in record "]; + carp "different number of tags and fields in record " + . $self->{rec_number}; + return; } if ( $head !~ /\G$/cg ) { - my $tail = $1 if $head =~ /(.*)/cg; - return ["incomplete directory entry in record "]; + carp "incomplete directory entry in record " . $self->{rec_number}; + return; } return [ [ 'LDR', undef, undef, '_', $leader ], - map [ shift(@tags), _field($_) ], + map [ shift(@tags), $self->_field($_) ], @fields ]; } @@ -197,7 +203,7 @@ Split MARC field string in individual components. =cut sub _field { - my ($field) = @_; + my ( $self, $field ) = @_; my @chunks = split( /$SUBFIELD_INDICATOR(.)/, $field ); return ( undef, undef, '_', @chunks ) if @chunks == 1; my @subfields; @@ -208,6 +214,19 @@ sub _field { return ( $indicator1, $indicator2, @subfields ); } +=head1 AUTHOR + +Johann Rolschewski E<lt>[email protected]<gt> + +=head1 COPYRIGHT + +Copyright 2014- Johann Rolschewski + +=head1 LICENSE + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + =head1 SEEALSO L<Catmandu>, L<Catmandu::Importer::MARC>. diff --git a/t/01-parser.t b/t/01-parser.t index 422f1b8..e047a34 100644 --- a/t/01-parser.t +++ b/t/01-parser.t @@ -4,13 +4,24 @@ use Test::More; use MARC::Parser::RAW; -my $failure = eval {MARC::Parser::RAW->new()}; -is( $failure, undef, 'croak missing argument'); +new_ok( 'MARC::Parser::RAW' => ['./t/camel.mrc'] ); +new_ok( 'MARC::Parser::RAW' => ['./t/camel.mrc', 'UTF-8'] ); +can_ok( 'MARC::Parser::RAW', qw{ next }); +my $failure = eval { MARC::Parser::RAW->new() }; +is( $failure, undef, 'croak missing argument' ); +$failure = eval { MARC::Parser::RAW->new('./t/camel.mrk') }; +is( $failure, undef, 'croak cannot find file'); +$failure = eval { MARC::Parser::RAW->new('./t/camel.mrc', 'XXX-0') }; +is( $failure, undef, 'croak unavailable encoding'); my $parser = MARC::Parser::RAW->new('./t/camel.mrc'); -isa_ok( $parser, 'MARC::Parser::RAW' ); my $record = $parser->next(); is_deeply( + $record->[0], + [ 'LDR', undef, undef, '_', '00755cam 22002414a 4500' ], + 'LDR' +); +is_deeply( $record->[1], [ '001', undef, undef, '_', 'fol05731351 ' ], 'first field' @@ -20,20 +31,23 @@ is_deeply( [ '020', ' ', ' ', 'a', '0471383147 (paper/cd-rom : alk. paper)' ], 'sixth field' ); -$record = $parser->next(); -is_deeply( - $record->[1], - [ '001', undef, undef, '_', 'fol05754809 ' ], - 'first field' -); -$parser = MARC::Parser::RAW->new('./t/camel.mrc', 'UTF-8'); -isa_ok( $parser, 'MARC::Parser::RAW' ); -$record = $parser->next(); -is_deeply( - $record->[1], - [ '001', undef, undef, '_', 'fol05731351 ' ], - 'first field' -); +{ + my @warnings; + local $SIG{__WARN__} = sub { + push @warnings, @_; + }; + my $record = $parser->next(); + is_deeply( + $record->[0], + [ 'LDR', undef, undef, '_', '00665nam 22002298a 4500' ], + 'skipped faulty records' + ); + is scalar(@warnings), 4, 'got warnings'; + like $warnings[0], qr{no fields found in record}, 'carp no fields found in record'; + like $warnings[1], qr{no valid record leader found in record}, 'carp no valid record leader found in record'; + like $warnings[2], qr{different number of tags and fields in record}, 'carp different number of tags and fields in record'; + like $warnings[3], qr{incomplete directory entry in record}, 'carp incomplete directory entry in record'; +} done_testing; \ No newline at end of file diff --git a/t/camel.mrc b/t/camel.mrc index 68d6dad..7127bd2 100644 --- a/t/camel.mrc +++ b/t/camel.mrc @@ -1 +1 @@ -00755cam 22002414a 4500001001300000003000600013005001700019008004100036010001700077020004300094040001800137042000800155050002600163082001700189100003100206245005400237260004200291300007200333500003300405650003700438630002500475630001300500fol05731351 IMchF20000613133448.0000107s2000 nyua 001 0 eng a 00020737 a0471383147 (paper/cd-rom : alk. paper) aDLCcDLCdDLC apcc00aQA76.73.P22bM33 200000a005.13/32211 aMartinsson, Tobias,d1976-10aActivePerl [...] \ No newline at end of file +00755cam 22002414a 4500001001300000003000600013005001700019008004100036010001700077020004300094040001800137042000800155050002600163082001700189100003100206245005400237260004200291300007200333500003300405650003700438630002500475630001300500fol05731351 IMchF20000613133448.0000107s2000 nyua 001 0 eng a 00020737 a0471383147 (paper/cd-rom : alk. paper) aDLCcDLCdDLC apcc00aQA76.73.P22bM33 200000a005.13/32211 aMartinsson, Tobias,d1976-10aActivePerl [...] \ No newline at end of file -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmarc-parser-raw-perl.git _______________________________________________ Pkg-perl-cvs-commits mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-perl-cvs-commits
