This is an automated email from the git hooks/post-receive script. js pushed a commit to branch master in repository libcatmandu-perl.
commit fe020d9ae79e12c199a1d8fc6e93250b936d6744 Author: Patrick Hochstenbach <patrick.hochstenb...@ugent.be> Date: Thu Feb 4 15:35:10 2016 +0100 Adding seperate TSV export Tab delimited files folow a bit different rules than CSV --- lib/Catmandu/Exporter/TSV.pm | 130 +++++++++++++++++++++++++++++++++++++++++++ lib/Catmandu/Importer/CSV.pm | 46 ++++++++++----- lib/Catmandu/Importer/TSV.pm | 120 +++++++++++++++++++++++++++++++++++++++ t/Catmandu-Exporter-TSV.t | 72 ++++++++++++++++++++++++ t/Catmandu-Importer-TSV.t | 47 ++++++++++++++++ 5 files changed, 401 insertions(+), 14 deletions(-) diff --git a/lib/Catmandu/Exporter/TSV.pm b/lib/Catmandu/Exporter/TSV.pm new file mode 100644 index 0000000..742f6f9 --- /dev/null +++ b/lib/Catmandu/Exporter/TSV.pm @@ -0,0 +1,130 @@ +package Catmandu::Exporter::TSV; + +use Catmandu::Sane; + +our $VERSION = '0.9505'; + +use Catmandu::Exporter::CSV; +use Moo; +use namespace::clean; + +with 'Catmandu::TabularExporter'; + +has csv => (is => 'lazy'); + +sub _build_csv { + my ($self) = @_; + my $csv = Catmandu::Exporter::CSV->new( + header => $self->header, + collect_fields => $self->collect_fields, + sep_char => "\t", + quote_char => undef, + escape_char => undef, + file => $self->file, + ); + $csv->{fields} = $self->fields; + $csv->{columns} = $self->columns; + $csv; +} + +sub add { + my ($self, $data) = @_; + $self->csv->add($data); +} + +1; + +__END__ + +=pod + +=head1 NAME + +Catmandu::Exporter::TSV - a tab-delimited TSV exporter + +=head1 SYNOPSIS + + # On the command line + + $ catmandu convert JSON to TSV --fields "id,title,year" < data.json + + # In a Perl script + + use Catmandu::Exporter::TSV; + + my $exporter = Catmandu::Exporter::TSV->new( + fix => 'myfix.txt', + quote_char => '"', + sep_char => ',', + escape_char => '"' , + always_quote => 1, + header => 1); + + $exporter->fields("f1,f2,f3"); + $exporter->fields([qw(f1 f2 f3)]); + + $exporter->add_many($arrayref); + $exporter->add_many($iterator); + $exporter->add_many(sub { }); + + $exporter->add($hashref); + + printf "exported %d objects\n" , $exporter->count; + +=head1 DESCRIPTION + +This C<Catmandu::Exporter> exports items as rows with tab-separated values +(TSV). A header line with field names will be included if option C<header> is +set. See L<Catmandu::TabularExporter> on how to configure the field mapping +and column names. Newlines and tabulator values in field values are escaped +as C<\n>, C<\r>, and C<\t>. + +=head1 CONFIGURATION + +=over + +=item file + +Write output to a local file given by its path or file handle. Alternatively a +scalar reference can be passed to write to a string and a code reference can be +used to write to a callback function. + +=item fh + +Write the output to an L<IO::Handle>. If not specified, +L<Catmandu::Util::io|Catmandu::Util/IO-functions> is used to create the output +handle from the C<file> argument or by using STDOUT. + +=item fix + +An ARRAY of one or more fixes or file scripts to be applied to exported items. + +=item encoding + +Binmode of the output stream C<fh>. Set to "C<:utf8>" by default. + +=item fields + +See L<Catmandu::TabularExporter>. + +=item columns + +See L<Catmandu::TabularExporter>. + +=item header + +Include a header line with column names. Enabled by default. + +=back + +=head1 METHODS + +See L<Catmandu::TabularExporter>, L<Catmandu::Exporter>, L<Catmandu::Addable>, +L<Catmandu::Fixable>, L<Catmandu::Counter>, and L<Catmandu::Logger> for a full +list of methods. + +=head1 SEE ALSO + +L<Catmandu::Importer::TSV> + +=cut diff --git a/lib/Catmandu/Importer/CSV.pm b/lib/Catmandu/Importer/CSV.pm index 6142fe3..496a922 100644 --- a/lib/Catmandu/Importer/CSV.pm +++ b/lib/Catmandu/Importer/CSV.pm @@ -51,13 +51,16 @@ sub _build_csv { sub generator { my ($self) = @_; sub { - state $fh = $self->fh; + state $line = 0; + state $fh = $self->fh; state $csv = do { if ($self->header) { if ($self->fields) { $self->csv->getline($fh); + $line++; } else { $self->_set_fields($self->csv->getline($fh)); + $line++; } } if ($self->fields) { @@ -69,6 +72,7 @@ sub generator { # generate field names if needed unless ($self->fields) { my $row = $csv->getline($fh) // return; + $line++; my $fields = [0 .. (@$row -1)]; $self->_set_fields($fields); $csv->column_names($fields); @@ -78,7 +82,16 @@ sub generator { } +{}, @$fields; } - $csv->getline_hr($fh); + my $rec = $csv->getline_hr($fh); + $line++; + + if (defined $rec || $csv->eof()) { + return $rec; + } + else { + my ($cde, $str, $pos) = $csv->error_diag (); + die "at line $line (byte $pos) found a Text::CSV parse error($cde) $str"; + } }; } @@ -94,29 +107,34 @@ Catmandu::Importer::CSV - Package that imports CSV data =head1 SYNOPSIS - use Catmandu::Importer::CSV; - - my $importer = Catmandu::Importer::CSV->new(file => "/foo/bar.csv"); - - my $n = $importer->each(sub { - my $hashref = $_[0]; - # ... - }); - -Convert CSV to other formats with the catmandu command line client: + # From the command line - # convert CSV file to JSON + # convert a CSV file to JSON catmandu convert CSV to JSON < journals.csv + # set column names if CSV file has no header line echo '12157,"The Journal of Headache and Pain",2193-1801' | \ catmandu convert CSV --header 0 --fields 'id,title,issn' to YAML + # set field separator and quote character echo '12157;$The Journal of Headache and Pain$;2193-1801' | \ catmandu convert CSV --header 0 --fields 'id,title,issn' --sep_char ';' --quote_char '$' to XLSX --file journal.xlsx + + # Or in a Perl script + + use Catmandu; + + my $importer = Catmandu->importer('CSV', file => "/foo/bar.csv"); + + my $n = $importer->each(sub { + my $hashref = $_[0]; + # ... + }); + =head1 DESCRIPTION -This L<Catmandu::Importer> imports comma-separated values (CSV). The object +The L<Catmandu::Importer> package imports comma-separated values (CSV). The object fields are read from the CSV header line or given via the C<fields> parameter. Strings in CSV are quoted by C<quote_char> and fields are separated by C<sep_char>. diff --git a/lib/Catmandu/Importer/TSV.pm b/lib/Catmandu/Importer/TSV.pm new file mode 100644 index 0000000..97833df --- /dev/null +++ b/lib/Catmandu/Importer/TSV.pm @@ -0,0 +1,120 @@ +package Catmandu::Importer::TSV; + +use Catmandu::Sane; + +our $VERSION = '0.9505'; + +use Catmandu::Importer::CSV; +use Moo; +use namespace::clean; + +with 'Catmandu::Importer'; + +has header => (is => 'ro', default => sub { 1 }); +has fields => ( + is => 'rwp', + coerce => sub { + my $fields = $_[0]; + if (ref $fields eq 'ARRAY') { return $fields } + if (ref $fields eq 'HASH') { return [sort keys %$fields] } + return [split ',', $fields]; + }, +); + +has csv => (is => 'lazy'); + +sub _build_csv { + my ($self) = @_; + my $csv = Catmandu::Importer::CSV->new( + header => $self->header, + sep_char => "\t", + quote_char => undef, + escape_char => undef, + file => $self->file, + ); + $csv->{fields} = $self->fields; + $csv; +} + +sub generator { + my ($self) = @_; + $self->csv->generator; +} + +1; + +__END__ +=pod + +=head1 NAME + +Catmandu::Importer::TSV - Package that imports tab-separated values + +=head1 SYNOPSIS + + # From the command line + + # convert a TSV file to JSON + catmandu convert TSV to JSON < journals.tab + + # Or in a Perl script + + use Catmandu; + + my $importer = Catmandu->importer('TSV', file => "/foo/bar.tab"); + + my $n = $importer->each(sub { + my $hashref = $_[0]; + # ... + }); + +=head1 DESCRIPTION + +The L<Catmandu::Importer> package imports tab-separated values (TSV). The object +fields are read from the TSV header line or given via the C<fields> parameter. + +=head1 CONFIGURATION + +=over + +=item file + +Read input from a local file given by its path. Alternatively a scalar +reference can be passed to read from a string. + +=item fh + +Read input from an L<IO::Handle>. If not specified, L<Catmandu::Util::io> is used to +create the input stream from the C<file> argument or by using STDIN. + +=item encoding + +Binmode of the input stream C<fh>. Set to C<:utf8> by default. + +=item fix + +An ARRAY of one or more fixes or file scripts to be applied to imported items. + +=item fields + +List of fields to be used as columns, given as array reference, comma-separated +string, or hash reference. If C<header> is C<0> and C<fields> is C<undef> the +fields will be named by column index ("0", "1", "2", ...). + +=item header + +Read fields from a header line with the column names, if set to C<1> (the +default). + +=back + +=head1 METHODS + +Every L<Catmandu::Importer> is a L<Catmandu::Iterable> all its methods are +inherited. The methods are not idempotent: CSV streams can only be read once. + +=head1 SEE ALSO + +L<Catmandu::Exporter::TSV> + +=cut diff --git a/t/Catmandu-Exporter-TSV.t b/t/Catmandu-Exporter-TSV.t new file mode 100644 index 0000000..3ec8cc4 --- /dev/null +++ b/t/Catmandu-Exporter-TSV.t @@ -0,0 +1,72 @@ +use strict; +use warnings; +use Test::More; +use Test::Exception; + +my $pkg; +BEGIN { + $pkg = 'Catmandu::Exporter::TSV'; + use_ok $pkg; +} +require_ok $pkg; + +my $data = [{'a' => 'moose', b => '1'}, {'a' => 'pony', b => '2'}, {'a' => 'shrimp', b => '3'}]; +my $out = ""; + +my $exporter = $pkg->new(file => \$out); +isa_ok $exporter, $pkg; + +$exporter->add_many($data); +$exporter->commit; + +my $tsv = <<EOF; +a\tb +moose\t1 +pony\t2 +shrimp\t3 +EOF + +is $out, $tsv, "TSV strings ok"; +is $exporter->count,3, "Count ok"; + +$data = [{b => '1'}, {'a' => 'pony', b => '2'}, {'a' => 'shrimp', b => '3'}]; +$out = ""; +$exporter = $pkg->new(file => \$out); +$exporter->add_many($data); +$exporter->commit; +$tsv = <<EOF; +b +1 +2 +3 +EOF +is $out, $tsv, "first record determines fields without collect"; + +$out = ""; +$exporter = $pkg->new(file => \$out, collect_fields => 1); +$exporter->add_many($data); +$exporter->commit; +$tsv = <<EOF; +a\tb +\t1 +pony\t2 +shrimp\t3 +EOF +is $out, $tsv, "collect field names"; + +$out = ""; +$exporter = $pkg->new(fields => 'a,x', columns => 'Longname,X', file => \$out ); +$exporter->add( { a => 'Hello', b => 'World' } ); +$tsv = "Longname\tX\nHello\t\n"; +is $out, $tsv, "custom column names"; + +$out=""; +my $fixer = Catmandu->fixer('if exists(foo) reject() end'); +my $importer = Catmandu->importer('JSON', file => 't/csv_test.json'); + +$exporter = $pkg->new(file => \$out); +$exporter->add_many($fixer->fix($importer)); +$tsv = "fob\ntest\n"; +is $out, $tsv, "custom column names as HASH with reject fix"; + +done_testing; diff --git a/t/Catmandu-Importer-TSV.t b/t/Catmandu-Importer-TSV.t new file mode 100644 index 0000000..c45f74f --- /dev/null +++ b/t/Catmandu-Importer-TSV.t @@ -0,0 +1,47 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Test::More; +use Test::Exception; + +my $pkg; +BEGIN { + $pkg = 'Catmandu::Importer::TSV'; + use_ok $pkg; +} +require_ok $pkg; + +my $data = [ + {name=>'Patrick',age=>'44'}, + {name=>'Nicolas',age=>'39'}, +]; + +my $tsv = <<EOF; +name\tage +Patrick\t44 +Nicolas\t39 +EOF + +my $importer = $pkg->new(file => \$tsv); + +isa_ok $importer, $pkg; + +is_deeply $importer->to_array, $data; + +$data = [ + {0=>'Patrick',1=>'44'}, + {0=>'Nicolas',1=>'39'}, +]; + +$tsv = <<EOF; +Patrick\t44 +Nicolas\t39 +EOF + +$importer = $pkg->new(file => \$tsv, header => 0); + +is_deeply $importer->to_array, $data; + +done_testing; + -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libcatmandu-perl.git _______________________________________________ Pkg-perl-cvs-commits mailing list Pkg-perl-cvs-commits@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-perl-cvs-commits