This is an automated email from the git hooks/post-receive script. js pushed a commit to annotated tag v0.01 in repository libcatmandu-store-lucy-perl.
commit 4d1c3be573e399304e56f589c28f9c33e5ed44a3 Author: Nicolas Steenlant <[email protected]> Date: Wed Oct 17 13:22:37 2012 +0200 initial release --- .gitignore | 1 + Build.PL | 13 +- Changes | 4 +- LICENSE | 379 +++++++++++++++++++++++++++++++++++++++++ META.json | 55 ++++++ META.yml | 32 ++++ Makefile.PL | 20 +++ README | 4 - lib/Catmandu/Store/Lucy.pm | 183 +++++++++----------- lib/Catmandu/Store/Lucy/Bag.pm | 245 ++++++++++++++++++++++++++ t/00-load.t | 15 -- t/00.t | 88 ++++++++++ 12 files changed, 914 insertions(+), 125 deletions(-) diff --git a/.gitignore b/.gitignore index 60fe9f0..33b7a93 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ blib Build MANIFEST* MYMETA.* +Catmandu-Store-Lucy-* diff --git a/Build.PL b/Build.PL index ed9fd1f..a0e501d 100644 --- a/Build.PL +++ b/Build.PL @@ -8,18 +8,21 @@ my $builder = Module::Build->new( dist_author => [ 'Nicolas Steenlant <[email protected]>' ], - dist_version_from => 'lib/Catmandu/Store/Lucy.pm', build_requires => { 'Software::License' => 0, 'Test::Exception' => 0, 'Test::More' => 0, }, requires => { - 'perl' => '5.10.0', - 'Catmandu' => '0.1', - 'Lucy' => '0.30', - 'Moo' => '0.009011', + 'perl' => '5.10.1', + 'Catmandu' => '0.03', + 'Lucy' => '0.003', + 'Moo' => '1.00', }, + add_to_cleanup => [qw( + Catmandu-Store-Lucy-* + )], + create_makefile_pl => 'traditional', create_license => 1, ); diff --git a/Changes b/Changes index d0badc6..b934dcb 100644 --- a/Changes +++ b/Changes @@ -1,4 +1,4 @@ -Revision history for Catmandu-Store-MongoDB +Revision history for Catmandu-Store-Lucy -0.1 2012-05-04 +0.01 2012-10-17 - initial release diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0bd7643 --- /dev/null +++ b/LICENSE @@ -0,0 +1,379 @@ +This software is copyright (c) 2012 by Nicolas Steenlant <[email protected]>. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. + +Terms of the Perl programming language system itself + +a) the GNU General Public License as published by the Free + Software Foundation; either version 1, or (at your option) any + later version, or +b) the "Artistic License" + +--- The GNU General Public License, Version 1, February 1989 --- + +This software is Copyright (c) 2012 by Nicolas Steenlant <[email protected]>. + +This is free software, licensed under: + + The GNU General Public License, Version 1, February 1989 + + GNU GENERAL PUBLIC LICENSE + Version 1, February 1989 + + Copyright (C) 1989 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The license agreements of most software companies try to keep users +at the mercy of those companies. By contrast, our General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. The +General Public License applies to the Free Software Foundation's +software and to any other program whose authors commit to using it. +You can use it for your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Specifically, the General Public License is designed to make +sure that you have the freedom to give away or sell copies of free +software, that you receive source code or can get it if you want it, +that you can change the software or use pieces of it in new free +programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of a such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must tell them their rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any program or other work which +contains a notice placed by the copyright holder saying it may be +distributed under the terms of this General Public License. The +"Program", below, refers to any such program or work, and a "work based +on the Program" means either the Program or any work containing the +Program or a portion of it, either verbatim or with modifications. Each +licensee is addressed as "you". + + 1. You may copy and distribute verbatim copies of the Program's source +code as you receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice and +disclaimer of warranty; keep intact all the notices that refer to this +General Public License and to the absence of any warranty; and give any +other recipients of the Program a copy of this General Public License +along with the Program. You may charge a fee for the physical act of +transferring a copy. + + 2. You may modify your copy or copies of the Program or any portion of +it, and copy and distribute such modifications under the terms of Paragraph +1 above, provided that you also do the following: + + a) cause the modified files to carry prominent notices stating that + you changed the files and the date of any change; and + + b) cause the whole of any work that you distribute or publish, that + in whole or in part contains the Program or any part thereof, either + with or without modifications, to be licensed at no charge to all + third parties under the terms of this General Public License (except + that you may choose to grant warranty protection to some or all + third parties, at your option). + + c) If the modified program normally reads commands interactively when + run, you must cause it, when started running for such interactive use + in the simplest and most usual way, to print or display an + announcement including an appropriate copyright notice and a notice + that there is no warranty (or else, saying that you provide a + warranty) and that users may redistribute the program under these + conditions, and telling the user how to view a copy of this General + Public License. + + d) You may charge a fee for the physical act of transferring a + copy, and you may at your option offer warranty protection in + exchange for a fee. + +Mere aggregation of another independent work with the Program (or its +derivative) on a volume of a storage or distribution medium does not bring +the other work under the scope of these terms. + + 3. You may copy and distribute the Program (or a portion or derivative of +it, under Paragraph 2) in object code or executable form under the terms of +Paragraphs 1 and 2 above provided that you also do one of the following: + + a) accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of + Paragraphs 1 and 2 above; or, + + b) accompany it with a written offer, valid for at least three + years, to give any third party free (except for a nominal charge + for the cost of distribution) a complete machine-readable copy of the + corresponding source code, to be distributed under the terms of + Paragraphs 1 and 2 above; or, + + c) accompany it with the information you received as to where the + corresponding source code may be obtained. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form alone.) + +Source code for a work means the preferred form of the work for making +modifications to it. For an executable file, complete source code means +all the source code for all modules it contains; but, as a special +exception, it need not include source code for modules which are standard +libraries that accompany the operating system on which the executable +file runs, or for standard header files or definitions files that +accompany that operating system. + + 4. You may not copy, modify, sublicense, distribute or transfer the +Program except as expressly provided under this General Public License. +Any attempt otherwise to copy, modify, sublicense, distribute or transfer +the Program is void, and will automatically terminate your rights to use +the Program under this License. However, parties who have received +copies, or rights to use copies, from you under this General Public +License will not have their licenses terminated so long as such parties +remain in full compliance. + + 5. By copying, distributing or modifying the Program (or any work based +on the Program) you indicate your acceptance of this license to do so, +and all its terms and conditions. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the original +licensor to copy, distribute or modify the Program subject to these +terms and conditions. You may not impose any further restrictions on the +recipients' exercise of the rights granted herein. + + 7. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of the license which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +the license, you may choose any version ever published by the Free Software +Foundation. + + 8. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to humanity, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + + To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively convey +the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) 19yy <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19xx name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the +appropriate parts of the General Public License. Of course, the +commands you use may be called something other than `show w' and `show +c'; they could even be mouse-clicks or menu items--whatever suits your +program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + program `Gnomovision' (a program to direct compilers to make passes + at assemblers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +That's all there is to it! + + +--- The Artistic License 1.0 --- + +This software is Copyright (c) 2012 by Nicolas Steenlant <[email protected]>. + +This is free software, licensed under: + + The Artistic License 1.0 + +The Artistic License + +Preamble + +The intent of this document is to state the conditions under which a Package +may be copied, such that the Copyright Holder maintains some semblance of +artistic control over the development of the package, while giving the users of +the package the right to use and distribute the Package in a more-or-less +customary fashion, plus the right to make reasonable modifications. + +Definitions: + + - "Package" refers to the collection of files distributed by the Copyright + Holder, and derivatives of that collection of files created through + textual modification. + - "Standard Version" refers to such a Package if it has not been modified, + or has been modified in accordance with the wishes of the Copyright + Holder. + - "Copyright Holder" is whoever is named in the copyright or copyrights for + the package. + - "You" is you, if you're thinking about copying or distributing this Package. + - "Reasonable copying fee" is whatever you can justify on the basis of media + cost, duplication charges, time of people involved, and so on. (You will + not be required to justify it to the Copyright Holder, but only to the + computing community at large as a market that must bear the fee.) + - "Freely Available" means that no fee is charged for the item itself, though + there may be fees involved in handling the item. It also means that + recipients of the item may redistribute it under the same conditions they + received it. + +1. You may make and give away verbatim copies of the source form of the +Standard Version of this Package without restriction, provided that you +duplicate all of the original copyright notices and associated disclaimers. + +2. You may apply bug fixes, portability fixes and other modifications derived +from the Public Domain or from the Copyright Holder. A Package modified in such +a way shall still be considered the Standard Version. + +3. You may otherwise modify your copy of this Package in any way, provided that +you insert a prominent notice in each changed file stating how and when you +changed that file, and provided that you do at least ONE of the following: + + a) place your modifications in the Public Domain or otherwise make them + Freely Available, such as by posting said modifications to Usenet or an + equivalent medium, or placing the modifications on a major archive site + such as ftp.uu.net, or by allowing the Copyright Holder to include your + modifications in the Standard Version of the Package. + + b) use the modified Package only within your corporation or organization. + + c) rename any non-standard executables so the names do not conflict with + standard executables, which must also be provided, and provide a separate + manual page for each non-standard executable that clearly documents how it + differs from the Standard Version. + + d) make other distribution arrangements with the Copyright Holder. + +4. You may distribute the programs of this Package in object code or executable +form, provided that you do at least ONE of the following: + + a) distribute a Standard Version of the executables and library files, + together with instructions (in the manual page or equivalent) on where to + get the Standard Version. + + b) accompany the distribution with the machine-readable source of the Package + with your modifications. + + c) accompany any non-standard executables with their corresponding Standard + Version executables, giving the non-standard executables non-standard + names, and clearly documenting the differences in manual pages (or + equivalent), together with instructions on where to get the Standard + Version. + + d) make other distribution arrangements with the Copyright Holder. + +5. You may charge a reasonable copying fee for any distribution of this +Package. You may charge any fee you choose for support of this Package. You +may not charge a fee for this Package itself. However, you may distribute this +Package in aggregate with other (possibly commercial) programs as part of a +larger (possibly commercial) software distribution provided that you do not +advertise this Package as a product of your own. + +6. The scripts and library files supplied as input to or produced as output +from the programs of this Package do not automatically fall under the copyright +of this Package, but belong to whomever generated them, and may be sold +commercially, and may be aggregated with this Package. + +7. C or perl subroutines supplied by you and linked into this Package shall not +be considered part of this Package. + +8. The name of the Copyright Holder may not be used to endorse or promote +products derived from this software without specific prior written permission. + +9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +The End + diff --git a/META.json b/META.json new file mode 100644 index 0000000..af399f7 --- /dev/null +++ b/META.json @@ -0,0 +1,55 @@ +{ + "abstract" : "A searchable store backed by Lucy", + "author" : [ + "Nicolas Steenlant <[email protected]>" + ], + "dynamic_config" : 1, + "generated_by" : "Module::Build version 0.38, CPAN::Meta::Converter version 2.120921", + "license" : [ + "perl_5" + ], + "meta-spec" : { + "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", + "version" : "2" + }, + "name" : "Catmandu-Store-Lucy", + "prereqs" : { + "build" : { + "requires" : { + "Software::License" : "0", + "Test::Exception" : "0", + "Test::More" : "0" + } + }, + "configure" : { + "requires" : { + "Module::Build" : "0.38" + } + }, + "runtime" : { + "requires" : { + "Catmandu" : "0.03", + "Lucy" : "0.003", + "Moo" : "1.00", + "perl" : "v5.10.1" + } + } + }, + "provides" : { + "Catmandu::Store::Lucy" : { + "file" : "lib/Catmandu/Store/Lucy.pm", + "version" : "0.01" + }, + "Catmandu::Store::Lucy::Bag" : { + "file" : "lib/Catmandu/Store/Lucy/Bag.pm", + "version" : 0 + } + }, + "release_status" : "stable", + "resources" : { + "license" : [ + "http://dev.perl.org/licenses/" + ] + }, + "version" : "0.01" +} diff --git a/META.yml b/META.yml new file mode 100644 index 0000000..6cf84f5 --- /dev/null +++ b/META.yml @@ -0,0 +1,32 @@ +--- +abstract: 'A searchable store backed by Lucy' +author: + - 'Nicolas Steenlant <[email protected]>' +build_requires: + Software::License: 0 + Test::Exception: 0 + Test::More: 0 +configure_requires: + Module::Build: 0.38 +dynamic_config: 1 +generated_by: 'Module::Build version 0.38, CPAN::Meta::Converter version 2.120921' +license: perl +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.4.html + version: 1.4 +name: Catmandu-Store-Lucy +provides: + Catmandu::Store::Lucy: + file: lib/Catmandu/Store/Lucy.pm + version: 0.01 + Catmandu::Store::Lucy::Bag: + file: lib/Catmandu/Store/Lucy/Bag.pm + version: 0 +requires: + Catmandu: 0.03 + Lucy: 0.003 + Moo: 1.00 + perl: v5.10.1 +resources: + license: http://dev.perl.org/licenses/ +version: 0.01 diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..6d7592c --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,20 @@ +# Note: this file was auto-generated by Module::Build::Compat version 0.3800 +require 5.010001; +use ExtUtils::MakeMaker; +WriteMakefile +( + 'NAME' => 'Catmandu::Store::Lucy', + 'VERSION_FROM' => 'lib/Catmandu/Store/Lucy.pm', + 'PREREQ_PM' => { + 'Catmandu' => '0.03', + 'Lucy' => '0.003', + 'Moo' => '1.00', + 'Software::License' => 0, + 'Test::Exception' => 0, + 'Test::More' => 0 + }, + 'INSTALLDIRS' => 'site', + 'EXE_FILES' => [], + 'PL_FILES' => {} +) +; diff --git a/README b/README index dd0289f..d54c61e 100644 --- a/README +++ b/README @@ -1,9 +1,5 @@ Catmandu-Store-Lucy -WARNING - -This module isn't finished and WILL NOT WORK! - INSTALLATION To install this module, run the following commands: diff --git a/lib/Catmandu/Store/Lucy.pm b/lib/Catmandu/Store/Lucy.pm index 6e6e60a..0d8774f 100644 --- a/lib/Catmandu/Store/Lucy.pm +++ b/lib/Catmandu/Store/Lucy.pm @@ -1,19 +1,73 @@ -package Catmandu::Index::Lucy; +package Catmandu::Store::Lucy; + use Catmandu::Sane; -use Catmandu::Util qw(is_able check_id); +use Moo; use Lucy::Plan::Schema; +use Lucy::Plan::StringType; use Lucy::Plan::FullTextType; use Lucy::Analysis::PolyAnalyzer; use Lucy::Index::Indexer; use Lucy::Search::IndexSearcher; -use Catmandu::Hits; -use Catmandu::Object - path => 'r', - _analyzer => { default => '_build_analyzer' }, - _ft_field_type => { default => '_build_ft_field_type' }, - _schema => { default => '_build_schema' }, - _indexer => { default => '_build_indexer' }, - _searcher => { default => '_build_searcher' }; +use Data::MessagePack; +use Catmandu::Store::Lucy::Bag; + +with 'Catmandu::Store'; + +=head1 NAME + +Catmandu::Store::Lucy - A searchable store backed by Lucy + +=head1 VERSION + +Version 0.01 + +=cut + +our $VERSION = '0.01'; + +=head1 SYNOPSIS + + use Catmandu::Store::Lucy; + + my $store = Catmandu::Store::Lucy->new(path => '/path/to/index/'); + + my $book = $store->bag->add({ title => 'Advanced Perl' }); + + printf "book stored as %s\n", $book->{_id}; + + $store->bag->commit; + + $bag->get($id); + + # all bags are iterators + $bag->each(sub { ... }); + $bag->take(10)->each(sub { ... }); + + my $hits = $bag->search(query => 'perl'); + + # hits is an iterator + $hits->each(sub { + say $_[0]->{title}; + }); + + $bag->delete($id); + $bag->delete_by_query(query => 'perl'); + $bag->delete_all; + $bag->commit; + +=cut + +has path => (is => 'ro', required => 1); + +for my $attr (qw(analyzer ft_field_type schema)) { + has "_$attr" => (is => 'ro', lazy => 1, builder => "_build_$attr"); +} + +for my $attr (qw(indexer searcher)) { + has "_$attr" => (is => 'ro', lazy => 1, builder => "_build_$attr", clearer => 1, predicate => 1); +} + +sub _messagepack { state $_messagepack = Data::MessagePack->new->utf8 } sub _build_analyzer { Lucy::Analysis::PolyAnalyzer->new(language => 'en'); @@ -21,13 +75,15 @@ sub _build_analyzer { sub _build_ft_field_type { my $self = $_[0]; - Lucy::Plan::FullTextType->new(analyzer => $self->_analyzer); + Lucy::Plan::FullTextType->new(analyzer => $self->_analyzer, stored => 0); } sub _build_schema { my $self = $_[0]; my $schema = Lucy::Plan::Schema->new; - $schema->spec_field(name => '_id', type => Lucy::Plan::StringType->new); + $schema->spec_field(name => '_id', type => Lucy::Plan::StringType->new(stored => 1, sortable => 1)); + $schema->spec_field(name => '_bag', type => Lucy::Plan::StringType->new(stored => 0)); + $schema->spec_field(name => '_data', type => Lucy::Plan::BlobType->new(stored => 1)); $schema; } @@ -41,103 +97,32 @@ sub _build_searcher { Lucy::Search::IndexSearcher->new(index => $self->path); } -sub _add { - my ($self, $obj) = @_; - check_id($obj); - my $type = $self->_ft_field_type; - my $schema = $self->_schema; - for my $name (keys %$obj) { - $schema->spec_field(name => $name, type => $type) if $name ne '_id'; - } - $self->_indexer->add_doc($obj); - $obj; -} +sub _commit { + my ($self) = @_; -sub add { - my ($self, $obj) = @_; - if (is_able $obj, 'each') { - $obj->each(sub { $self->_add($_[0]) }); - } else { - $self->_add($obj); + if ($self->_has_indexer) { + $self->_indexer->commit; + $self->_clear_indexer; + $self->_clear_searcher; } } -sub search { - my ($self, $query, %opts) = @_; +=head1 SEE ALSO - $opts{limit} ||= 50; - $opts{start} //= 0; +L<Catmandu::Store> - if (ref $query eq 'HASH') { - $query = Lucy::Search::ANDQuery->new( - children => [ map { - Lucy::Search::TermQuery->new(field => $_, term => $query->{$_}); - } keys %$query ], - ); - } +=head1 AUTHOR - my $hits = $self->_searcher->hits( - query => $query, - num_wanted => $opts{limit}, - offset => $opts{start}, - ); - - my $objs = []; - - if (my $store = $opts{reify}) { - while (my $hit = $hits->next) { - push @$objs, $store->get($hit->{_id}); - } - } else { - while (my $hit = $hits->next) { - push @$objs, $hit->get_fields; - } - } +Nicolas Steenlant, C<< <nicolas.steenlant at ugent.be> >> - Catmandu::Hits->new({ - limit => $opts{limit}, - start => $opts{start}, - total => $hits->total, - hits => $objs, - }); -} +=head1 LICENSE AND COPYRIGHT -sub delete { - my ($self, $id) = @_; - $self->_indexer->delete_by_term(field => '_id', term => check_id($id)); - return; -} +This program is free software; you can redistribute it and/or modify it +under the terms of either: the GNU General Public License as published +by the Free Software Foundation; or the Artistic License. -sub delete_where { - my ($self, $query) = @_; +See http://dev.perl.org/licenses/ for more information. - if (! ref $query) { - $query = Lucy::Search::QueryParser->new(schema => $self->_schema)->parse($query); - } elsif (ref $query eq 'HASH') { - my $terms = [ map { - Lucy::Search::TermQuery->new(field => $_, term => $query->{$_}); - } keys %$query ]; - $query = Lucy::Search::ANDQuery->new(children => $terms); - } - - $self->_indexer->delete_by_query($query); - return; -} - -sub delete_all { - my ($self) = @_; - $self->delete_where(Lucy::Search::MatchAllQuery->new); - return; -} - -sub commit { # TODO optimize - my ($self) = @_; - - if ($self->{_indexer}) { - $self->{_indexer}->commit; - delete $self->{_indexer}; - delete $self->{_searcher}; - } -} +=cut 1; diff --git a/lib/Catmandu/Store/Lucy/Bag.pm b/lib/Catmandu/Store/Lucy/Bag.pm new file mode 100644 index 0000000..7ab740e --- /dev/null +++ b/lib/Catmandu/Store/Lucy/Bag.pm @@ -0,0 +1,245 @@ +package Catmandu::Store::Lucy::Bag; + +use Catmandu::Sane; +use Moo; +use Catmandu::Hits; +use Lucy::Search::ANDQuery; +use Lucy::Search::TermQuery; +use Lucy::Search::QueryParser; +use Lucy::Search::SortSpec; +use Lucy::Search::SortRule; + +with 'Catmandu::Bag'; +with 'Catmandu::Searchable'; + +has _bag_query => (is => 'ro', lazy => 1, builder => '_build_bag_query'); + +sub _build_bag_query { Lucy::Search::TermQuery->new(field => '_bag', term => $_[0]->name) } + +sub _searcher { + my ($self) = @_; + eval { + $self->store->_searcher; + } or do { + my $e = $@; die $e if $e !~ /index doesn't seem to contain any data/i; + }; +} + +sub generator { + my ($self) = @_; + sub { + state $searcher = $self->_searcher || return; + state $messagepack = $self->store->_messagepack; + state $start = 0; + state $limit = 100; + state $hits; + + my $hit; + unless ($hits and $hit = $hits->next) { + $hits = $searcher->hits(query => $self->_bag_query, num_wanted => $limit, offset => $start); + $start += $limit; + $hit = $hits->next || return; + } + $messagepack->unpack($hit->{_data}); + }; +} + +sub count { + my ($self) = @_; + my $searcher = $self->_searcher || return 0; + $searcher->hits( + query => $self->_bag_query, + num_wanted => 0, + )->total_hits; +} + +sub get { + my ($self, $id) = @_; + my $searcher = $self->_searcher || return; + my $hits = $searcher->hits( + query => Lucy::Search::ANDQuery->new(children => [ + Lucy::Search::TermQuery->new(field => '_id', term => $id), + $self->_bag_query, + ]), + num_wanted => 1, + ); + $hits->total_hits || return; + $self->store->_messagepack->unpack($hits->next->{_data}); +} + +sub add { + my ($self, $data) = @_; + + my $store = $self->store; + my $bag = $self->name; + my $data_blob = $store->_messagepack->pack($data); + + $data = $self->_flatten_data($data); + + my $type = $store->_ft_field_type; + my $schema = $store->_schema; + for my $key (keys %$data) { + next if $key eq '_id'; + $schema->spec_field(name => $key, type => $type); + } + + $data->{_data} = $data_blob; + $data->{_bag} = $bag; + $store->_indexer->add_doc($data); + $data; +} + +sub commit { + my ($self) = @_; + $self->store->_commit; +} + +sub search { + my ($self, %args) = @_; + + my $start = delete $args{start}; + my $limit = delete $args{limit}; + my $sort = delete $args{sort}; + my $bag = delete $args{reify}; + + if ($sort) { + $args{sort_spec} = $sort; + } + + my $searcher = $self->_searcher || return Catmandu::Hits->new( + start => $start, + limit => $limit, + total => 0, + hits => [], + ); + + my $lucy_hits = $searcher->hits( + %args, + num_wanted => $limit, + offset => $start, + ); + + my $hits = []; + + if ($bag) { + while (my $hit = $lucy_hits->next) { + push @$hits, $bag->get($hit->{_id}); + } + } else { + while (my $hit = $lucy_hits->next) { + push @$hits, $self->store->_messagepack->unpack($hit->{_data}); + } + } + + Catmandu::Hits->new( + start => $start, + limit => $limit, + total => $lucy_hits->total_hits, + hits => $hits, + ); +} + +sub searcher { + confess 'TODO'; +} + +sub delete { + my ($self, $id) = @_; + $self->store->_indexer->delete_by_query(Lucy::Search::ANDQuery->new(children => [ + Lucy::Search::TermQuery->new(field => '_id', term => $id), + $self->_bag_query, + ])); +} + +sub delete_all { + my ($self) = @_; + $self->store->_indexer->delete_by_query($self->_bag_query); +} + +sub delete_by_query { + my ($self, %args) = @_; + $self->store->_indexer->delete_by_query($args{query}); + +} + +sub translate_sru_sortkeys { # TODO score, cql mapping + my ($self, $sortkeys) = @_; + my $rules = []; + for my $sortkey (split /\s+/, $sortkeys) { + my ($field, $schema, $asc) = split /,/, $sortkey; + $field || next; + if ($field eq 'relevance') { + push @$rules, Lucy::Search::SortRule->new(type => 'score', reverse => $asc ? 1 : 0); + } else { + push @$rules, Lucy::Search::SortRule->new(type => 'field', field => $field, reverse => $asc ? 0 : 1); + } + } + Lucy::Search::SortSpec->new(rules => $rules); +} + +sub translate_cql_query { + confess 'TODO'; +} + +sub normalize_query { + my ($self, $query) = @_; + if (!defined $query) { + return $self->_bag_query; + } + if (ref $query) { + return Lucy::Search::ANDQuery->new(children => [ + $self->_bag_query, + $query, + ]); + } + Lucy::Search::ANDQuery->new(children => [ + $self->_bag_query, + Lucy::Search::QueryParser->new(default_boolop => 'AND', schema => $self->store->_schema)->parse($query), + ]); +} + +sub _flatten_data { + my ($self, $data) = @_; + + my $flat = {}; + + my @ref_stack = ($data); + my @key_stack; + while (@ref_stack) { + my $ref = shift @ref_stack; + my $key = shift @key_stack; + + if (ref $ref eq 'ARRAY') { + for my $val (@$ref) { + if (ref $val) { + push @key_stack, $key; + push @ref_stack, $val; + } elsif (defined $val) { + $flat->{$key} = $val; + } + } + next; + } + + for my $k (keys %$ref) { + my $val = $ref->{$k}; + $k = "$key.$k" if defined $key; + if (ref $val) { + push @key_stack, $k; + push @ref_stack, $val; + } elsif (defined $val) { + $flat->{$k} = $val; + } + } + } + + $flat; +} + +=head1 SEE ALSO + +L<Catmandu::Bag>, L<Catmandu::Searchable> + +=cut + +1; diff --git a/t/00-load.t b/t/00-load.t deleted file mode 100644 index 95c012d..0000000 --- a/t/00-load.t +++ /dev/null @@ -1,15 +0,0 @@ -#!perl -T - -use strict; -use warnings; -use Test::More; - -my $pkg; -BEGIN { - $pkg = 'Catmandu::Store::Lucy'; - use_ok $pkg; -} - -require_ok $pkg; - -done_testing 2; diff --git a/t/00.t b/t/00.t new file mode 100644 index 0000000..8f94b7c --- /dev/null +++ b/t/00.t @@ -0,0 +1,88 @@ +#!perl -T + +use strict; +use warnings; +use Test::More; +use File::Temp (); + +my $pkg; +BEGIN { + $pkg = 'Catmandu::Store::Lucy'; + use_ok $pkg; +} + +require_ok $pkg; + +my $index_path = File::Temp->newdir; + +note "Index path is $index_path"; + +my $store = $pkg->new(path => $index_path); + +isa_ok $store, $pkg; + +is $store->path, $index_path; + +my $bag = $store->bag; + +isa_ok $bag, "${pkg}::Bag"; + +is_deeply $bag->_flatten_data({ + foo => 'foo', + bar => [['bar']], + baz => {baz=>[{'boz' => 'boz'},'baz']}, + fob => 'fob', + foz => ['faz', 'foz'], +}), { + 'foo' => 'foo', + 'bar' => 'bar', + 'baz.baz.boz' => 'boz', + 'baz.baz' => 'baz', + 'fob' => 'fob', + 'foz' => 'foz', +}; + +my $data = $bag->add({lang => 'Perl'}); +$bag->add({lang => 'Ruby'}); +$bag->add({lang => 'Perl'}); + +is $bag->count, 0; + +$bag->commit; + +is $bag->count, 3; + +is_deeply $bag->get($data->{_id}), $data; +is $bag->get('?'), undef; + +my $hits = $bag->search; + +isa_ok $hits, 'Catmandu::Hits'; + +is $hits->total, 3; + +$hits = $bag->search(query => 'ruby'); + +is $hits->total, 1; + +my @ids = sort @{$bag->pluck('_id')->to_array}; + +is $ids[0], $store->bag->search(sru_sortkeys => "_id,,1")->first->{_id}; +is $ids[-1], $store->bag->search(sru_sortkeys => "_id,,0")->first->{_id}; + +$bag->delete($data->{_id}); +$bag->commit; + +is $bag->count, 2; + +$bag->delete_by_query(query => 'ruby'); +$bag->commit; + +is $bag->count, 1; + +$bag->delete_all; +$bag->commit; + +is $bag->count, 0; + +done_testing 18; -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libcatmandu-store-lucy-perl.git _______________________________________________ Pkg-perl-cvs-commits mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-perl-cvs-commits
