Convert t/221-sort_writer.t to C. Original commit by Nick Wellnhofer, amended by Marvin Humphrey to replace "cnick" with "nickname".
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/be4e1833 Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/be4e1833 Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/be4e1833 Branch: refs/heads/master Commit: be4e1833868fd8cca5cba9883d1bcd1cab32f2b4 Parents: d96ef18 Author: Nick Wellnhofer <[email protected]> Authored: Sat Sep 28 15:56:26 2013 +0200 Committer: Marvin Humphrey <[email protected]> Committed: Tue Jul 1 12:09:44 2014 -0700 ---------------------------------------------------------------------- core/Lucy/Test.c | 2 + core/Lucy/Test/Index/TestSortWriter.c | 315 +++++++++++++++++++++++++++ core/Lucy/Test/Index/TestSortWriter.cfh | 43 ++++ perl/t/221-sort_writer.t | 174 --------------- perl/t/core/224-sort_writer.t | 23 ++ 5 files changed, 383 insertions(+), 174 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucy/blob/be4e1833/core/Lucy/Test.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Test.c b/core/Lucy/Test.c index 5923e98..62c9bdf 100644 --- a/core/Lucy/Test.c +++ b/core/Lucy/Test.c @@ -40,6 +40,7 @@ #include "Lucy/Test/Index/TestSegWriter.h" #include "Lucy/Test/Index/TestSegment.h" #include "Lucy/Test/Index/TestSnapshot.h" +#include "Lucy/Test/Index/TestSortWriter.h" #include "Lucy/Test/Index/TestTermInfo.h" #include "Lucy/Test/Object/TestBitVector.h" #include "Lucy/Test/Object/TestI32Array.h" @@ -121,6 +122,7 @@ Test_create_test_suite() { TestSuite_Add_Batch(suite, (TestBatch*)TestHLWriter_new()); TestSuite_Add_Batch(suite, (TestBatch*)TestPListWriter_new()); TestSuite_Add_Batch(suite, (TestBatch*)TestSegWriter_new()); + TestSuite_Add_Batch(suite, (TestBatch*)TestSortWriter_new()); TestSuite_Add_Batch(suite, (TestBatch*)TestPolyReader_new()); TestSuite_Add_Batch(suite, (TestBatch*)TestFullTextType_new()); TestSuite_Add_Batch(suite, 
(TestBatch*)TestBlobType_new()); http://git-wip-us.apache.org/repos/asf/lucy/blob/be4e1833/core/Lucy/Test/Index/TestSortWriter.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Test/Index/TestSortWriter.c b/core/Lucy/Test/Index/TestSortWriter.c new file mode 100644 index 0000000..761572f --- /dev/null +++ b/core/Lucy/Test/Index/TestSortWriter.c @@ -0,0 +1,315 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define TESTLUCY_USE_SHORT_NAMES +#include "Lucy/Util/ToolSet.h" + +#include "Lucy/Test/Index/TestSortWriter.h" + +#include "Clownfish/TestHarness/TestBatchRunner.h" +#include "Lucy/Analysis/StandardTokenizer.h" +#include "Lucy/Document/Doc.h" +#include "Lucy/Document/HitDoc.h" +#include "Lucy/Index/DocReader.h" +#include "Lucy/Index/Indexer.h" +#include "Lucy/Index/IndexManager.h" +#include "Lucy/Index/PolyReader.h" +#include "Lucy/Index/Segment.h" +#include "Lucy/Index/SegReader.h" +#include "Lucy/Index/SortCache.h" +#include "Lucy/Index/SortReader.h" +#include "Lucy/Index/SortWriter.h" +#include "Lucy/Plan/FullTextType.h" +#include "Lucy/Plan/Schema.h" +#include "Lucy/Plan/StringType.h" +#include "Lucy/Store/RAMFolder.h" + +static String *name_str; +static String *speed_str; +static String *weight_str; +static String *home_str; +static String *cat_str; +static String *wheels_str; +static String *unused_str; +static String *nope_str; + +TestSortWriter* +TestSortWriter_new() { + return (TestSortWriter*)VTable_Make_Obj(TESTSORTWRITER); +} + +static void +S_init_strings() { + name_str = Str_newf("name"); + speed_str = Str_newf("speed"); + weight_str = Str_newf("weight"); + home_str = Str_newf("home"); + cat_str = Str_newf("cat"); + wheels_str = Str_newf("wheels"); + unused_str = Str_newf("unused"); + nope_str = Str_newf("nope"); +} + +static void +S_destroy_strings() { + DECREF(name_str); + DECREF(speed_str); + DECREF(weight_str); + DECREF(home_str); + DECREF(cat_str); + DECREF(wheels_str); + DECREF(unused_str); + DECREF(nope_str); +} + +static Schema* +S_create_schema() { + Schema *schema = Schema_new(); + + StandardTokenizer *tokenizer = StandardTokenizer_new(); + FullTextType *full_text_type = FullTextType_new((Analyzer*)tokenizer); + FullTextType_Set_Sortable(full_text_type, true); + + StringType *string_type = StringType_new(); + StringType_Set_Sortable(string_type, true); + + StringType *unsortable = StringType_new(); + + Schema_Spec_Field(schema, 
name_str, (FieldType*)full_text_type); + Schema_Spec_Field(schema, speed_str, (FieldType*)string_type); + Schema_Spec_Field(schema, weight_str, (FieldType*)string_type); + Schema_Spec_Field(schema, home_str, (FieldType*)string_type); + Schema_Spec_Field(schema, cat_str, (FieldType*)string_type); + Schema_Spec_Field(schema, wheels_str, (FieldType*)string_type); + Schema_Spec_Field(schema, unused_str, (FieldType*)string_type); + Schema_Spec_Field(schema, nope_str, (FieldType*)unsortable); + + DECREF(unsortable); + DECREF(string_type); + DECREF(full_text_type); + DECREF(tokenizer); + + return schema; +} + +static void +S_store_field(Doc *doc, String *field, const char *value) { + if (value) { + StackString *string = SSTR_WRAP_UTF8(value, strlen(value)); + Doc_Store(doc, field, (Obj*)string); + } +} + +static void +S_add_doc(Indexer *indexer, const char *name, const char *speed, + const char *weight, const char *home, const char *wheels, + const char *nope) { + Doc *doc = Doc_new(NULL, 0); + + S_store_field(doc, name_str, name); + S_store_field(doc, speed_str, speed); + S_store_field(doc, weight_str, weight); + S_store_field(doc, home_str, home); + S_store_field(doc, cat_str, "vehicle"); + S_store_field(doc, wheels_str, wheels); + S_store_field(doc, nope_str, nope); + + Indexer_Add_Doc(indexer, doc, 1.0f); + + DECREF(doc); +} + +static void +S_test_sort_cache(TestBatchRunner *runner, RAMFolder *folder, + SegReader *seg_reader, const char *gen, bool is_used, + String *field) { + Segment *segment = SegReader_Get_Segment(seg_reader); + int32_t field_num = Seg_Field_Num(segment, field); + String *filename = Str_newf("seg_%s/sort-%i32.ord", gen, field_num); + if (is_used) { + TEST_TRUE(runner, RAMFolder_Exists(folder, filename), + "sort files written for %s", Str_Get_Ptr8(field)); + } + else { + TEST_TRUE(runner, !RAMFolder_Exists(folder, filename), + "no sort files written for %s", Str_Get_Ptr8(field)); + } + DECREF(filename); + + if (!is_used) { return; } + + SortReader 
*sort_reader + = (SortReader*)SegReader_Obtain(seg_reader, + VTable_Get_Name(SORTREADER)); + DocReader *doc_reader + = (DocReader*)SegReader_Obtain(seg_reader, VTable_Get_Name(DOCREADER)); + SortCache *sort_cache + = SortReader_Fetch_Sort_Cache(sort_reader, field); + + int32_t doc_max = SegReader_Doc_Max(seg_reader); + for (int32_t doc_id = 1; doc_id <= doc_max; ++doc_id) { + int32_t ord = SortCache_Ordinal(sort_cache, doc_id); + Obj *cache_value = SortCache_Value(sort_cache, ord); + HitDoc *doc = DocReader_Fetch_Doc(doc_reader, doc_id); + Obj *doc_value = HitDoc_Extract(doc, field); + + bool is_equal; + if (cache_value == NULL || doc_value == NULL) { + is_equal = (cache_value == doc_value); + } + else { + is_equal = Obj_Equals(cache_value, doc_value); + } + TEST_TRUE(runner, is_equal, "correct cached value field %s doc %d", + Str_Get_Ptr8(field), doc_id); + + DECREF(doc_value); + DECREF(doc); + DECREF(cache_value); + } +} + +static void +test_sort_writer(TestBatchRunner *runner) { + Schema *schema = S_create_schema(); + RAMFolder *folder = RAMFolder_new(NULL); + + { + // Add vehicles. 
+ Indexer *indexer = Indexer_new(schema, (Obj*)folder, NULL, 0); + + S_add_doc(indexer, "airplane", "0200", "8000", "air", "3", "nyet"); + S_add_doc(indexer, "bike", "0015", "0025", "land", "2", NULL); + S_add_doc(indexer, "car", "0070", "3000", "land", "4", NULL); + + Indexer_Commit(indexer); + DECREF(indexer); + } + + { + PolyReader *poly_reader = PolyReader_open((Obj*)folder, NULL, NULL); + VArray *seg_readers = PolyReader_Get_Seg_Readers(poly_reader); + SegReader *seg_reader = (SegReader*)VA_Fetch(seg_readers, 0); + + S_test_sort_cache(runner, folder, seg_reader, "1", true, name_str); + S_test_sort_cache(runner, folder, seg_reader, "1", true, speed_str); + S_test_sort_cache(runner, folder, seg_reader, "1", true, weight_str); + S_test_sort_cache(runner, folder, seg_reader, "1", true, home_str); + S_test_sort_cache(runner, folder, seg_reader, "1", true, cat_str); + S_test_sort_cache(runner, folder, seg_reader, "1", true, wheels_str); + S_test_sort_cache(runner, folder, seg_reader, "1", false, unused_str); + S_test_sort_cache(runner, folder, seg_reader, "1", false, nope_str); + + DECREF(poly_reader); + } + + { + // Add a second segment. + NonMergingIndexManager *manager = NMIxManager_new(); + Indexer *indexer + = Indexer_new(schema, (Obj*)folder, (IndexManager*)manager, 0); + // no "wheels" field -- test NULL/undef + S_add_doc(indexer, "dirigible", "0040", "0000", "air", NULL, NULL); + Indexer_Commit(indexer); + DECREF(indexer); + DECREF(manager); + } + + { + // Consolidate everything, to test merging. 
+ Indexer *indexer = Indexer_new(schema, (Obj*)folder, NULL, 0); + StackString *bike_str = SSTR_WRAP_UTF8("bike", 4); + Indexer_Delete_By_Term(indexer, name_str, (Obj*)bike_str); + // no "wheels" field -- test NULL/undef + S_add_doc(indexer, "elephant", "0020", "6000", "land", NULL, NULL); + Indexer_Optimize(indexer); + Indexer_Commit(indexer); + DECREF(indexer); + } + + { + VArray *filenames = RAMFolder_List_R(folder, NULL); + int num_old_seg_files = 0; + for (uint32_t i = 0, size = VA_Get_Size(filenames); i < size; ++i) { + String *filename = (String*)VA_Fetch(filenames, i); + if (Str_Find_Utf8(filename, "seg_1", 5) >= 0 + || Str_Find_Utf8(filename, "seg_2", 5) >= 0 + ) { + ++num_old_seg_files; + } + } + TEST_INT_EQ(runner, num_old_seg_files, 0, + "all files from earlier segments zapped"); + DECREF(filenames); + } + + { + PolyReader *poly_reader = PolyReader_open((Obj*)folder, NULL, NULL); + VArray *seg_readers = PolyReader_Get_Seg_Readers(poly_reader); + SegReader *seg_reader = (SegReader*)VA_Fetch(seg_readers, 0); + + S_test_sort_cache(runner, folder, seg_reader, "3", true, name_str); + S_test_sort_cache(runner, folder, seg_reader, "3", true, speed_str); + S_test_sort_cache(runner, folder, seg_reader, "3", true, weight_str); + S_test_sort_cache(runner, folder, seg_reader, "3", true, home_str); + S_test_sort_cache(runner, folder, seg_reader, "3", true, cat_str); + S_test_sort_cache(runner, folder, seg_reader, "3", true, wheels_str); + + DECREF(poly_reader); + } + + DECREF(folder); + DECREF(schema); +} + +void +TestSortWriter_Run_IMP(TestSortWriter *self, TestBatchRunner *runner) { + TestBatchRunner_Plan(runner, (TestBatch*)self, 57); + + // Force frequent flushes. 
+ SortWriter_set_default_mem_thresh(100); + + S_init_strings(); + test_sort_writer(runner); + S_destroy_strings(); +} + +NonMergingIndexManager* +NMIxManager_new() { + NonMergingIndexManager *self + = (NonMergingIndexManager*)VTable_Make_Obj(NONMERGINGINDEXMANAGER); + return NMIxManager_init(self); +} + +NonMergingIndexManager* +NMIxManager_init(NonMergingIndexManager *self) { + IxManager_init((IndexManager*)self, NULL, NULL); + return self; +} + +VArray* +NMIxManager_Recycle_IMP(NonMergingIndexManager *self, PolyReader *reader, + lucy_DeletionsWriter *del_writer, int64_t cutoff, + bool optimize) { + UNUSED_VAR(self); + UNUSED_VAR(reader); + UNUSED_VAR(del_writer); + UNUSED_VAR(cutoff); + UNUSED_VAR(optimize); + return VA_new(0); +} + + http://git-wip-us.apache.org/repos/asf/lucy/blob/be4e1833/core/Lucy/Test/Index/TestSortWriter.cfh ---------------------------------------------------------------------- diff --git a/core/Lucy/Test/Index/TestSortWriter.cfh b/core/Lucy/Test/Index/TestSortWriter.cfh new file mode 100644 index 0000000..98e3ec6 --- /dev/null +++ b/core/Lucy/Test/Index/TestSortWriter.cfh @@ -0,0 +1,43 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +parcel TestLucy; + +class Lucy::Test::Index::TestSortWriter + inherits Clownfish::TestHarness::TestBatch { + + inert incremented TestSortWriter* + new(); + + void + Run(TestSortWriter *self, TestBatchRunner *runner); +} + +class Lucy::Test::Index::NonMergingIndexManager nickname NMIxManager + inherits Lucy::Index::IndexManager { + + public inert incremented NonMergingIndexManager* + new(); + + public inert NonMergingIndexManager* + init(NonMergingIndexManager *self); + + public incremented VArray* + Recycle(NonMergingIndexManager *self, PolyReader *reader, + DeletionsWriter *del_writer, int64_t cutoff, + bool optimize = false); +} + http://git-wip-us.apache.org/repos/asf/lucy/blob/be4e1833/perl/t/221-sort_writer.t ---------------------------------------------------------------------- diff --git a/perl/t/221-sort_writer.t b/perl/t/221-sort_writer.t deleted file mode 100644 index 45fb341..0000000 --- a/perl/t/221-sort_writer.t +++ /dev/null @@ -1,174 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -use strict; -use warnings; -use lib 'buildlib'; - -package NonMergingIndexManager; -use base qw( Lucy::Index::IndexManager ); - -sub recycle { - return Clownfish::VArray->new; -} - -package SortSchema; -use base qw( Lucy::Plan::Schema ); - -sub new { - my $self = shift->SUPER::new(@_); - my $fulltext_type = Lucy::Plan::FullTextType->new( - analyzer => Lucy::Analysis::StandardTokenizer->new, - sortable => 1, - ); - my $string_type = Lucy::Plan::StringType->new( sortable => 1 ); - my $unsortable = Lucy::Plan::StringType->new; - $self->spec_field( name => 'name', type => $fulltext_type ); - $self->spec_field( name => 'speed', type => $string_type ); - $self->spec_field( name => 'weight', type => $string_type ); - $self->spec_field( name => 'home', type => $string_type ); - $self->spec_field( name => 'cat', type => $string_type ); - $self->spec_field( name => 'wheels', type => $string_type ); - $self->spec_field( name => 'unused', type => $string_type ); - $self->spec_field( name => 'nope', type => $unsortable ); - return $self; -} - -package main; -use Lucy::Test; -use Test::More tests => 57; - -# Force frequent flushes. 
-Lucy::Index::SortWriter::set_default_mem_thresh(100); - -my $airplane = { - name => 'airplane', - speed => '0200', - weight => '8000', - home => 'air', - cat => 'vehicle', - wheels => 3, - nope => 'nyet', -}; -my $bike = { - name => 'bike', - speed => '0015', - weight => '0025', - home => 'land', - cat => 'vehicle', - wheels => 2, -}; -my $car = { - name => 'car', - speed => '0070', - weight => '3000', - home => 'land', - cat => 'vehicle', - wheels => 4, -}; -my $dirigible = { - name => 'dirigible', - speed => '0040', - weight => '0000', - home => 'air', - cat => 'vehicle', - # no "wheels" field -- test NULL/undef -}; -my $elephant = { - name => 'elephant', - speed => '0020', - weight => '6000', - home => 'land', - cat => 'vehicle', - # no "wheels" field -- test NULL/undef -}; - -my $folder = Lucy::Store::RAMFolder->new; -my $schema = SortSchema->new; -my $indexer = Lucy::Index::Indexer->new( - index => $folder, - schema => $schema, -); - -# Add vehicles. -$indexer->add_doc($_) for ( $airplane, $bike, $car ); - -$indexer->commit; - -my $polyreader = Lucy::Index::IndexReader->open( index => $folder ); -my $seg_reader = $polyreader->get_seg_readers->[0]; -my $sort_reader = $seg_reader->obtain("Lucy::Index::SortReader"); -my $doc_reader = $seg_reader->obtain("Lucy::Index::DocReader"); -my $segment = $seg_reader->get_segment; - -for my $field (qw( name speed weight home cat wheels )) { - my $field_num = $segment->field_num($field); - ok( $folder->exists("seg_1/sort-$field_num.ord"), - "sort files written for $field" ); - my $sort_cache = $sort_reader->fetch_sort_cache($field); - for ( 1 .. $seg_reader->doc_max ) { - is( $sort_cache->value( ord => $sort_cache->ordinal($_) ), - $doc_reader->fetch_doc($_)->{$field}, - "correct cached value doc $_ " - ); - } -} - -for my $field (qw( unused nope )) { - my $field_num = $segment->field_num($field); - ok( !$folder->exists("seg_1/sort-$field_num.ord"), - "no sort files written for $field" ); -} - -# Add a second segment. 
-$indexer = Lucy::Index::Indexer->new( - index => $folder, - schema => $schema, - manager => NonMergingIndexManager->new, -); -$indexer->add_doc($dirigible); -$indexer->commit; - -# Consolidate everything, to test merging. -$indexer = Lucy::Index::Indexer->new( - index => $folder, - schema => $schema, -); -$indexer->delete_by_term( field => 'name', term => 'bike' ); -$indexer->add_doc($elephant); -$indexer->optimize; -$indexer->commit; - -my $num_old_seg_files = scalar grep {m/seg_[12]/} @{ $folder->list_r }; -is( $num_old_seg_files, 0, "all files from earlier segments zapped" ); - -$polyreader = Lucy::Index::IndexReader->open( index => $folder ); -$seg_reader = $polyreader->get_seg_readers->[0]; -$sort_reader = $seg_reader->obtain("Lucy::Index::SortReader"); -$doc_reader = $seg_reader->obtain("Lucy::Index::DocReader"); -$segment = $seg_reader->get_segment; - -for my $field (qw( name speed weight home cat wheels )) { - my $field_num = $segment->field_num($field); - ok( $folder->exists("seg_3/sort-$field_num.ord"), - "sort files written for $field" ); - my $sort_cache = $sort_reader->fetch_sort_cache($field); - for ( 1 .. $seg_reader->doc_max ) { - is( $sort_cache->value( ord => $sort_cache->ordinal($_) ), - $doc_reader->fetch_doc($_)->{$field}, - "correct cached value field $field doc $_ " - ); - } -} http://git-wip-us.apache.org/repos/asf/lucy/blob/be4e1833/perl/t/core/224-sort_writer.t ---------------------------------------------------------------------- diff --git a/perl/t/core/224-sort_writer.t b/perl/t/core/224-sort_writer.t new file mode 100644 index 0000000..b987614 --- /dev/null +++ b/perl/t/core/224-sort_writer.t @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +use strict; +use warnings; + +use Lucy::Test; +my $success = Lucy::Test::run_tests("Lucy::Test::Index::TestSortWriter"); + +exit($success ? 0 : 1); +
