Port t/015-sort_external.t to C Original commit by Nick Wellnhofer, updated by Marvin Humphrey for changes to the SortExternal API.
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/73d6925e Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/73d6925e Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/73d6925e Branch: refs/heads/master Commit: 73d6925e8b6597a41939132feec7ebd2f8bf6f0c Parents: be4e183 Author: Nick Wellnhofer <[email protected]> Authored: Thu Oct 17 18:56:40 2013 +0200 Committer: Marvin Humphrey <[email protected]> Committed: Tue Jul 1 12:09:49 2014 -0700 ---------------------------------------------------------------------- core/Lucy/Test.c | 2 + core/Lucy/Test/Util/TestSortExternal.c | 287 ++++++++++++++++++++++++++ core/Lucy/Test/Util/TestSortExternal.cfh | 29 +++ core/Lucy/Util/BBSortEx.c | 20 ++ core/Lucy/Util/BBSortEx.cfh | 6 + perl/buildlib/Lucy/Build/Binding/Util.pm | 9 - perl/lib/Lucy/Util/BBSortEx.pm | 25 --- perl/t/core/015-sort_external.t | 23 +++ 8 files changed, 367 insertions(+), 34 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucy/blob/73d6925e/core/Lucy/Test.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Test.c b/core/Lucy/Test.c index 62c9bdf..196ae6a 100644 --- a/core/Lucy/Test.c +++ b/core/Lucy/Test.c @@ -81,6 +81,7 @@ #include "Lucy/Test/Util/TestJson.h" #include "Lucy/Test/Util/TestMemoryPool.h" #include "Lucy/Test/Util/TestPriorityQueue.h" +#include "Lucy/Test/Util/TestSortExternal.h" TestSuite* Test_create_test_suite() { @@ -88,6 +89,7 @@ Test_create_test_suite() { TestSuite_Add_Batch(suite, (TestBatch*)TestPriQ_new()); TestSuite_Add_Batch(suite, (TestBatch*)TestBitVector_new()); + TestSuite_Add_Batch(suite, (TestBatch*)TestSortExternal_new()); TestSuite_Add_Batch(suite, (TestBatch*)TestMemPool_new()); TestSuite_Add_Batch(suite, (TestBatch*)TestIxFileNames_new()); TestSuite_Add_Batch(suite, (TestBatch*)TestJson_new()); http://git-wip-us.apache.org/repos/asf/lucy/blob/73d6925e/core/Lucy/Test/Util/TestSortExternal.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Test/Util/TestSortExternal.c b/core/Lucy/Test/Util/TestSortExternal.c new file mode 100644 index 0000000..2e04c83 --- /dev/null +++ b/core/Lucy/Test/Util/TestSortExternal.c @@ -0,0 +1,287 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#define TESTLUCY_USE_SHORT_NAMES +#include "Lucy/Util/ToolSet.h" +#include "Lucy/Test/Util/TestSortExternal.h" + +#include "Clownfish/TestHarness/TestBatchRunner.h" +#include "Lucy/Util/BBSortEx.h" +#include "Lucy/Util/SortExternal.h" + +static ByteBuf *a_bb; +static ByteBuf *b_bb; +static ByteBuf *c_bb; +static ByteBuf *d_bb; +static ByteBuf *x_bb; +static ByteBuf *y_bb; +static ByteBuf *z_bb; + +TestSortExternal* +TestSortExternal_new() { + return (TestSortExternal*)VTable_Make_Obj(TESTSORTEXTERNAL); +} + +static void +S_init_bytebufs() { + a_bb = BB_new_bytes("a", 1); + b_bb = BB_new_bytes("b", 1); + c_bb = BB_new_bytes("c", 1); + d_bb = BB_new_bytes("d", 1); + x_bb = BB_new_bytes("x", 1); + y_bb = BB_new_bytes("y", 1); + z_bb = BB_new_bytes("z", 1); +} + +static void +S_destroy_bytebufs() { + DECREF(a_bb); + DECREF(b_bb); + DECREF(c_bb); + DECREF(d_bb); + DECREF(x_bb); + DECREF(y_bb); + DECREF(z_bb); +} + +static void +test_bbsortex(TestBatchRunner *runner) { + BBSortEx *sortex = BBSortEx_new(4, NULL); + + BBSortEx_Feed(sortex, INCREF(c_bb)); + TEST_INT_EQ(runner, BBSortEx_Buffer_Count(sortex), 1, + "feed elem into cache"); + + BBSortEx_Feed(sortex, INCREF(b_bb)); + BBSortEx_Feed(sortex, INCREF(d_bb)); + BBSortEx_Sort_Buffer(sortex); + + { + VArray *cache = BBSortEx_Peek_Cache(sortex); + VArray *wanted = VA_new(3); + VA_Push(wanted, INCREF(b_bb)); + VA_Push(wanted, INCREF(c_bb)); + VA_Push(wanted, INCREF(d_bb)); + TEST_TRUE(runner, VA_Equals(cache, (Obj*)wanted), "sort cache"); + DECREF(wanted); + DECREF(cache); + } + + BBSortEx_Feed(sortex, INCREF(a_bb)); + TEST_INT_EQ(runner, BBSortEx_Buffer_Count(sortex), 0, + "cache flushed automatically when mem_thresh crossed"); + TEST_INT_EQ(runner, BBSortEx_Get_Num_Runs(sortex), 1, "run added"); + + VArray *external = VA_new(3); + VA_Push(external, INCREF(x_bb)); + VA_Push(external, INCREF(y_bb)); + VA_Push(external, INCREF(z_bb)); + BBSortEx *run = BBSortEx_new(0x1000000, external); + BBSortEx_Add_Run(sortex, (SortExternal*)run); + BBSortEx_Flip(sortex); + + { + VArray *got = VA_new(7); + Obj *object; + while (NULL != (object = BBSortEx_Fetch(sortex))) { + VA_Push(got, object); + } + + VArray *wanted = VA_new(7); + VA_Push(wanted, INCREF(a_bb)); + VA_Push(wanted, INCREF(b_bb)); + VA_Push(wanted, INCREF(c_bb)); + VA_Push(wanted, INCREF(d_bb)); + VA_Push(wanted, INCREF(x_bb)); + VA_Push(wanted, INCREF(y_bb)); + VA_Push(wanted, INCREF(z_bb)); + + TEST_TRUE(runner, VA_Equals(got, (Obj*)wanted), "Add_Run"); + + DECREF(wanted); + DECREF(got); + } + + DECREF(external); + DECREF(sortex); +} + +static void +test_clear_buffer(TestBatchRunner *runner) { + BBSortEx *sortex = BBSortEx_new(4, NULL); + + BBSortEx_Feed(sortex, INCREF(c_bb)); + BBSortEx_Clear_Buffer(sortex); + TEST_INT_EQ(runner, BBSortEx_Buffer_Count(sortex), 0, "Clear_Buffer"); + + BBSortEx_Feed(sortex, INCREF(b_bb)); + BBSortEx_Feed(sortex, INCREF(a_bb)); + BBSortEx_Flush(sortex); + BBSortEx_Flip(sortex); + Obj *object = BBSortEx_Peek(sortex); + TEST_TRUE(runner, BB_Equals(a_bb, object), "Peek"); + + VArray *got = VA_new(2); + while (NULL != (object = BBSortEx_Fetch(sortex))) { + VA_Push(got, object); + } + VArray *wanted = VA_new(2); + VA_Push(wanted, INCREF(a_bb)); + VA_Push(wanted, INCREF(b_bb)); + TEST_TRUE(runner, VA_Equals(got, (Obj*)wanted), + "elements cleared via Clear_Buffer truly cleared"); + + DECREF(wanted); + DECREF(got); + DECREF(sortex); +} + +static void +S_test_sort(TestBatchRunner *runner, VArray *bytebufs, uint32_t mem_thresh, + const char *test_name) { + uint32_t size = VA_Get_Size(bytebufs); + BBSortEx *sortex = BBSortEx_new(mem_thresh, NULL); + ByteBuf **shuffled = (ByteBuf**)MALLOCATE(size * sizeof(ByteBuf*)); + + for (int i = 0; i < size; ++i) { + shuffled[i] = (ByteBuf*)CERTIFY(VA_Fetch(bytebufs, i), BYTEBUF); + } + for (int i = size - 1; i > 0; --i) { + int shuffle_pos = rand() % (i + 1); + ByteBuf *temp = shuffled[shuffle_pos]; + shuffled[shuffle_pos] = shuffled[i]; + shuffled[i] = temp; + } + for (int i = 0; i < size; ++i) { + BBSortEx_Feed(sortex, INCREF(shuffled[i])); + } + + BBSortEx_Flip(sortex); + VArray *got = VA_new(size); + Obj *object; + while (NULL != (object = BBSortEx_Fetch(sortex))) { + VA_Push(got, object); + } + TEST_TRUE(runner, VA_Equals(got, (Obj*)bytebufs), test_name); + + FREEMEM(shuffled); + DECREF(got); + DECREF(sortex); +} + +static void +S_test_sort_letters(TestBatchRunner *runner, const char *letters, + uint32_t mem_thresh, const char *test_name) { + size_t num_letters = strlen(letters); + VArray *bytebufs = VA_new(num_letters); + + for (int i = 0; i < num_letters; ++i) { + char ch = letters[i]; + size_t size = ch == '_' ? 0 : 1; + ByteBuf *bytebuf = BB_new_bytes(&ch, size); + VA_Push(bytebufs, (Obj*)bytebuf); + } + + S_test_sort(runner, bytebufs, mem_thresh, test_name); + + DECREF(bytebufs); +} + +static void +test_sort_letters(TestBatchRunner *runner) { + S_test_sort_letters(runner, "abcdefghijklmnopqrstuvwxyz", 0x1000000, + "sort letters"); + S_test_sort_letters(runner, "aaabcdxxxxxxyy", 0x1000000, + "sort repeated letters"); + S_test_sort_letters(runner, "__abcdefghijklmnopqrstuvwxyz", 0x1000000, + "sort letters and empty strings"); + S_test_sort_letters(runner, "abcdefghijklmnopqrstuvwxyz", 30, + "... with an absurdly low mem_thresh"); + S_test_sort_letters(runner, "abcdefghijklmnopqrstuvwxyz", 1, + "... with an even lower mem_thresh"); +} + +static void +test_sort_nothing(TestBatchRunner *runner) { + BBSortEx *sortex = BBSortEx_new(0x1000000, NULL); + BBSortEx_Flip(sortex); + TEST_TRUE(runner, BBSortEx_Fetch(sortex) == NULL, + "Sorting nothing returns undef"); + DECREF(sortex); +} + +static void +test_sort_packed_ints(TestBatchRunner *runner) { + size_t num_ints = 11001; + VArray *bytebufs = VA_new(num_ints); + + for (uint32_t i = 0; i < num_ints; ++i) { + char buf[4]; + buf[0] = i >> 24; + buf[1] = i >> 16; + buf[2] = i >> 8; + buf[3] = i; + ByteBuf *bytebuf = BB_new_bytes(&buf, 4); + VA_Push(bytebufs, (Obj*)bytebuf); + } + + S_test_sort(runner, bytebufs, 5000, "Sorting packed integers..."); + + DECREF(bytebufs); +} + +static void +test_sort_random_strings(TestBatchRunner *runner) { + size_t num_strings = 1001; + VArray *bytebufs = VA_new(num_strings); + + for (uint32_t i = 0; i < num_strings; ++i) { + char buf[1201]; + int size = rand() % 1200 + 1; + for (int i = 0; i < size; ++i) { + buf[i] = rand(); + } + ByteBuf *bytebuf = BB_new_bytes(&buf, size); + VA_Push(bytebufs, (Obj*)bytebuf); + } + + VA_Sort(bytebufs, NULL, NULL); + S_test_sort(runner, bytebufs, 15000, + "Random binary strings of random length"); + + DECREF(bytebufs); +} + +void +TestSortExternal_Run_IMP(TestSortExternal *self, TestBatchRunner *runner) { + TestBatchRunner_Plan(runner, (TestBatch*)self, 16); + + srand((unsigned int)time((time_t*)NULL)); + S_init_bytebufs(); + test_bbsortex(runner); + test_clear_buffer(runner); + test_sort_letters(runner); + test_sort_nothing(runner); + test_sort_packed_ints(runner); + test_sort_random_strings(runner); + S_destroy_bytebufs(); +} + + http://git-wip-us.apache.org/repos/asf/lucy/blob/73d6925e/core/Lucy/Test/Util/TestSortExternal.cfh ---------------------------------------------------------------------- diff --git a/core/Lucy/Test/Util/TestSortExternal.cfh b/core/Lucy/Test/Util/TestSortExternal.cfh new file mode 100644 index 0000000..028b322 --- /dev/null +++ b/core/Lucy/Test/Util/TestSortExternal.cfh @@ -0,0 +1,29 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +parcel TestLucy; + +class Lucy::Test::Util::TestSortExternal + inherits Clownfish::TestHarness::TestBatch { + + inert incremented TestSortExternal* + new(); + + void + Run(TestSortExternal *self, TestBatchRunner *runner); +} + + http://git-wip-us.apache.org/repos/asf/lucy/blob/73d6925e/core/Lucy/Util/BBSortEx.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Util/BBSortEx.c b/core/Lucy/Util/BBSortEx.c index ff96c73..c9201bb 100644 --- a/core/Lucy/Util/BBSortEx.c +++ b/core/Lucy/Util/BBSortEx.c @@ -169,4 +169,24 @@ BBSortEx_Compare_IMP(BBSortEx *self, void *va, void *vb) { return BB_compare((ByteBuf**)va, (ByteBuf**)vb); } +VArray* +BBSortEx_Peek_Cache_IMP(BBSortEx *self) { + BBSortExIVARS *const ivars = BBSortEx_IVARS(self); + uint32_t count = ivars->buf_max - ivars->buf_tick; + Obj **buffer = ivars->buffer; + VArray *retval = VA_new(count); + + for (uint32_t i = ivars->buf_tick; i < ivars->buf_max; ++i) { + VA_Push(retval, INCREF(buffer[i])); + } + + return retval; +} + +uint32_t +BBSortEx_Get_Num_Runs_IMP(BBSortEx *self) { + BBSortExIVARS *const ivars = BBSortEx_IVARS(self); + return VA_Get_Size(ivars->runs); +} + http://git-wip-us.apache.org/repos/asf/lucy/blob/73d6925e/core/Lucy/Util/BBSortEx.cfh ---------------------------------------------------------------------- diff --git a/core/Lucy/Util/BBSortEx.cfh b/core/Lucy/Util/BBSortEx.cfh index 8ef64c8..6fd8656 100644 --- a/core/Lucy/Util/BBSortEx.cfh +++ b/core/Lucy/Util/BBSortEx.cfh @@ -51,6 +51,12 @@ class Lucy::Util::BBSortEx int Compare(BBSortEx *self, void *va, void *vb); + incremented VArray* + Peek_Cache(BBSortEx *self); + + uint32_t + Get_Num_Runs(BBSortEx *self); + public void Destroy(BBSortEx *self); } http://git-wip-us.apache.org/repos/asf/lucy/blob/73d6925e/perl/buildlib/Lucy/Build/Binding/Util.pm ---------------------------------------------------------------------- diff --git a/perl/buildlib/Lucy/Build/Binding/Util.pm b/perl/buildlib/Lucy/Build/Binding/Util.pm index 4ef7dbe..1438011 100644 --- a/perl/buildlib/Lucy/Build/Binding/Util.pm +++ b/perl/buildlib/Lucy/Build/Binding/Util.pm @@ -21,21 +21,12 @@ $VERSION = eval $VERSION; sub bind_all { my $class = shift; - $class->bind_bbsortex; $class->bind_debug; $class->bind_freezer; $class->bind_indexfilenames; $class->bind_sortexternal; } -sub bind_bbsortex { - my $binding = Clownfish::CFC::Binding::Perl::Class->new( - parcel => "Lucy", - class_name => "Lucy::Util::BBSortEx", - ); - Clownfish::CFC::Binding::Perl::Class->register($binding); -} - sub bind_debug { my $xs_code = <<'END_XS_CODE'; MODULE = Lucy PACKAGE = Lucy::Util::Debug http://git-wip-us.apache.org/repos/asf/lucy/blob/73d6925e/perl/lib/Lucy/Util/BBSortEx.pm ---------------------------------------------------------------------- diff --git a/perl/lib/Lucy/Util/BBSortEx.pm b/perl/lib/Lucy/Util/BBSortEx.pm deleted file mode 100644 index cbba688..0000000 --- a/perl/lib/Lucy/Util/BBSortEx.pm +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -package Lucy::Util::BBSortEx; -use Lucy; -our $VERSION = '0.003000'; -$VERSION = eval $VERSION; - -1; - -__END__ - - http://git-wip-us.apache.org/repos/asf/lucy/blob/73d6925e/perl/t/core/015-sort_external.t ---------------------------------------------------------------------- diff --git a/perl/t/core/015-sort_external.t b/perl/t/core/015-sort_external.t new file mode 100644 index 0000000..6d966bb --- /dev/null +++ b/perl/t/core/015-sort_external.t @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +use strict; +use warnings; + +use Lucy::Test; +my $success = Lucy::Test::run_tests("Lucy::Test::Util::TestSortExternal"); + +exit($success ? 0 : 1); +
