Switch TestSortExternal over to Blob
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/2f7df540 Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/2f7df540 Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/2f7df540 Branch: refs/heads/master Commit: 2f7df540f27fae676082332612718f9da79bc7bb Parents: 32e5dc4 Author: Nick Wellnhofer <[email protected]> Authored: Mon May 4 17:51:53 2015 +0200 Committer: Nick Wellnhofer <[email protected]> Committed: Tue May 5 11:23:05 2015 +0200 ---------------------------------------------------------------------- core/Lucy/Test/Util/TestSortExternal.c | 215 ++++++++++++++-------------- core/Lucy/Util/BBSortEx.c | 200 -------------------------- core/Lucy/Util/BBSortEx.cfh | 67 --------- core/Lucy/Util/BlobSortEx.c | 201 ++++++++++++++++++++++++++ core/Lucy/Util/BlobSortEx.cfh | 67 +++++++++ perl/t/015-sort_external.t | 54 +++---- 6 files changed, 403 insertions(+), 401 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucy/blob/2f7df540/core/Lucy/Test/Util/TestSortExternal.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Test/Util/TestSortExternal.c b/core/Lucy/Test/Util/TestSortExternal.c index 059ccca..e9bbba9 100644 --- a/core/Lucy/Test/Util/TestSortExternal.c +++ b/core/Lucy/Test/Util/TestSortExternal.c @@ -22,17 +22,18 @@ #include "Lucy/Util/ToolSet.h" #include "Lucy/Test/Util/TestSortExternal.h" +#include "Clownfish/Blob.h" #include "Clownfish/TestHarness/TestBatchRunner.h" -#include "Lucy/Util/BBSortEx.h" +#include "Lucy/Util/BlobSortEx.h" #include "Lucy/Util/SortExternal.h" -static ByteBuf *a_bb; -static ByteBuf *b_bb; -static ByteBuf *c_bb; -static ByteBuf *d_bb; -static ByteBuf *x_bb; -static ByteBuf *y_bb; -static ByteBuf *z_bb; +static Blob *a_blob; +static Blob *b_blob; +static Blob *c_blob; +static Blob *d_blob; +static Blob *x_blob; +static Blob *y_blob; +static Blob *z_blob; TestSortExternal* TestSortExternal_new() { @@ -40,78 +41,78 @@ TestSortExternal_new() { } static void -S_init_bytebufs() { - a_bb = BB_new_bytes("a", 1); - b_bb = BB_new_bytes("b", 1); - c_bb = BB_new_bytes("c", 1); - d_bb = BB_new_bytes("d", 1); - x_bb = BB_new_bytes("x", 1); - y_bb = BB_new_bytes("y", 1); - z_bb = BB_new_bytes("z", 1); +S_init_blobs() { + a_blob = Blob_new("a", 1); + b_blob = Blob_new("b", 1); + c_blob = Blob_new("c", 1); + d_blob = Blob_new("d", 1); + x_blob = Blob_new("x", 1); + y_blob = Blob_new("y", 1); + z_blob = Blob_new("z", 1); } static void -S_destroy_bytebufs() { - DECREF(a_bb); - DECREF(b_bb); - DECREF(c_bb); - DECREF(d_bb); - DECREF(x_bb); - DECREF(y_bb); - DECREF(z_bb); +S_destroy_blobs() { + DECREF(a_blob); + DECREF(b_blob); + DECREF(c_blob); + DECREF(d_blob); + DECREF(x_blob); + DECREF(y_blob); + DECREF(z_blob); } static void test_bbsortex(TestBatchRunner *runner) { - BBSortEx *sortex = BBSortEx_new(4, NULL); + BlobSortEx *sortex = BlobSortEx_new(4, NULL); - BBSortEx_Feed(sortex, INCREF(c_bb)); - TEST_INT_EQ(runner, BBSortEx_Buffer_Count(sortex), 1, + BlobSortEx_Feed(sortex, INCREF(c_blob)); + TEST_INT_EQ(runner, BlobSortEx_Buffer_Count(sortex), 1, "feed elem into cache"); - BBSortEx_Feed(sortex, INCREF(b_bb)); - BBSortEx_Feed(sortex, INCREF(d_bb)); - BBSortEx_Sort_Buffer(sortex); + BlobSortEx_Feed(sortex, INCREF(b_blob)); + BlobSortEx_Feed(sortex, INCREF(d_blob)); + BlobSortEx_Sort_Buffer(sortex); { - Vector *cache = BBSortEx_Peek_Cache(sortex); + Vector *cache = BlobSortEx_Peek_Cache(sortex); Vector *wanted = Vec_new(3); - Vec_Push(wanted, INCREF(b_bb)); - Vec_Push(wanted, INCREF(c_bb)); - Vec_Push(wanted, INCREF(d_bb)); + Vec_Push(wanted, INCREF(b_blob)); + Vec_Push(wanted, INCREF(c_blob)); + Vec_Push(wanted, INCREF(d_blob)); TEST_TRUE(runner, Vec_Equals(cache, (Obj*)wanted), "sort cache"); DECREF(wanted); DECREF(cache); } - BBSortEx_Feed(sortex, INCREF(a_bb)); - TEST_INT_EQ(runner, BBSortEx_Buffer_Count(sortex), 0, + BlobSortEx_Feed(sortex, INCREF(a_blob)); + TEST_INT_EQ(runner, BlobSortEx_Buffer_Count(sortex), 0, "cache flushed automatically when mem_thresh crossed"); - TEST_INT_EQ(runner, BBSortEx_Get_Num_Runs(sortex), 1, "run added"); + TEST_INT_EQ(runner, BlobSortEx_Get_Num_Runs(sortex), 1, "run added"); Vector *external = Vec_new(3); - Vec_Push(external, INCREF(x_bb)); - Vec_Push(external, INCREF(y_bb)); - Vec_Push(external, INCREF(z_bb)); - BBSortEx *run = BBSortEx_new(0x1000000, external); - BBSortEx_Add_Run(sortex, (SortExternal*)run); - BBSortEx_Flip(sortex); + Vec_Push(external, INCREF(x_blob)); + Vec_Push(external, INCREF(y_blob)); + Vec_Push(external, INCREF(z_blob)); + BlobSortEx *run = BlobSortEx_new(0x1000000, external); + BlobSortEx_Add_Run(sortex, (SortExternal*)run); + BlobSortEx_Flip(sortex); { Vector *got = Vec_new(7); Obj *object; - while (NULL != (object = BBSortEx_Fetch(sortex))) { + while (NULL != (object = BlobSortEx_Fetch(sortex))) { Vec_Push(got, object); } Vector *wanted = Vec_new(7); - Vec_Push(wanted, INCREF(a_bb)); - Vec_Push(wanted, INCREF(b_bb)); - Vec_Push(wanted, INCREF(c_bb)); - Vec_Push(wanted, INCREF(d_bb)); - Vec_Push(wanted, INCREF(x_bb)); - Vec_Push(wanted, INCREF(y_bb)); - Vec_Push(wanted, INCREF(z_bb)); + Vec_Push(wanted, INCREF(a_blob)); + Vec_Push(wanted, INCREF(b_blob)); + Vec_Push(wanted, INCREF(c_blob)); + Vec_Push(wanted, INCREF(d_blob)); + Vec_Push(wanted, INCREF(x_blob)); + Vec_Push(wanted, INCREF(y_blob)); + Vec_Push(wanted, INCREF(z_blob)); TEST_TRUE(runner, Vec_Equals(got, (Obj*)wanted), "Add_Run"); @@ -125,26 +126,26 @@ test_bbsortex(TestBatchRunner *runner) { static void test_clear_buffer(TestBatchRunner *runner) { - BBSortEx *sortex = BBSortEx_new(4, NULL); + BlobSortEx *sortex = BlobSortEx_new(4, NULL); - BBSortEx_Feed(sortex, INCREF(c_bb)); - BBSortEx_Clear_Buffer(sortex); - TEST_INT_EQ(runner, BBSortEx_Buffer_Count(sortex), 0, "Clear_Buffer"); + BlobSortEx_Feed(sortex, INCREF(c_blob)); + BlobSortEx_Clear_Buffer(sortex); + TEST_INT_EQ(runner, BlobSortEx_Buffer_Count(sortex), 0, "Clear_Buffer"); - BBSortEx_Feed(sortex, INCREF(b_bb)); - BBSortEx_Feed(sortex, INCREF(a_bb)); - BBSortEx_Flush(sortex); - BBSortEx_Flip(sortex); - Obj *object = BBSortEx_Peek(sortex); - TEST_TRUE(runner, BB_Equals(a_bb, object), "Peek"); + BlobSortEx_Feed(sortex, INCREF(b_blob)); + BlobSortEx_Feed(sortex, INCREF(a_blob)); + BlobSortEx_Flush(sortex); + BlobSortEx_Flip(sortex); + Obj *object = BlobSortEx_Peek(sortex); + TEST_TRUE(runner, Blob_Equals(a_blob, object), "Peek"); Vector *got = Vec_new(2); - while (NULL != (object = BBSortEx_Fetch(sortex))) { + while (NULL != (object = BlobSortEx_Fetch(sortex))) { Vec_Push(got, object); } Vector *wanted = Vec_new(2); - Vec_Push(wanted, INCREF(a_bb)); - Vec_Push(wanted, INCREF(b_bb)); + Vec_Push(wanted, INCREF(a_blob)); + Vec_Push(wanted, INCREF(b_blob)); TEST_TRUE(runner, Vec_Equals(got, (Obj*)wanted), "elements cleared via Clear_Buffer truly cleared"); @@ -154,32 +155,32 @@ test_clear_buffer(TestBatchRunner *runner) { } static void -S_test_sort(TestBatchRunner *runner, Vector *bytebufs, uint32_t mem_thresh, +S_test_sort(TestBatchRunner *runner, Vector *blobs, uint32_t mem_thresh, const char *test_name) { - int size = (int)Vec_Get_Size(bytebufs); - BBSortEx *sortex = BBSortEx_new(mem_thresh, NULL); - ByteBuf **shuffled = (ByteBuf**)MALLOCATE(size * sizeof(ByteBuf*)); + int size = (int)Vec_Get_Size(blobs); + BlobSortEx *sortex = BlobSortEx_new(mem_thresh, NULL); + Blob **shuffled = (Blob**)MALLOCATE(size * sizeof(Blob*)); for (int i = 0; i < size; ++i) { - shuffled[i] = (ByteBuf*)CERTIFY(Vec_Fetch(bytebufs, i), BYTEBUF); + shuffled[i] = (Blob*)CERTIFY(Vec_Fetch(blobs, i), BLOB); } for (int i = size - 1; i > 0; --i) { int shuffle_pos = rand() % (i + 1); - ByteBuf *temp = shuffled[shuffle_pos]; + Blob *temp = shuffled[shuffle_pos]; shuffled[shuffle_pos] = shuffled[i]; shuffled[i] = temp; } for (int i = 0; i < size; ++i) { - BBSortEx_Feed(sortex, INCREF(shuffled[i])); + BlobSortEx_Feed(sortex, INCREF(shuffled[i])); } - BBSortEx_Flip(sortex); + BlobSortEx_Flip(sortex); Vector *got = Vec_new(size); Obj *object; - while (NULL != (object = BBSortEx_Fetch(sortex))) { + while (NULL != (object = BlobSortEx_Fetch(sortex))) { Vec_Push(got, object); } - TEST_TRUE(runner, Vec_Equals(got, (Obj*)bytebufs), test_name); + TEST_TRUE(runner, Vec_Equals(got, (Obj*)blobs), test_name); FREEMEM(shuffled); DECREF(got); @@ -190,18 +191,18 @@ static void S_test_sort_letters(TestBatchRunner *runner, const char *letters, uint32_t mem_thresh, const char *test_name) { size_t num_letters = strlen(letters); - Vector *bytebufs = Vec_new(num_letters); + Vector *blobs = Vec_new(num_letters); for (size_t i = 0; i < num_letters; ++i) { char ch = letters[i]; size_t size = ch == '_' ? 0 : 1; - ByteBuf *bytebuf = BB_new_bytes(&ch, size); - Vec_Push(bytebufs, (Obj*)bytebuf); + Blob *blob = Blob_new(&ch, size); + Vec_Push(blobs, (Obj*)blob); } - S_test_sort(runner, bytebufs, mem_thresh, test_name); + S_test_sort(runner, blobs, mem_thresh, test_name); - DECREF(bytebufs); + DECREF(blobs); } static void @@ -220,9 +221,9 @@ test_sort_letters(TestBatchRunner *runner) { static void test_sort_nothing(TestBatchRunner *runner) { - BBSortEx *sortex = BBSortEx_new(0x1000000, NULL); - BBSortEx_Flip(sortex); - TEST_TRUE(runner, BBSortEx_Fetch(sortex) == NULL, + BlobSortEx *sortex = BlobSortEx_new(0x1000000, NULL); + BlobSortEx_Flip(sortex); + TEST_TRUE(runner, BlobSortEx_Fetch(sortex) == NULL, "Sorting nothing returns undef"); DECREF(sortex); } @@ -230,7 +231,7 @@ test_sort_nothing(TestBatchRunner *runner) { static void test_sort_packed_ints(TestBatchRunner *runner) { size_t num_ints = 11001; - Vector *bytebufs = Vec_new(num_ints); + Vector *blobs = Vec_new(num_ints); for (uint32_t i = 0; i < num_ints; ++i) { char buf[4]; @@ -238,19 +239,19 @@ test_sort_packed_ints(TestBatchRunner *runner) { buf[1] = i >> 16; buf[2] = i >> 8; buf[3] = i; - ByteBuf *bytebuf = BB_new_bytes(&buf, 4); - Vec_Push(bytebufs, (Obj*)bytebuf); + Blob *blob = Blob_new(buf, 4); + Vec_Push(blobs, (Obj*)blob); } - S_test_sort(runner, bytebufs, 5000, "Sorting packed integers..."); + S_test_sort(runner, blobs, 5000, "Sorting packed integers..."); - DECREF(bytebufs); + DECREF(blobs); } static void test_sort_random_strings(TestBatchRunner *runner) { size_t num_strings = 1001; - Vector *bytebufs = Vec_new(num_strings); + Vector *blobs = Vec_new(num_strings); for (uint32_t i = 0; i < num_strings; ++i) { char buf[1201]; @@ -258,15 +259,15 @@ test_sort_random_strings(TestBatchRunner *runner) { for (int i = 0; i < size; ++i) { buf[i] = rand(); } - ByteBuf *bytebuf = BB_new_bytes(&buf, size); - Vec_Push(bytebufs, (Obj*)bytebuf); + Blob *blob = Blob_new(buf, size); + Vec_Push(blobs, (Obj*)blob); } - Vec_Sort(bytebufs); - S_test_sort(runner, bytebufs, 15000, + Vec_Sort(blobs); + S_test_sort(runner, blobs, 15000, "Random binary strings of random length"); - DECREF(bytebufs); + DECREF(blobs); } static void @@ -274,27 +275,27 @@ test_run(TestBatchRunner *runner) { Vector *letters = Vec_new(26); for (int i = 0; i < 26; ++i) { char ch = 'a' + i; - ByteBuf *bytebuf = BB_new_bytes(&ch, 1); - Vec_Push(letters, (Obj*)bytebuf); + Blob *blob = Blob_new(&ch, 1); + Vec_Push(letters, (Obj*)blob); } - BBSortEx *run = BBSortEx_new(0x1000000, letters); - BBSortEx_Set_Mem_Thresh(run, 5); + BlobSortEx *run = BlobSortEx_new(0x1000000, letters); + BlobSortEx_Set_Mem_Thresh(run, 5); - BBSortEx_Refill(run); - TEST_INT_EQ(runner, BBSortEx_Buffer_Count(run), 5, + BlobSortEx_Refill(run); + TEST_INT_EQ(runner, BlobSortEx_Buffer_Count(run), 5, "Refill doesn't exceed memory threshold"); - Obj *endpost = BBSortEx_Peek_Last(run); - ByteBuf *wanted = BB_new_bytes("e", 1); - TEST_TRUE(runner, BB_Equals(wanted, endpost), "Peek_Last"); + Obj *endpost = BlobSortEx_Peek_Last(run); + Blob *wanted = Blob_new("e", 1); + TEST_TRUE(runner, Blob_Equals(wanted, endpost), "Peek_Last"); Vector *elems = Vec_new(26); do { - while (BBSortEx_Buffer_Count(run) > 0) { - Obj *object = BBSortEx_Fetch(run); + while (BlobSortEx_Buffer_Count(run) > 0) { + Obj *object = BlobSortEx_Fetch(run); Vec_Push(elems, object); } - } while (BBSortEx_Refill(run) > 0); + } while (BlobSortEx_Refill(run) > 0); TEST_TRUE(runner, Vec_Equals(elems, (Obj*)letters), "retrieve all elems"); DECREF(elems); @@ -309,7 +310,7 @@ TestSortExternal_Run_IMP(TestSortExternal *self, TestBatchRunner *runner) { TestBatchRunner_Plan(runner, (TestBatch*)self, 19); srand((unsigned int)time((time_t*)NULL)); - S_init_bytebufs(); + S_init_blobs(); test_bbsortex(runner); test_clear_buffer(runner); test_sort_letters(runner); @@ -317,7 +318,7 @@ TestSortExternal_Run_IMP(TestSortExternal *self, TestBatchRunner *runner) { test_sort_packed_ints(runner); test_sort_random_strings(runner); test_run(runner); - S_destroy_bytebufs(); + S_destroy_blobs(); } http://git-wip-us.apache.org/repos/asf/lucy/blob/2f7df540/core/Lucy/Util/BBSortEx.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Util/BBSortEx.c b/core/Lucy/Util/BBSortEx.c deleted file mode 100644 index 5a9a3e0..0000000 --- a/core/Lucy/Util/BBSortEx.c +++ /dev/null @@ -1,200 +0,0 @@ -/* Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#define C_LUCY_BBSORTEX -#define LUCY_USE_SHORT_NAMES -#include "Lucy/Util/ToolSet.h" - -#include "Lucy/Util/BBSortEx.h" - -#include "Lucy/Index/Segment.h" -#include "Lucy/Store/InStream.h" -#include "Lucy/Store/Folder.h" -#include "Lucy/Store/OutStream.h" - -BBSortEx* -BBSortEx_new(uint32_t mem_threshold, Vector *external) { - BBSortEx *self = (BBSortEx*)Class_Make_Obj(BBSORTEX); - return BBSortEx_init(self, mem_threshold, external); -} - -BBSortEx* -BBSortEx_init(BBSortEx *self, uint32_t mem_threshold, Vector *external) { - SortEx_init((SortExternal*)self); - BBSortExIVARS *const ivars = BBSortEx_IVARS(self); - ivars->external_tick = 0; - ivars->external = (Vector*)INCREF(external); - ivars->mem_consumed = 0; - BBSortEx_Set_Mem_Thresh(self, mem_threshold); - return self; -} - -void -BBSortEx_Destroy_IMP(BBSortEx *self) { - BBSortExIVARS *const ivars = BBSortEx_IVARS(self); - DECREF(ivars->external); - SUPER_DESTROY(self, BBSORTEX); -} - -void -BBSortEx_Clear_Buffer_IMP(BBSortEx *self) { - BBSortExIVARS *const ivars = BBSortEx_IVARS(self); - ivars->mem_consumed = 0; - BBSortEx_Clear_Buffer_t super_clear_buffer - = SUPER_METHOD_PTR(BBSORTEX, LUCY_BBSortEx_Clear_Buffer); - super_clear_buffer(self); -} - -void -BBSortEx_Feed_IMP(BBSortEx *self, Obj *item) { - BBSortExIVARS *const ivars = BBSortEx_IVARS(self); - BBSortEx_Feed_t super_feed - = SUPER_METHOD_PTR(BBSORTEX, LUCY_BBSortEx_Feed); - super_feed(self, item); - - // Flush() if necessary. - ByteBuf *bytebuf = (ByteBuf*)CERTIFY(item, BYTEBUF); - ivars->mem_consumed += BB_Get_Size(bytebuf); - if (ivars->mem_consumed >= ivars->mem_thresh) { - BBSortEx_Flush(self); - } -} - -void -BBSortEx_Flush_IMP(BBSortEx *self) { - BBSortExIVARS *const ivars = BBSortEx_IVARS(self); - uint32_t buf_count = ivars->buf_max - ivars->buf_tick; - Obj **buffer = ivars->buffer; - Vector *elems; - - if (!buf_count) { return; } - else { elems = Vec_new(buf_count); } - - // Sort, then create a new run. - BBSortEx_Sort_Buffer(self); - for (uint32_t i = ivars->buf_tick; i < ivars->buf_max; i++) { - Vec_Push(elems, buffer[i]); - } - BBSortEx *run = BBSortEx_new(0, elems); - DECREF(elems); - BBSortEx_Add_Run(self, (SortExternal*)run); - - // Blank the buffer vars. - ivars->buf_tick += buf_count; - BBSortEx_Clear_Buffer(self); -} - -uint32_t -BBSortEx_Refill_IMP(BBSortEx *self) { - BBSortExIVARS *const ivars = BBSortEx_IVARS(self); - - // Make sure buffer is empty, then set buffer tick vars. - if (ivars->buf_max - ivars->buf_tick > 0) { - THROW(ERR, "Refill called but buffer contains %u32 items", - ivars->buf_max - ivars->buf_tick); - } - ivars->buf_tick = 0; - ivars->buf_max = 0; - - // Read in elements. - while (1) { - ByteBuf *elem = NULL; - - if (ivars->mem_consumed >= ivars->mem_thresh) { - ivars->mem_consumed = 0; - break; - } - else if (ivars->external_tick >= Vec_Get_Size(ivars->external)) { - break; - } - else { - elem = (ByteBuf*)Vec_Fetch(ivars->external, ivars->external_tick); - ivars->external_tick++; - // Should be + sizeof(ByteBuf), but that's ok. - ivars->mem_consumed += BB_Get_Size(elem); - } - - if (ivars->buf_max == ivars->buf_cap) { - BBSortEx_Grow_Buffer(self, - Memory_oversize(ivars->buf_max + 1, - sizeof(Obj*))); - } - ivars->buffer[ivars->buf_max++] = INCREF(elem); - } - - return ivars->buf_max; -} - -void -BBSortEx_Flip_IMP(BBSortEx *self) { - BBSortExIVARS *const ivars = BBSortEx_IVARS(self); - uint32_t run_mem_thresh = 65536; - - BBSortEx_Flush(self); - - // Recalculate the approximate mem allowed for each run. - uint32_t num_runs = Vec_Get_Size(ivars->runs); - if (num_runs) { - run_mem_thresh = (ivars->mem_thresh / 2) / num_runs; - if (run_mem_thresh < 65536) { - run_mem_thresh = 65536; - } - } - - for (uint32_t i = 0; i < num_runs; i++) { - BBSortEx *run = (BBSortEx*)Vec_Fetch(ivars->runs, i); - BBSortEx_Set_Mem_Thresh(run, run_mem_thresh); - } - - // OK to fetch now. - ivars->flipped = true; -} - -int -BBSortEx_Compare_IMP(BBSortEx *self, void *va, void *vb) { - UNUSED_VAR(self); - return BB_compare((ByteBuf**)va, (ByteBuf**)vb); -} - -Vector* -BBSortEx_Peek_Cache_IMP(BBSortEx *self) { - BBSortExIVARS *const ivars = BBSortEx_IVARS(self); - uint32_t count = ivars->buf_max - ivars->buf_tick; - Obj **buffer = ivars->buffer; - Vector *retval = Vec_new(count); - - for (uint32_t i = ivars->buf_tick; i < ivars->buf_max; ++i) { - Vec_Push(retval, INCREF(buffer[i])); - } - - return retval; -} - -Obj* -BBSortEx_Peek_Last_IMP(BBSortEx *self) { - BBSortExIVARS *const ivars = BBSortEx_IVARS(self); - uint32_t count = ivars->buf_max - ivars->buf_tick; - if (count == 0) { return NULL; } - return INCREF(ivars->buffer[count-1]); -} - -uint32_t -BBSortEx_Get_Num_Runs_IMP(BBSortEx *self) { - BBSortExIVARS *const ivars = BBSortEx_IVARS(self); - return Vec_Get_Size(ivars->runs); -} - - http://git-wip-us.apache.org/repos/asf/lucy/blob/2f7df540/core/Lucy/Util/BBSortEx.cfh ---------------------------------------------------------------------- diff --git a/core/Lucy/Util/BBSortEx.cfh b/core/Lucy/Util/BBSortEx.cfh deleted file mode 100644 index eaac4c6..0000000 --- a/core/Lucy/Util/BBSortEx.cfh +++ /dev/null @@ -1,67 +0,0 @@ -/* Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -parcel Lucy; - -/** SortExternal for ByteBufs. - */ - -class Lucy::Util::BBSortEx - inherits Lucy::Util::SortExternal { - - Vector *external; - uint32_t external_tick; - uint32_t mem_consumed; - - inert BBSortEx* - new(uint32_t mem_thresh = 0x1000000, Vector *external = NULL); - - inert BBSortEx* - init(BBSortEx *self, uint32_t mem_thresh = 0x1000000, - Vector *external = NULL); - - void - Feed(BBSortEx *self, decremented Obj *item); - - void - Flush(BBSortEx *self); - - uint32_t - Refill(BBSortEx *self); - - void - Clear_Buffer(BBSortEx *self); - - void - Flip(BBSortEx *self); - - int - Compare(BBSortEx *self, void *va, void *vb); - - incremented Vector* - Peek_Cache(BBSortEx *self); - - incremented nullable Obj* - Peek_Last(BBSortEx *self); - - uint32_t - Get_Num_Runs(BBSortEx *self); - - public void - Destroy(BBSortEx *self); -} - - http://git-wip-us.apache.org/repos/asf/lucy/blob/2f7df540/core/Lucy/Util/BlobSortEx.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Util/BlobSortEx.c b/core/Lucy/Util/BlobSortEx.c new file mode 100644 index 0000000..6d41488 --- /dev/null +++ b/core/Lucy/Util/BlobSortEx.c @@ -0,0 +1,201 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define C_LUCY_BLOBSORTEX +#define LUCY_USE_SHORT_NAMES +#include "Lucy/Util/ToolSet.h" + +#include "Lucy/Util/BlobSortEx.h" + +#include "Clownfish/Blob.h" +#include "Lucy/Index/Segment.h" +#include "Lucy/Store/InStream.h" +#include "Lucy/Store/Folder.h" +#include "Lucy/Store/OutStream.h" + +BlobSortEx* +BlobSortEx_new(uint32_t mem_threshold, Vector *external) { + BlobSortEx *self = (BlobSortEx*)Class_Make_Obj(BLOBSORTEX); + return BlobSortEx_init(self, mem_threshold, external); +} + +BlobSortEx* +BlobSortEx_init(BlobSortEx *self, uint32_t mem_threshold, Vector *external) { + SortEx_init((SortExternal*)self); + BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self); + ivars->external_tick = 0; + ivars->external = (Vector*)INCREF(external); + ivars->mem_consumed = 0; + BlobSortEx_Set_Mem_Thresh(self, mem_threshold); + return self; +} + +void +BlobSortEx_Destroy_IMP(BlobSortEx *self) { + BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self); + DECREF(ivars->external); + SUPER_DESTROY(self, BLOBSORTEX); +} + +void +BlobSortEx_Clear_Buffer_IMP(BlobSortEx *self) { + BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self); + ivars->mem_consumed = 0; + BlobSortEx_Clear_Buffer_t super_clear_buffer + = SUPER_METHOD_PTR(BLOBSORTEX, LUCY_BlobSortEx_Clear_Buffer); + super_clear_buffer(self); +} + +void +BlobSortEx_Feed_IMP(BlobSortEx *self, Obj *item) { + BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self); + BlobSortEx_Feed_t super_feed + = SUPER_METHOD_PTR(BLOBSORTEX, LUCY_BlobSortEx_Feed); + super_feed(self, item); + + // Flush() if necessary. + Blob *blob = (Blob*)CERTIFY(item, BLOB); + ivars->mem_consumed += Blob_Get_Size(blob); + if (ivars->mem_consumed >= ivars->mem_thresh) { + BlobSortEx_Flush(self); + } +} + +void +BlobSortEx_Flush_IMP(BlobSortEx *self) { + BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self); + uint32_t buf_count = ivars->buf_max - ivars->buf_tick; + Obj **buffer = ivars->buffer; + Vector *elems; + + if (!buf_count) { return; } + else { elems = Vec_new(buf_count); } + + // Sort, then create a new run. + BlobSortEx_Sort_Buffer(self); + for (uint32_t i = ivars->buf_tick; i < ivars->buf_max; i++) { + Vec_Push(elems, buffer[i]); + } + BlobSortEx *run = BlobSortEx_new(0, elems); + DECREF(elems); + BlobSortEx_Add_Run(self, (SortExternal*)run); + + // Blank the buffer vars. + ivars->buf_tick += buf_count; + BlobSortEx_Clear_Buffer(self); +} + +uint32_t +BlobSortEx_Refill_IMP(BlobSortEx *self) { + BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self); + + // Make sure buffer is empty, then set buffer tick vars. + if (ivars->buf_max - ivars->buf_tick > 0) { + THROW(ERR, "Refill called but buffer contains %u32 items", + ivars->buf_max - ivars->buf_tick); + } + ivars->buf_tick = 0; + ivars->buf_max = 0; + + // Read in elements. + while (1) { + Blob *elem = NULL; + + if (ivars->mem_consumed >= ivars->mem_thresh) { + ivars->mem_consumed = 0; + break; + } + else if (ivars->external_tick >= Vec_Get_Size(ivars->external)) { + break; + } + else { + elem = (Blob*)Vec_Fetch(ivars->external, ivars->external_tick); + ivars->external_tick++; + // Should be + sizeof(Blob), but that's ok. + ivars->mem_consumed += Blob_Get_Size(elem); + } + + if (ivars->buf_max == ivars->buf_cap) { + BlobSortEx_Grow_Buffer(self, + Memory_oversize(ivars->buf_max + 1, + sizeof(Obj*))); + } + ivars->buffer[ivars->buf_max++] = INCREF(elem); + } + + return ivars->buf_max; +} + +void +BlobSortEx_Flip_IMP(BlobSortEx *self) { + BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self); + uint32_t run_mem_thresh = 65536; + + BlobSortEx_Flush(self); + + // Recalculate the approximate mem allowed for each run. + uint32_t num_runs = Vec_Get_Size(ivars->runs); + if (num_runs) { + run_mem_thresh = (ivars->mem_thresh / 2) / num_runs; + if (run_mem_thresh < 65536) { + run_mem_thresh = 65536; + } + } + + for (uint32_t i = 0; i < num_runs; i++) { + BlobSortEx *run = (BlobSortEx*)Vec_Fetch(ivars->runs, i); + BlobSortEx_Set_Mem_Thresh(run, run_mem_thresh); + } + + // OK to fetch now. + ivars->flipped = true; +} + +int +BlobSortEx_Compare_IMP(BlobSortEx *self, void *va, void *vb) { + UNUSED_VAR(self); + return Blob_compare((Blob**)va, (Blob**)vb); +} + +Vector* +BlobSortEx_Peek_Cache_IMP(BlobSortEx *self) { + BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self); + uint32_t count = ivars->buf_max - ivars->buf_tick; + Obj **buffer = ivars->buffer; + Vector *retval = Vec_new(count); + + for (uint32_t i = ivars->buf_tick; i < ivars->buf_max; ++i) { + Vec_Push(retval, INCREF(buffer[i])); + } + + return retval; +} + +Obj* +BlobSortEx_Peek_Last_IMP(BlobSortEx *self) { + BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self); + uint32_t count = ivars->buf_max - ivars->buf_tick; + if (count == 0) { return NULL; } + return INCREF(ivars->buffer[count-1]); +} + +uint32_t +BlobSortEx_Get_Num_Runs_IMP(BlobSortEx *self) { + BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self); + return Vec_Get_Size(ivars->runs); +} + + http://git-wip-us.apache.org/repos/asf/lucy/blob/2f7df540/core/Lucy/Util/BlobSortEx.cfh ---------------------------------------------------------------------- diff --git a/core/Lucy/Util/BlobSortEx.cfh b/core/Lucy/Util/BlobSortEx.cfh new file mode 100644 index 0000000..47cef86 --- /dev/null +++ b/core/Lucy/Util/BlobSortEx.cfh @@ -0,0 +1,67 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +parcel Lucy; + +/** SortExternal for Blobs. + */ + +class Lucy::Util::BlobSortEx + inherits Lucy::Util::SortExternal { + + Vector *external; + uint32_t external_tick; + uint32_t mem_consumed; + + inert BlobSortEx* + new(uint32_t mem_thresh = 0x1000000, Vector *external = NULL); + + inert BlobSortEx* + init(BlobSortEx *self, uint32_t mem_thresh = 0x1000000, + Vector *external = NULL); + + void + Feed(BlobSortEx *self, decremented Obj *item); + + void + Flush(BlobSortEx *self); + + uint32_t + Refill(BlobSortEx *self); + + void + Clear_Buffer(BlobSortEx *self); + + void + Flip(BlobSortEx *self); + + int + Compare(BlobSortEx *self, void *va, void *vb); + + incremented Vector* + Peek_Cache(BlobSortEx *self); + + incremented nullable Obj* + Peek_Last(BlobSortEx *self); + + uint32_t + Get_Num_Runs(BlobSortEx *self); + + public void + Destroy(BlobSortEx *self); +} + + http://git-wip-us.apache.org/repos/asf/lucy/blob/2f7df540/perl/t/015-sort_external.t ---------------------------------------------------------------------- diff --git a/perl/t/015-sort_external.t b/perl/t/015-sort_external.t index b656abf..403dab0 100644 --- a/perl/t/015-sort_external.t +++ b/perl/t/015-sort_external.t @@ -23,12 +23,12 @@ use bytes qw(); my ( $sortex, $buffer, @orig, @sort_output ); -$sortex = Lucy::Util::BBSortEx->new( mem_thresh => 4 ); -$sortex->feed( new_bytebuf('c') ); +$sortex = Lucy::Util::BlobSortEx->new( mem_thresh => 4 ); +$sortex->feed( new_blob('c') ); is( $sortex->buffer_count, 1, "feed elem into buffer" ); -$sortex->feed( new_bytebuf('b') ); -$sortex->feed( new_bytebuf('d') ); +$sortex->feed( new_blob('b') ); +$sortex->feed( new_blob('d') ); $sortex->sort_buffer; SKIP: { skip( "Restore when porting test to C", 1 ); @@ -36,13 +36,13 @@ SKIP: { is_deeply( $buffer, [qw( b c d )], "sort buffer" ); } -$sortex->feed( new_bytebuf('a') ); +$sortex->feed( new_blob('a') ); is( $sortex->buffer_count, 0, "buffer flushed automatically when mem_thresh crossed" ); #is( $sortex->get_num_runs, 1, "run added" ); -my @bytebufs = map { new_bytebuf($_) } qw( x y z ); -my $run = Lucy::Util::BBSortEx->new( external => \@bytebufs ); +my @blobs = map { new_blob($_) } qw( x y z ); +my $run = Lucy::Util::BlobSortEx->new( external => \@blobs ); $sortex->add_run($run); $sortex->flip; @orig = qw( a b c d x y z ); @@ -53,12 +53,12 @@ is_deeply( \@sort_output, \@orig, "Add_Run" ); @orig = (); @sort_output = (); -$sortex = Lucy::Util::BBSortEx->new( mem_thresh => 4 ); -$sortex->feed( new_bytebuf('c') ); +$sortex = Lucy::Util::BlobSortEx->new( mem_thresh => 4 ); +$sortex->feed( new_blob('c') ); $sortex->clear_buffer; is( $sortex->buffer_count, 0, "Clear_Buffer" ); -$sortex->feed( new_bytebuf('b') ); -$sortex->feed( new_bytebuf('a') ); +$sortex->feed( new_blob('b') ); +$sortex->feed( new_blob('a') ); $sortex->flush; $sortex->flip; @orig = qw( a b ); @@ -72,9 +72,9 @@ is_deeply( \@sort_output, \@orig, @orig = (); @sort_output = (); -$sortex = Lucy::Util::BBSortEx->new; +$sortex = Lucy::Util::BlobSortEx->new; @orig = ( 'a' .. 'z' ); -$sortex->feed( new_bytebuf($_) ) for shuffle(@orig); +$sortex->feed( new_blob($_) ) for shuffle(@orig); $sortex->flip; while ( defined( my $result = $sortex->fetch ) ) { push @sort_output, $result; @@ -83,9 +83,9 @@ is_deeply( \@sort_output, \@orig, "sort letters" ); @orig = (); @sort_output = (); -$sortex = Lucy::Util::BBSortEx->new; +$sortex = Lucy::Util::BlobSortEx->new; @orig = qw( a a a b c d x x x x x x y y ); -$sortex->feed( new_bytebuf($_) ) for shuffle(@orig); +$sortex->feed( new_blob($_) ) for shuffle(@orig); $sortex->flip; while ( defined( my $result = $sortex->fetch ) ) { push @sort_output, $result; @@ -94,9 +94,9 @@ is_deeply( \@sort_output, \@orig, "sort repeated letters" ); @orig = (); @sort_output = (); -$sortex = Lucy::Util::BBSortEx->new; +$sortex = Lucy::Util::BlobSortEx->new; @orig = ( '', '', 'a' .. 'z' ); -$sortex->feed( new_bytebuf($_) ) for shuffle(@orig); +$sortex->feed( new_blob($_) ) for shuffle(@orig); $sortex->flip; while ( defined( my $result = $sortex->fetch ) ) { push @sort_output, $result; @@ -105,9 +105,9 @@ is_deeply( \@sort_output, \@orig, "sort letters and empty strings" ); @orig = (); @sort_output = (); -$sortex = Lucy::Util::BBSortEx->new( mem_thresh => 30 ); +$sortex = Lucy::Util::BlobSortEx->new( mem_thresh => 30 ); @orig = 'a' .. 'z'; -$sortex->feed( new_bytebuf($_) ) for shuffle(@orig); +$sortex->feed( new_blob($_) ) for shuffle(@orig); $sortex->flip; while ( defined( my $result = $sortex->fetch ) ) { push @sort_output, $result; @@ -116,9 +116,9 @@ is_deeply( \@sort_output, \@orig, "... with an absurdly low mem_thresh" ); @orig = (); @sort_output = (); -$sortex = Lucy::Util::BBSortEx->new( mem_thresh => 1 ); +$sortex = Lucy::Util::BlobSortEx->new( mem_thresh => 1 ); @orig = 'a' .. 'z'; -$sortex->feed( new_bytebuf($_) ) for shuffle(@orig); +$sortex->feed( new_blob($_) ) for shuffle(@orig); $sortex->flip; while ( defined( my $result = $sortex->fetch ) ) { push @sort_output, $result; @@ -127,15 +127,15 @@ is_deeply( \@sort_output, \@orig, "... with an even lower mem_thresh" ); @orig = (); @sort_output = (); -$sortex = Lucy::Util::BBSortEx->new; +$sortex = Lucy::Util::BlobSortEx->new; $sortex->flip; @sort_output = $sortex->fetch; is_deeply( \@sort_output, [undef], "Sorting nothing returns undef" ); @sort_output = (); -$sortex = Lucy::Util::BBSortEx->new( mem_thresh => 5_000 ); +$sortex = Lucy::Util::BlobSortEx->new( mem_thresh => 5_000 ); @orig = map { pack( 'N', $_ ) } ( 0 .. 11_000 ); -$sortex->feed( new_bytebuf($_) ) for shuffle(@orig); +$sortex->feed( new_blob($_) ) for shuffle(@orig); $sortex->flip; while ( defined( my $item = $sortex->fetch ) ) { push @sort_output, $item; @@ -143,7 +143,7 @@ while ( defined( my $item = $sortex->fetch ) ) { is_deeply( \@sort_output, \@orig, "Sorting packed integers..." ); @sort_output = (); -$sortex = Lucy::Util::BBSortEx->new( mem_thresh => 15_000 ); +$sortex = Lucy::Util::BlobSortEx->new( mem_thresh => 15_000 ); @orig = (); for my $iter ( 0 .. 1_000 ) { my $string = ''; @@ -152,7 +152,7 @@ for my $iter ( 0 .. 1_000 ) { } push @orig, $string; } -$sortex->feed( new_bytebuf($_) ) for shuffle(@orig); +$sortex->feed( new_blob($_) ) for shuffle(@orig); @orig = sort @orig; $sortex->flip; while ( defined( my $item = $sortex->fetch ) ) { @@ -161,5 +161,5 @@ while ( defined( my $item = $sortex->fetch ) ) { is_deeply( \@sort_output, \@orig, "Random binary strings of random length" ); @sort_output = (); -sub new_bytebuf { Clownfish::ByteBuf->new(shift) } +sub new_blob { Clownfish::Blob->new(shift) }
