Switch TestSortExternal over to Blob

Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/2f7df540
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/2f7df540
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/2f7df540

Branch: refs/heads/master
Commit: 2f7df540f27fae676082332612718f9da79bc7bb
Parents: 32e5dc4
Author: Nick Wellnhofer <[email protected]>
Authored: Mon May 4 17:51:53 2015 +0200
Committer: Nick Wellnhofer <[email protected]>
Committed: Tue May 5 11:23:05 2015 +0200

----------------------------------------------------------------------
 core/Lucy/Test/Util/TestSortExternal.c | 215 ++++++++++++++--------------
 core/Lucy/Util/BBSortEx.c              | 200 --------------------------
 core/Lucy/Util/BBSortEx.cfh            |  67 ---------
 core/Lucy/Util/BlobSortEx.c            | 201 ++++++++++++++++++++++++++
 core/Lucy/Util/BlobSortEx.cfh          |  67 +++++++++
 perl/t/015-sort_external.t             |  54 +++----
 6 files changed, 403 insertions(+), 401 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/2f7df540/core/Lucy/Test/Util/TestSortExternal.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Test/Util/TestSortExternal.c 
b/core/Lucy/Test/Util/TestSortExternal.c
index 059ccca..e9bbba9 100644
--- a/core/Lucy/Test/Util/TestSortExternal.c
+++ b/core/Lucy/Test/Util/TestSortExternal.c
@@ -22,17 +22,18 @@
 #include "Lucy/Util/ToolSet.h"
 #include "Lucy/Test/Util/TestSortExternal.h"
 
+#include "Clownfish/Blob.h"
 #include "Clownfish/TestHarness/TestBatchRunner.h"
-#include "Lucy/Util/BBSortEx.h"
+#include "Lucy/Util/BlobSortEx.h"
 #include "Lucy/Util/SortExternal.h"
 
-static ByteBuf *a_bb;
-static ByteBuf *b_bb;
-static ByteBuf *c_bb;
-static ByteBuf *d_bb;
-static ByteBuf *x_bb;
-static ByteBuf *y_bb;
-static ByteBuf *z_bb;
+static Blob *a_blob;
+static Blob *b_blob;
+static Blob *c_blob;
+static Blob *d_blob;
+static Blob *x_blob;
+static Blob *y_blob;
+static Blob *z_blob;
 
 TestSortExternal*
 TestSortExternal_new() {
@@ -40,78 +41,78 @@ TestSortExternal_new() {
 }
 
 static void
-S_init_bytebufs() {
-    a_bb = BB_new_bytes("a", 1);
-    b_bb = BB_new_bytes("b", 1);
-    c_bb = BB_new_bytes("c", 1);
-    d_bb = BB_new_bytes("d", 1);
-    x_bb = BB_new_bytes("x", 1);
-    y_bb = BB_new_bytes("y", 1);
-    z_bb = BB_new_bytes("z", 1);
+S_init_blobs() {
+    a_blob = Blob_new("a", 1);
+    b_blob = Blob_new("b", 1);
+    c_blob = Blob_new("c", 1);
+    d_blob = Blob_new("d", 1);
+    x_blob = Blob_new("x", 1);
+    y_blob = Blob_new("y", 1);
+    z_blob = Blob_new("z", 1);
 }
 
 static void
-S_destroy_bytebufs() {
-    DECREF(a_bb);
-    DECREF(b_bb);
-    DECREF(c_bb);
-    DECREF(d_bb);
-    DECREF(x_bb);
-    DECREF(y_bb);
-    DECREF(z_bb);
+S_destroy_blobs() {
+    DECREF(a_blob);
+    DECREF(b_blob);
+    DECREF(c_blob);
+    DECREF(d_blob);
+    DECREF(x_blob);
+    DECREF(y_blob);
+    DECREF(z_blob);
 }
 
 static void
 test_bbsortex(TestBatchRunner *runner) {
-    BBSortEx *sortex = BBSortEx_new(4, NULL);
+    BlobSortEx *sortex = BlobSortEx_new(4, NULL);
 
-    BBSortEx_Feed(sortex, INCREF(c_bb));
-    TEST_INT_EQ(runner, BBSortEx_Buffer_Count(sortex), 1,
+    BlobSortEx_Feed(sortex, INCREF(c_blob));
+    TEST_INT_EQ(runner, BlobSortEx_Buffer_Count(sortex), 1,
                 "feed elem into cache");
 
-    BBSortEx_Feed(sortex, INCREF(b_bb));
-    BBSortEx_Feed(sortex, INCREF(d_bb));
-    BBSortEx_Sort_Buffer(sortex);
+    BlobSortEx_Feed(sortex, INCREF(b_blob));
+    BlobSortEx_Feed(sortex, INCREF(d_blob));
+    BlobSortEx_Sort_Buffer(sortex);
 
     {
-        Vector *cache  = BBSortEx_Peek_Cache(sortex);
+        Vector *cache  = BlobSortEx_Peek_Cache(sortex);
         Vector *wanted = Vec_new(3);
-        Vec_Push(wanted, INCREF(b_bb));
-        Vec_Push(wanted, INCREF(c_bb));
-        Vec_Push(wanted, INCREF(d_bb));
+        Vec_Push(wanted, INCREF(b_blob));
+        Vec_Push(wanted, INCREF(c_blob));
+        Vec_Push(wanted, INCREF(d_blob));
         TEST_TRUE(runner, Vec_Equals(cache, (Obj*)wanted), "sort cache");
         DECREF(wanted);
         DECREF(cache);
     }
 
-    BBSortEx_Feed(sortex, INCREF(a_bb));
-    TEST_INT_EQ(runner, BBSortEx_Buffer_Count(sortex), 0,
+    BlobSortEx_Feed(sortex, INCREF(a_blob));
+    TEST_INT_EQ(runner, BlobSortEx_Buffer_Count(sortex), 0,
                 "cache flushed automatically when mem_thresh crossed");
-    TEST_INT_EQ(runner, BBSortEx_Get_Num_Runs(sortex), 1, "run added");
+    TEST_INT_EQ(runner, BlobSortEx_Get_Num_Runs(sortex), 1, "run added");
 
     Vector *external = Vec_new(3);
-    Vec_Push(external, INCREF(x_bb));
-    Vec_Push(external, INCREF(y_bb));
-    Vec_Push(external, INCREF(z_bb));
-    BBSortEx *run = BBSortEx_new(0x1000000, external);
-    BBSortEx_Add_Run(sortex, (SortExternal*)run);
-    BBSortEx_Flip(sortex);
+    Vec_Push(external, INCREF(x_blob));
+    Vec_Push(external, INCREF(y_blob));
+    Vec_Push(external, INCREF(z_blob));
+    BlobSortEx *run = BlobSortEx_new(0x1000000, external);
+    BlobSortEx_Add_Run(sortex, (SortExternal*)run);
+    BlobSortEx_Flip(sortex);
 
     {
         Vector *got = Vec_new(7);
         Obj *object;
-        while (NULL != (object = BBSortEx_Fetch(sortex))) {
+        while (NULL != (object = BlobSortEx_Fetch(sortex))) {
             Vec_Push(got, object);
         }
 
         Vector *wanted = Vec_new(7);
-        Vec_Push(wanted, INCREF(a_bb));
-        Vec_Push(wanted, INCREF(b_bb));
-        Vec_Push(wanted, INCREF(c_bb));
-        Vec_Push(wanted, INCREF(d_bb));
-        Vec_Push(wanted, INCREF(x_bb));
-        Vec_Push(wanted, INCREF(y_bb));
-        Vec_Push(wanted, INCREF(z_bb));
+        Vec_Push(wanted, INCREF(a_blob));
+        Vec_Push(wanted, INCREF(b_blob));
+        Vec_Push(wanted, INCREF(c_blob));
+        Vec_Push(wanted, INCREF(d_blob));
+        Vec_Push(wanted, INCREF(x_blob));
+        Vec_Push(wanted, INCREF(y_blob));
+        Vec_Push(wanted, INCREF(z_blob));
 
         TEST_TRUE(runner, Vec_Equals(got, (Obj*)wanted), "Add_Run");
 
@@ -125,26 +126,26 @@ test_bbsortex(TestBatchRunner *runner) {
 
 static void
 test_clear_buffer(TestBatchRunner *runner) {
-    BBSortEx *sortex = BBSortEx_new(4, NULL);
+    BlobSortEx *sortex = BlobSortEx_new(4, NULL);
 
-    BBSortEx_Feed(sortex, INCREF(c_bb));
-    BBSortEx_Clear_Buffer(sortex);
-    TEST_INT_EQ(runner, BBSortEx_Buffer_Count(sortex), 0, "Clear_Buffer");
+    BlobSortEx_Feed(sortex, INCREF(c_blob));
+    BlobSortEx_Clear_Buffer(sortex);
+    TEST_INT_EQ(runner, BlobSortEx_Buffer_Count(sortex), 0, "Clear_Buffer");
 
-    BBSortEx_Feed(sortex, INCREF(b_bb));
-    BBSortEx_Feed(sortex, INCREF(a_bb));
-    BBSortEx_Flush(sortex);
-    BBSortEx_Flip(sortex);
-    Obj *object = BBSortEx_Peek(sortex);
-    TEST_TRUE(runner, BB_Equals(a_bb, object), "Peek");
+    BlobSortEx_Feed(sortex, INCREF(b_blob));
+    BlobSortEx_Feed(sortex, INCREF(a_blob));
+    BlobSortEx_Flush(sortex);
+    BlobSortEx_Flip(sortex);
+    Obj *object = BlobSortEx_Peek(sortex);
+    TEST_TRUE(runner, Blob_Equals(a_blob, object), "Peek");
 
     Vector *got = Vec_new(2);
-    while (NULL != (object = BBSortEx_Fetch(sortex))) {
+    while (NULL != (object = BlobSortEx_Fetch(sortex))) {
         Vec_Push(got, object);
     }
     Vector *wanted = Vec_new(2);
-    Vec_Push(wanted, INCREF(a_bb));
-    Vec_Push(wanted, INCREF(b_bb));
+    Vec_Push(wanted, INCREF(a_blob));
+    Vec_Push(wanted, INCREF(b_blob));
     TEST_TRUE(runner, Vec_Equals(got, (Obj*)wanted),
               "elements cleared via Clear_Buffer truly cleared");
 
@@ -154,32 +155,32 @@ test_clear_buffer(TestBatchRunner *runner) {
 }
 
 static void
-S_test_sort(TestBatchRunner *runner, Vector *bytebufs, uint32_t mem_thresh,
+S_test_sort(TestBatchRunner *runner, Vector *blobs, uint32_t mem_thresh,
             const char *test_name) {
-    int        size     = (int)Vec_Get_Size(bytebufs);
-    BBSortEx  *sortex   = BBSortEx_new(mem_thresh, NULL);
-    ByteBuf  **shuffled = (ByteBuf**)MALLOCATE(size * sizeof(ByteBuf*));
+    int          size     = (int)Vec_Get_Size(blobs);
+    BlobSortEx  *sortex   = BlobSortEx_new(mem_thresh, NULL);
+    Blob       **shuffled = (Blob**)MALLOCATE(size * sizeof(Blob*));
 
     for (int i = 0; i < size; ++i) {
-        shuffled[i] = (ByteBuf*)CERTIFY(Vec_Fetch(bytebufs, i), BYTEBUF);
+        shuffled[i] = (Blob*)CERTIFY(Vec_Fetch(blobs, i), BLOB);
     }
     for (int i = size - 1; i > 0; --i) {
         int shuffle_pos = rand() % (i + 1);
-        ByteBuf *temp = shuffled[shuffle_pos];
+        Blob *temp = shuffled[shuffle_pos];
         shuffled[shuffle_pos] = shuffled[i];
         shuffled[i] = temp;
     }
     for (int i = 0; i < size; ++i) {
-        BBSortEx_Feed(sortex, INCREF(shuffled[i]));
+        BlobSortEx_Feed(sortex, INCREF(shuffled[i]));
     }
 
-    BBSortEx_Flip(sortex);
+    BlobSortEx_Flip(sortex);
     Vector *got = Vec_new(size);
     Obj *object;
-    while (NULL != (object = BBSortEx_Fetch(sortex))) {
+    while (NULL != (object = BlobSortEx_Fetch(sortex))) {
         Vec_Push(got, object);
     }
-    TEST_TRUE(runner, Vec_Equals(got, (Obj*)bytebufs), test_name);
+    TEST_TRUE(runner, Vec_Equals(got, (Obj*)blobs), test_name);
 
     FREEMEM(shuffled);
     DECREF(got);
@@ -190,18 +191,18 @@ static void
 S_test_sort_letters(TestBatchRunner *runner, const char *letters,
                     uint32_t mem_thresh, const char *test_name) {
     size_t  num_letters = strlen(letters);
-    Vector *bytebufs    = Vec_new(num_letters);
+    Vector *blobs       = Vec_new(num_letters);
 
     for (size_t i = 0; i < num_letters; ++i) {
         char ch = letters[i];
         size_t size = ch == '_' ? 0 : 1;
-        ByteBuf *bytebuf = BB_new_bytes(&ch, size);
-        Vec_Push(bytebufs, (Obj*)bytebuf);
+        Blob *blob = Blob_new(&ch, size);
+        Vec_Push(blobs, (Obj*)blob);
     }
 
-    S_test_sort(runner, bytebufs, mem_thresh, test_name);
+    S_test_sort(runner, blobs, mem_thresh, test_name);
 
-    DECREF(bytebufs);
+    DECREF(blobs);
 }
 
 static void
@@ -220,9 +221,9 @@ test_sort_letters(TestBatchRunner *runner) {
 
 static void
 test_sort_nothing(TestBatchRunner *runner) {
-    BBSortEx *sortex = BBSortEx_new(0x1000000, NULL);
-    BBSortEx_Flip(sortex);
-    TEST_TRUE(runner, BBSortEx_Fetch(sortex) == NULL,
+    BlobSortEx *sortex = BlobSortEx_new(0x1000000, NULL);
+    BlobSortEx_Flip(sortex);
+    TEST_TRUE(runner, BlobSortEx_Fetch(sortex) == NULL,
               "Sorting nothing returns undef");
     DECREF(sortex);
 }
@@ -230,7 +231,7 @@ test_sort_nothing(TestBatchRunner *runner) {
 static void
 test_sort_packed_ints(TestBatchRunner *runner) {
     size_t  num_ints = 11001;
-    Vector *bytebufs = Vec_new(num_ints);
+    Vector *blobs    = Vec_new(num_ints);
 
     for (uint32_t i = 0; i < num_ints; ++i) {
         char buf[4];
@@ -238,19 +239,19 @@ test_sort_packed_ints(TestBatchRunner *runner) {
         buf[1] = i >> 16;
         buf[2] = i >> 8;
         buf[3] = i;
-        ByteBuf *bytebuf = BB_new_bytes(&buf, 4);
-        Vec_Push(bytebufs, (Obj*)bytebuf);
+        Blob *blob = Blob_new(buf, 4);
+        Vec_Push(blobs, (Obj*)blob);
     }
 
-    S_test_sort(runner, bytebufs, 5000, "Sorting packed integers...");
+    S_test_sort(runner, blobs, 5000, "Sorting packed integers...");
 
-    DECREF(bytebufs);
+    DECREF(blobs);
 }
 
 static void
 test_sort_random_strings(TestBatchRunner *runner) {
     size_t  num_strings = 1001;
-    Vector *bytebufs    = Vec_new(num_strings);
+    Vector *blobs       = Vec_new(num_strings);
 
     for (uint32_t i = 0; i < num_strings; ++i) {
         char buf[1201];
@@ -258,15 +259,15 @@ test_sort_random_strings(TestBatchRunner *runner) {
         for (int i = 0; i < size; ++i) {
             buf[i] = rand();
         }
-        ByteBuf *bytebuf = BB_new_bytes(&buf, size);
-        Vec_Push(bytebufs, (Obj*)bytebuf);
+        Blob *blob = Blob_new(buf, size);
+        Vec_Push(blobs, (Obj*)blob);
     }
 
-    Vec_Sort(bytebufs);
-    S_test_sort(runner, bytebufs, 15000,
+    Vec_Sort(blobs);
+    S_test_sort(runner, blobs, 15000,
                 "Random binary strings of random length");
 
-    DECREF(bytebufs);
+    DECREF(blobs);
 }
 
 static void
@@ -274,27 +275,27 @@ test_run(TestBatchRunner *runner) {
     Vector *letters = Vec_new(26);
     for (int i = 0; i < 26; ++i) {
         char ch = 'a' + i;
-        ByteBuf *bytebuf = BB_new_bytes(&ch, 1);
-        Vec_Push(letters, (Obj*)bytebuf);
+        Blob *blob = Blob_new(&ch, 1);
+        Vec_Push(letters, (Obj*)blob);
     }
-    BBSortEx *run = BBSortEx_new(0x1000000, letters);
-    BBSortEx_Set_Mem_Thresh(run, 5);
+    BlobSortEx *run = BlobSortEx_new(0x1000000, letters);
+    BlobSortEx_Set_Mem_Thresh(run, 5);
 
-    BBSortEx_Refill(run);
-    TEST_INT_EQ(runner, BBSortEx_Buffer_Count(run), 5,
+    BlobSortEx_Refill(run);
+    TEST_INT_EQ(runner, BlobSortEx_Buffer_Count(run), 5,
                 "Refill doesn't exceed memory threshold");
 
-    Obj *endpost = BBSortEx_Peek_Last(run);
-    ByteBuf *wanted = BB_new_bytes("e", 1);
-    TEST_TRUE(runner, BB_Equals(wanted, endpost), "Peek_Last");
+    Obj *endpost = BlobSortEx_Peek_Last(run);
+    Blob *wanted = Blob_new("e", 1);
+    TEST_TRUE(runner, Blob_Equals(wanted, endpost), "Peek_Last");
 
     Vector *elems = Vec_new(26);
     do {
-        while (BBSortEx_Buffer_Count(run) > 0) {
-            Obj *object = BBSortEx_Fetch(run);
+        while (BlobSortEx_Buffer_Count(run) > 0) {
+            Obj *object = BlobSortEx_Fetch(run);
             Vec_Push(elems, object);
         }
-    } while (BBSortEx_Refill(run) > 0);
+    } while (BlobSortEx_Refill(run) > 0);
     TEST_TRUE(runner, Vec_Equals(elems, (Obj*)letters), "retrieve all elems");
 
     DECREF(elems);
@@ -309,7 +310,7 @@ TestSortExternal_Run_IMP(TestSortExternal *self, 
TestBatchRunner *runner) {
     TestBatchRunner_Plan(runner, (TestBatch*)self, 19);
 
     srand((unsigned int)time((time_t*)NULL));
-    S_init_bytebufs();
+    S_init_blobs();
     test_bbsortex(runner);
     test_clear_buffer(runner);
     test_sort_letters(runner);
@@ -317,7 +318,7 @@ TestSortExternal_Run_IMP(TestSortExternal *self, 
TestBatchRunner *runner) {
     test_sort_packed_ints(runner);
     test_sort_random_strings(runner);
     test_run(runner);
-    S_destroy_bytebufs();
+    S_destroy_blobs();
 }
 
 

http://git-wip-us.apache.org/repos/asf/lucy/blob/2f7df540/core/Lucy/Util/BBSortEx.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Util/BBSortEx.c b/core/Lucy/Util/BBSortEx.c
deleted file mode 100644
index 5a9a3e0..0000000
--- a/core/Lucy/Util/BBSortEx.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/* Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#define C_LUCY_BBSORTEX
-#define LUCY_USE_SHORT_NAMES
-#include "Lucy/Util/ToolSet.h"
-
-#include "Lucy/Util/BBSortEx.h"
-
-#include "Lucy/Index/Segment.h"
-#include "Lucy/Store/InStream.h"
-#include "Lucy/Store/Folder.h"
-#include "Lucy/Store/OutStream.h"
-
-BBSortEx*
-BBSortEx_new(uint32_t mem_threshold, Vector *external) {
-    BBSortEx *self = (BBSortEx*)Class_Make_Obj(BBSORTEX);
-    return BBSortEx_init(self, mem_threshold, external);
-}
-
-BBSortEx*
-BBSortEx_init(BBSortEx *self, uint32_t mem_threshold, Vector *external) {
-    SortEx_init((SortExternal*)self);
-    BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
-    ivars->external_tick = 0;
-    ivars->external = (Vector*)INCREF(external);
-    ivars->mem_consumed = 0;
-    BBSortEx_Set_Mem_Thresh(self, mem_threshold);
-    return self;
-}
-
-void
-BBSortEx_Destroy_IMP(BBSortEx *self) {
-    BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
-    DECREF(ivars->external);
-    SUPER_DESTROY(self, BBSORTEX);
-}
-
-void
-BBSortEx_Clear_Buffer_IMP(BBSortEx *self) {
-    BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
-    ivars->mem_consumed = 0;
-    BBSortEx_Clear_Buffer_t super_clear_buffer
-        = SUPER_METHOD_PTR(BBSORTEX, LUCY_BBSortEx_Clear_Buffer);
-    super_clear_buffer(self);
-}
-
-void
-BBSortEx_Feed_IMP(BBSortEx *self, Obj *item) {
-    BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
-    BBSortEx_Feed_t super_feed
-        = SUPER_METHOD_PTR(BBSORTEX, LUCY_BBSortEx_Feed);
-    super_feed(self, item);
-
-    // Flush() if necessary.
-    ByteBuf *bytebuf = (ByteBuf*)CERTIFY(item, BYTEBUF);
-    ivars->mem_consumed += BB_Get_Size(bytebuf);
-    if (ivars->mem_consumed >= ivars->mem_thresh) {
-        BBSortEx_Flush(self);
-    }
-}
-
-void
-BBSortEx_Flush_IMP(BBSortEx *self) {
-    BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
-    uint32_t     buf_count = ivars->buf_max - ivars->buf_tick;
-    Obj        **buffer = ivars->buffer;
-    Vector      *elems;
-
-    if (!buf_count) { return; }
-    else            { elems = Vec_new(buf_count); }
-
-    // Sort, then create a new run.
-    BBSortEx_Sort_Buffer(self);
-    for (uint32_t i = ivars->buf_tick; i < ivars->buf_max; i++) {
-        Vec_Push(elems, buffer[i]);
-    }
-    BBSortEx *run = BBSortEx_new(0, elems);
-    DECREF(elems);
-    BBSortEx_Add_Run(self, (SortExternal*)run);
-
-    // Blank the buffer vars.
-    ivars->buf_tick += buf_count;
-    BBSortEx_Clear_Buffer(self);
-}
-
-uint32_t
-BBSortEx_Refill_IMP(BBSortEx *self) {
-    BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
-
-    // Make sure buffer is empty, then set buffer tick vars.
-    if (ivars->buf_max - ivars->buf_tick > 0) {
-        THROW(ERR, "Refill called but buffer contains %u32 items",
-              ivars->buf_max - ivars->buf_tick);
-    }
-    ivars->buf_tick = 0;
-    ivars->buf_max  = 0;
-
-    // Read in elements.
-    while (1) {
-        ByteBuf *elem = NULL;
-
-        if (ivars->mem_consumed >= ivars->mem_thresh) {
-            ivars->mem_consumed = 0;
-            break;
-        }
-        else if (ivars->external_tick >= Vec_Get_Size(ivars->external)) {
-            break;
-        }
-        else {
-            elem = (ByteBuf*)Vec_Fetch(ivars->external, ivars->external_tick);
-            ivars->external_tick++;
-            // Should be + sizeof(ByteBuf), but that's ok.
-            ivars->mem_consumed += BB_Get_Size(elem);
-        }
-
-        if (ivars->buf_max == ivars->buf_cap) {
-            BBSortEx_Grow_Buffer(self,
-                                 Memory_oversize(ivars->buf_max + 1,
-                                                 sizeof(Obj*)));
-        }
-        ivars->buffer[ivars->buf_max++] = INCREF(elem);
-    }
-
-    return ivars->buf_max;
-}
-
-void
-BBSortEx_Flip_IMP(BBSortEx *self) {
-    BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
-    uint32_t run_mem_thresh = 65536;
-
-    BBSortEx_Flush(self);
-
-    // Recalculate the approximate mem allowed for each run.
-    uint32_t num_runs = Vec_Get_Size(ivars->runs);
-    if (num_runs) {
-        run_mem_thresh = (ivars->mem_thresh / 2) / num_runs;
-        if (run_mem_thresh < 65536) {
-            run_mem_thresh = 65536;
-        }
-    }
-
-    for (uint32_t i = 0; i < num_runs; i++) {
-        BBSortEx *run = (BBSortEx*)Vec_Fetch(ivars->runs, i);
-        BBSortEx_Set_Mem_Thresh(run, run_mem_thresh);
-    }
-
-    // OK to fetch now.
-    ivars->flipped = true;
-}
-
-int
-BBSortEx_Compare_IMP(BBSortEx *self, void *va, void *vb) {
-    UNUSED_VAR(self);
-    return BB_compare((ByteBuf**)va, (ByteBuf**)vb);
-}
-
-Vector*
-BBSortEx_Peek_Cache_IMP(BBSortEx *self) {
-    BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
-    uint32_t   count  = ivars->buf_max - ivars->buf_tick;
-    Obj      **buffer = ivars->buffer;
-    Vector    *retval = Vec_new(count);
-
-    for (uint32_t i = ivars->buf_tick; i < ivars->buf_max; ++i) {
-        Vec_Push(retval, INCREF(buffer[i]));
-    }
-
-    return retval;
-}
-
-Obj*
-BBSortEx_Peek_Last_IMP(BBSortEx *self) {
-    BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
-    uint32_t count = ivars->buf_max - ivars->buf_tick;
-    if (count == 0) { return NULL; }
-    return INCREF(ivars->buffer[count-1]);
-}
-
-uint32_t
-BBSortEx_Get_Num_Runs_IMP(BBSortEx *self) {
-    BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
-    return Vec_Get_Size(ivars->runs);
-}
-
-

http://git-wip-us.apache.org/repos/asf/lucy/blob/2f7df540/core/Lucy/Util/BBSortEx.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Util/BBSortEx.cfh b/core/Lucy/Util/BBSortEx.cfh
deleted file mode 100644
index eaac4c6..0000000
--- a/core/Lucy/Util/BBSortEx.cfh
+++ /dev/null
@@ -1,67 +0,0 @@
-/* Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-parcel Lucy;
-
-/** SortExternal for ByteBufs.
- */
-
-class Lucy::Util::BBSortEx
-    inherits Lucy::Util::SortExternal {
-
-    Vector   *external;
-    uint32_t  external_tick;
-    uint32_t  mem_consumed;
-
-    inert BBSortEx*
-    new(uint32_t mem_thresh = 0x1000000, Vector *external = NULL);
-
-    inert BBSortEx*
-    init(BBSortEx *self, uint32_t mem_thresh = 0x1000000,
-        Vector *external = NULL);
-
-    void
-    Feed(BBSortEx *self, decremented Obj *item);
-
-    void
-    Flush(BBSortEx *self);
-
-    uint32_t
-    Refill(BBSortEx *self);
-
-    void
-    Clear_Buffer(BBSortEx *self);
-
-    void
-    Flip(BBSortEx *self);
-
-    int
-    Compare(BBSortEx *self, void *va, void *vb);
-
-    incremented Vector*
-    Peek_Cache(BBSortEx *self);
-
-    incremented nullable Obj*
-    Peek_Last(BBSortEx *self);
-
-    uint32_t
-    Get_Num_Runs(BBSortEx *self);
-
-    public void
-    Destroy(BBSortEx *self);
-}
-
-

http://git-wip-us.apache.org/repos/asf/lucy/blob/2f7df540/core/Lucy/Util/BlobSortEx.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Util/BlobSortEx.c b/core/Lucy/Util/BlobSortEx.c
new file mode 100644
index 0000000..6d41488
--- /dev/null
+++ b/core/Lucy/Util/BlobSortEx.c
@@ -0,0 +1,201 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define C_LUCY_BLOBSORTEX
+#define LUCY_USE_SHORT_NAMES
+#include "Lucy/Util/ToolSet.h"
+
+#include "Lucy/Util/BlobSortEx.h"
+
+#include "Clownfish/Blob.h"
+#include "Lucy/Index/Segment.h"
+#include "Lucy/Store/InStream.h"
+#include "Lucy/Store/Folder.h"
+#include "Lucy/Store/OutStream.h"
+
+BlobSortEx*
+BlobSortEx_new(uint32_t mem_threshold, Vector *external) {
+    BlobSortEx *self = (BlobSortEx*)Class_Make_Obj(BLOBSORTEX);
+    return BlobSortEx_init(self, mem_threshold, external);
+}
+
+BlobSortEx*
+BlobSortEx_init(BlobSortEx *self, uint32_t mem_threshold, Vector *external) {
+    SortEx_init((SortExternal*)self);
+    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);
+    ivars->external_tick = 0;
+    ivars->external = (Vector*)INCREF(external);
+    ivars->mem_consumed = 0;
+    BlobSortEx_Set_Mem_Thresh(self, mem_threshold);
+    return self;
+}
+
+void
+BlobSortEx_Destroy_IMP(BlobSortEx *self) {
+    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);
+    DECREF(ivars->external);
+    SUPER_DESTROY(self, BLOBSORTEX);
+}
+
+void
+BlobSortEx_Clear_Buffer_IMP(BlobSortEx *self) {
+    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);
+    ivars->mem_consumed = 0;
+    BlobSortEx_Clear_Buffer_t super_clear_buffer
+        = SUPER_METHOD_PTR(BLOBSORTEX, LUCY_BlobSortEx_Clear_Buffer);
+    super_clear_buffer(self);
+}
+
+void
+BlobSortEx_Feed_IMP(BlobSortEx *self, Obj *item) {
+    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);
+    BlobSortEx_Feed_t super_feed
+        = SUPER_METHOD_PTR(BLOBSORTEX, LUCY_BlobSortEx_Feed);
+    super_feed(self, item);
+
+    // Flush() if necessary.
+    Blob *blob = (Blob*)CERTIFY(item, BLOB);
+    ivars->mem_consumed += Blob_Get_Size(blob);
+    if (ivars->mem_consumed >= ivars->mem_thresh) {
+        BlobSortEx_Flush(self);
+    }
+}
+
+void
+BlobSortEx_Flush_IMP(BlobSortEx *self) {
+    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);
+    uint32_t     buf_count = ivars->buf_max - ivars->buf_tick;
+    Obj        **buffer = ivars->buffer;
+    Vector      *elems;
+
+    if (!buf_count) { return; }
+    else            { elems = Vec_new(buf_count); }
+
+    // Sort, then create a new run.
+    BlobSortEx_Sort_Buffer(self);
+    for (uint32_t i = ivars->buf_tick; i < ivars->buf_max; i++) {
+        Vec_Push(elems, buffer[i]);
+    }
+    BlobSortEx *run = BlobSortEx_new(0, elems);
+    DECREF(elems);
+    BlobSortEx_Add_Run(self, (SortExternal*)run);
+
+    // Blank the buffer vars.
+    ivars->buf_tick += buf_count;
+    BlobSortEx_Clear_Buffer(self);
+}
+
+uint32_t
+BlobSortEx_Refill_IMP(BlobSortEx *self) {
+    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);
+
+    // Make sure buffer is empty, then set buffer tick vars.
+    if (ivars->buf_max - ivars->buf_tick > 0) {
+        THROW(ERR, "Refill called but buffer contains %u32 items",
+              ivars->buf_max - ivars->buf_tick);
+    }
+    ivars->buf_tick = 0;
+    ivars->buf_max  = 0;
+
+    // Read in elements.
+    while (1) {
+        Blob *elem = NULL;
+
+        if (ivars->mem_consumed >= ivars->mem_thresh) {
+            ivars->mem_consumed = 0;
+            break;
+        }
+        else if (ivars->external_tick >= Vec_Get_Size(ivars->external)) {
+            break;
+        }
+        else {
+            elem = (Blob*)Vec_Fetch(ivars->external, ivars->external_tick);
+            ivars->external_tick++;
+            // Should be + sizeof(Blob), but that's ok.
+            ivars->mem_consumed += Blob_Get_Size(elem);
+        }
+
+        if (ivars->buf_max == ivars->buf_cap) {
+            BlobSortEx_Grow_Buffer(self,
+                                 Memory_oversize(ivars->buf_max + 1,
+                                                 sizeof(Obj*)));
+        }
+        ivars->buffer[ivars->buf_max++] = INCREF(elem);
+    }
+
+    return ivars->buf_max;
+}
+
+void
+BlobSortEx_Flip_IMP(BlobSortEx *self) {
+    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);
+    uint32_t run_mem_thresh = 65536;
+
+    BlobSortEx_Flush(self);
+
+    // Recalculate the approximate mem allowed for each run.
+    uint32_t num_runs = Vec_Get_Size(ivars->runs);
+    if (num_runs) {
+        run_mem_thresh = (ivars->mem_thresh / 2) / num_runs;
+        if (run_mem_thresh < 65536) {
+            run_mem_thresh = 65536;
+        }
+    }
+
+    for (uint32_t i = 0; i < num_runs; i++) {
+        BlobSortEx *run = (BlobSortEx*)Vec_Fetch(ivars->runs, i);
+        BlobSortEx_Set_Mem_Thresh(run, run_mem_thresh);
+    }
+
+    // OK to fetch now.
+    ivars->flipped = true;
+}
+
+int
+BlobSortEx_Compare_IMP(BlobSortEx *self, void *va, void *vb) {
+    UNUSED_VAR(self);
+    return Blob_compare((Blob**)va, (Blob**)vb);
+}
+
+Vector*
+BlobSortEx_Peek_Cache_IMP(BlobSortEx *self) {
+    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);
+    uint32_t   count  = ivars->buf_max - ivars->buf_tick;
+    Obj      **buffer = ivars->buffer;
+    Vector    *retval = Vec_new(count);
+
+    for (uint32_t i = ivars->buf_tick; i < ivars->buf_max; ++i) {
+        Vec_Push(retval, INCREF(buffer[i]));
+    }
+
+    return retval;
+}
+
+Obj*
+BlobSortEx_Peek_Last_IMP(BlobSortEx *self) {
+    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);
+    uint32_t count = ivars->buf_max - ivars->buf_tick;
+    if (count == 0) { return NULL; }
+    return INCREF(ivars->buffer[count-1]);
+}
+
+uint32_t
+BlobSortEx_Get_Num_Runs_IMP(BlobSortEx *self) {
+    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);
+    return Vec_Get_Size(ivars->runs);
+}
+
+

http://git-wip-us.apache.org/repos/asf/lucy/blob/2f7df540/core/Lucy/Util/BlobSortEx.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Util/BlobSortEx.cfh b/core/Lucy/Util/BlobSortEx.cfh
new file mode 100644
index 0000000..47cef86
--- /dev/null
+++ b/core/Lucy/Util/BlobSortEx.cfh
@@ -0,0 +1,67 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+parcel Lucy;
+
+/** SortExternal for Blobs.
+ */
+
+class Lucy::Util::BlobSortEx
+    inherits Lucy::Util::SortExternal {
+
+    Vector   *external;
+    uint32_t  external_tick;
+    uint32_t  mem_consumed;
+
+    inert BlobSortEx*
+    new(uint32_t mem_thresh = 0x1000000, Vector *external = NULL);
+
+    inert BlobSortEx*
+    init(BlobSortEx *self, uint32_t mem_thresh = 0x1000000,
+        Vector *external = NULL);
+
+    void
+    Feed(BlobSortEx *self, decremented Obj *item);
+
+    void
+    Flush(BlobSortEx *self);
+
+    uint32_t
+    Refill(BlobSortEx *self);
+
+    void
+    Clear_Buffer(BlobSortEx *self);
+
+    void
+    Flip(BlobSortEx *self);
+
+    int
+    Compare(BlobSortEx *self, void *va, void *vb);
+
+    incremented Vector*
+    Peek_Cache(BlobSortEx *self);
+
+    incremented nullable Obj*
+    Peek_Last(BlobSortEx *self);
+
+    uint32_t
+    Get_Num_Runs(BlobSortEx *self);
+
+    public void
+    Destroy(BlobSortEx *self);
+}
+
+

http://git-wip-us.apache.org/repos/asf/lucy/blob/2f7df540/perl/t/015-sort_external.t
----------------------------------------------------------------------
diff --git a/perl/t/015-sort_external.t b/perl/t/015-sort_external.t
index b656abf..403dab0 100644
--- a/perl/t/015-sort_external.t
+++ b/perl/t/015-sort_external.t
@@ -23,12 +23,12 @@ use bytes qw();
 
 my ( $sortex, $buffer, @orig, @sort_output );
 
-$sortex = Lucy::Util::BBSortEx->new( mem_thresh => 4 );
-$sortex->feed( new_bytebuf('c') );
+$sortex = Lucy::Util::BlobSortEx->new( mem_thresh => 4 );
+$sortex->feed( new_blob('c') );
 is( $sortex->buffer_count, 1, "feed elem into buffer" );
 
-$sortex->feed( new_bytebuf('b') );
-$sortex->feed( new_bytebuf('d') );
+$sortex->feed( new_blob('b') );
+$sortex->feed( new_blob('d') );
 $sortex->sort_buffer;
 SKIP: {
     skip( "Restore when porting test to C", 1 );
@@ -36,13 +36,13 @@ SKIP: {
     is_deeply( $buffer, [qw( b c d )], "sort buffer" );
 }
 
-$sortex->feed( new_bytebuf('a') );
+$sortex->feed( new_blob('a') );
 is( $sortex->buffer_count, 0,
     "buffer flushed automatically when mem_thresh crossed" );
 #is( $sortex->get_num_runs, 1, "run added" );
 
-my @bytebufs = map { new_bytebuf($_) } qw( x y z );
-my $run = Lucy::Util::BBSortEx->new( external => \@bytebufs );
+my @blobs = map { new_blob($_) } qw( x y z );
+my $run = Lucy::Util::BlobSortEx->new( external => \@blobs );
 $sortex->add_run($run);
 $sortex->flip;
 @orig = qw( a b c d x y z );
@@ -53,12 +53,12 @@ is_deeply( \@sort_output, \@orig, "Add_Run" );
 @orig        = ();
 @sort_output = ();
 
-$sortex = Lucy::Util::BBSortEx->new( mem_thresh => 4 );
-$sortex->feed( new_bytebuf('c') );
+$sortex = Lucy::Util::BlobSortEx->new( mem_thresh => 4 );
+$sortex->feed( new_blob('c') );
 $sortex->clear_buffer;
 is( $sortex->buffer_count, 0, "Clear_Buffer" );
-$sortex->feed( new_bytebuf('b') );
-$sortex->feed( new_bytebuf('a') );
+$sortex->feed( new_blob('b') );
+$sortex->feed( new_blob('a') );
 $sortex->flush;
 $sortex->flip;
 @orig = qw( a b );
@@ -72,9 +72,9 @@ is_deeply( \@sort_output, \@orig,
 @orig        = ();
 @sort_output = ();
 
-$sortex = Lucy::Util::BBSortEx->new;
+$sortex = Lucy::Util::BlobSortEx->new;
 @orig   = ( 'a' .. 'z' );
-$sortex->feed( new_bytebuf($_) ) for shuffle(@orig);
+$sortex->feed( new_blob($_) ) for shuffle(@orig);
 $sortex->flip;
 while ( defined( my $result = $sortex->fetch ) ) {
     push @sort_output, $result;
@@ -83,9 +83,9 @@ is_deeply( \@sort_output, \@orig, "sort letters" );
 @orig        = ();
 @sort_output = ();
 
-$sortex = Lucy::Util::BBSortEx->new;
+$sortex = Lucy::Util::BlobSortEx->new;
 @orig   = qw( a a a b c d x x x x x x y y );
-$sortex->feed( new_bytebuf($_) ) for shuffle(@orig);
+$sortex->feed( new_blob($_) ) for shuffle(@orig);
 $sortex->flip;
 while ( defined( my $result = $sortex->fetch ) ) {
     push @sort_output, $result;
@@ -94,9 +94,9 @@ is_deeply( \@sort_output, \@orig, "sort repeated letters" );
 @orig        = ();
 @sort_output = ();
 
-$sortex = Lucy::Util::BBSortEx->new;
+$sortex = Lucy::Util::BlobSortEx->new;
 @orig = ( '', '', 'a' .. 'z' );
-$sortex->feed( new_bytebuf($_) ) for shuffle(@orig);
+$sortex->feed( new_blob($_) ) for shuffle(@orig);
 $sortex->flip;
 while ( defined( my $result = $sortex->fetch ) ) {
     push @sort_output, $result;
@@ -105,9 +105,9 @@ is_deeply( \@sort_output, \@orig, "sort letters and empty 
strings" );
 @orig        = ();
 @sort_output = ();
 
-$sortex = Lucy::Util::BBSortEx->new( mem_thresh => 30 );
+$sortex = Lucy::Util::BlobSortEx->new( mem_thresh => 30 );
 @orig = 'a' .. 'z';
-$sortex->feed( new_bytebuf($_) ) for shuffle(@orig);
+$sortex->feed( new_blob($_) ) for shuffle(@orig);
 $sortex->flip;
 while ( defined( my $result = $sortex->fetch ) ) {
     push @sort_output, $result;
@@ -116,9 +116,9 @@ is_deeply( \@sort_output, \@orig, "... with an absurdly low 
mem_thresh" );
 @orig        = ();
 @sort_output = ();
 
-$sortex = Lucy::Util::BBSortEx->new( mem_thresh => 1 );
+$sortex = Lucy::Util::BlobSortEx->new( mem_thresh => 1 );
 @orig = 'a' .. 'z';
-$sortex->feed( new_bytebuf($_) ) for shuffle(@orig);
+$sortex->feed( new_blob($_) ) for shuffle(@orig);
 $sortex->flip;
 while ( defined( my $result = $sortex->fetch ) ) {
     push @sort_output, $result;
@@ -127,15 +127,15 @@ is_deeply( \@sort_output, \@orig, "... with an even lower 
mem_thresh" );
 @orig        = ();
 @sort_output = ();
 
-$sortex = Lucy::Util::BBSortEx->new;
+$sortex = Lucy::Util::BlobSortEx->new;
 $sortex->flip;
 @sort_output = $sortex->fetch;
 is_deeply( \@sort_output, [undef], "Sorting nothing returns undef" );
 @sort_output = ();
 
-$sortex = Lucy::Util::BBSortEx->new( mem_thresh => 5_000 );
+$sortex = Lucy::Util::BlobSortEx->new( mem_thresh => 5_000 );
 @orig = map { pack( 'N', $_ ) } ( 0 .. 11_000 );
-$sortex->feed( new_bytebuf($_) ) for shuffle(@orig);
+$sortex->feed( new_blob($_) ) for shuffle(@orig);
 $sortex->flip;
 while ( defined( my $item = $sortex->fetch ) ) {
     push @sort_output, $item;
@@ -143,7 +143,7 @@ while ( defined( my $item = $sortex->fetch ) ) {
 is_deeply( \@sort_output, \@orig, "Sorting packed integers..." );
 @sort_output = ();
 
-$sortex = Lucy::Util::BBSortEx->new( mem_thresh => 15_000 );
+$sortex = Lucy::Util::BlobSortEx->new( mem_thresh => 15_000 );
 @orig = ();
 for my $iter ( 0 .. 1_000 ) {
     my $string = '';
@@ -152,7 +152,7 @@ for my $iter ( 0 .. 1_000 ) {
     }
     push @orig, $string;
 }
-$sortex->feed( new_bytebuf($_) ) for shuffle(@orig);
+$sortex->feed( new_blob($_) ) for shuffle(@orig);
 @orig = sort @orig;
 $sortex->flip;
 while ( defined( my $item = $sortex->fetch ) ) {
@@ -161,5 +161,5 @@ while ( defined( my $item = $sortex->fetch ) ) {
 is_deeply( \@sort_output, \@orig, "Random binary strings of random length" );
 @sort_output = ();
 
-sub new_bytebuf { Clownfish::ByteBuf->new(shift) }
+sub new_blob { Clownfish::Blob->new(shift) }
 

Reply via email to