master - Use counting sort to sort doc_ids in SortFieldWriter#Refill

marvin Tue, 01 Jul 2014 12:35:27 -0700

Use counting sort to sort doc_ids in SortFieldWriter#Refill

Since we already have the ordinals for each doc_id, we can use a
counting sort. This uses a temporary array of size run_cardinality but
runs in linear time.



Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/d96ef18e
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/d96ef18e
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/d96ef18e

Branch: refs/heads/master
Commit: d96ef18e4c8e1ef19ce55be5926a0728254df30d
Parents: dd1a198
Author: Nick Wellnhofer <[email protected]>
Authored: Thu Sep 26 19:43:42 2013 +0200
Committer: Marvin Humphrey <[email protected]>
Committed: Wed Jun 18 21:23:00 2014 -0700

----------------------------------------------------------------------
 core/Lucy/Index/SortFieldWriter.c | 53 +++++++++++++++++++++-------------
 1 file changed, 33 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/d96ef18e/core/Lucy/Index/SortFieldWriter.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/SortFieldWriter.c 
b/core/Lucy/Index/SortFieldWriter.c
index 0d07ce5..b6faec5 100644
--- a/core/Lucy/Index/SortFieldWriter.c
+++ b/core/Lucy/Index/SortFieldWriter.c
@@ -347,30 +347,43 @@ SortFieldWriter_Compare_IMP(SortFieldWriter *self, void 
*va, void *vb) {
     return comparison;
 }
 
-static int
-S_compare_doc_ids_by_ord_rev(void *context, const void *va, const void *vb) {
-    SortCache *sort_cache = (SortCache*)context;
-    int32_t a = *(int32_t*)va;
-    int32_t b = *(int32_t*)vb;
-    int32_t ord_a = SortCache_Ordinal(sort_cache, a);
-    int32_t ord_b = SortCache_Ordinal(sort_cache, b);
-    int32_t comparison = ord_a - ord_b;
-    if (comparison == 0) { comparison = a - b; }
-    return comparison;
-}
-
 static void
 S_lazy_init_sorted_ids(SortFieldWriter *self) {
     SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
-    if (!ivars->sorted_ids) {
-        ivars->sorted_ids
-            = (int32_t*)MALLOCATE((ivars->run_max + 1) * sizeof(int32_t));
-        for (int32_t i = 0, max = ivars->run_max; i <= max; i++) {
-            ivars->sorted_ids[i] = i;
-        }
-        Sort_quicksort(ivars->sorted_ids + 1, ivars->run_max, sizeof(int32_t),
-                       S_compare_doc_ids_by_ord_rev, ivars->sort_cache);
+    if (ivars->sorted_ids) { return; }
+
+    // Counting sort. Could be optimized by working directly on the
+    // ordinal arrays.
+
+    SortCache *sort_cache      = ivars->sort_cache;
+    int32_t    run_cardinality = ivars->run_cardinality;
+    int32_t    run_max         = ivars->run_max;
+
+    // Count.
+    int32_t *counts = (int32_t*)CALLOCATE(run_cardinality, sizeof(int32_t));
+    for (int32_t doc_id = 0; doc_id <= run_max; ++doc_id) {
+        int32_t ord = SortCache_Ordinal(sort_cache, doc_id);
+        ++counts[ord];
+    }
+
+    // Compute partial sums.
+    int32_t sum = 0;
+    for (int32_t ord = 0; ord < run_cardinality; ++ord) {
+        int32_t count = counts[ord];
+        counts[ord] = sum;
+        sum += count;
     }
+
+    // Distribute.
+    int32_t *sorted_ids = (int32_t*)MALLOCATE((run_max + 1) * sizeof(int32_t));
+    for (int32_t doc_id = 0; doc_id <= run_max; ++doc_id) {
+        int32_t ord = SortCache_Ordinal(sort_cache, doc_id);
+        int32_t pos = counts[ord]++;
+        sorted_ids[pos] = doc_id;
+    }
+
+    ivars->sorted_ids = sorted_ids;
+    FREEMEM(counts);
 }
 
 void

[5/8] git commit: refs/heads/master - Use counting sort to sort doc_ids in SortFieldWriter#Refill

Reply via email to