Repository: lucy
Updated Branches:
  refs/heads/master 488d6f4cf -> 291a32693


Avoid rescanning the index folder for latest snapshot

Eliminate up to four unnecessary scans in BackgroundMerger:

- BGMerger_init
- S_merge_updated_deletions (twice)
- BGMerger_Prepare_Commit

Eliminate two unnecessary scans in Indexer:

- Indexer_init
- Indexer_Prepare_Commit


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/291a3269
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/291a3269
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/291a3269

Branch: refs/heads/master
Commit: 291a3269312a29792f8294fdf1b1f68dbfd94a79
Parents: 488d6f4
Author: Nick Wellnhofer <wellnho...@aevum.de>
Authored: Wed Feb 22 18:55:05 2017 +0100
Committer: Nick Wellnhofer <wellnho...@aevum.de>
Committed: Sat Mar 4 18:28:32 2017 +0100

----------------------------------------------------------------------
 core/Lucy/Index/BackgroundMerger.c | 43 +++++++++++++++------------------
 core/Lucy/Index/IndexManager.c     | 27 ---------------------
 core/Lucy/Index/IndexManager.cfh   |  7 ------
 core/Lucy/Index/Indexer.c          | 25 ++++++++++---------
 core/Lucy/Index/Indexer.cfh        |  1 +
 core/Lucy/Util/IndexFileNames.c    |  8 ++++++
 core/Lucy/Util/IndexFileNames.cfh  |  6 +++++
 go/build.go                        |  1 -
 go/lucy/index.go                   | 12 ---------
 go/lucy/index_test.go              |  5 ----
 10 files changed, 48 insertions(+), 87 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/291a3269/core/Lucy/Index/BackgroundMerger.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/BackgroundMerger.c 
b/core/Lucy/Index/BackgroundMerger.c
index c729bda..409571b 100644
--- a/core/Lucy/Index/BackgroundMerger.c
+++ b/core/Lucy/Index/BackgroundMerger.c
@@ -115,7 +115,8 @@ BGMerger_init(BackgroundMerger *self, Obj *index, 
IndexManager *manager) {
     // Open a PolyReader, passing in the IndexManager so we get a read lock on
     // the Snapshot's files -- so that Indexers don't zap our files while
     // we're operating in the background.
-    ivars->polyreader = PolyReader_open((Obj*)folder, NULL, ivars->manager);
+    ivars->polyreader
+        = PolyReader_open((Obj*)folder, ivars->snapshot, ivars->manager);
 
     // Clone the PolyReader's schema.
     Obj *dump = (Obj*)Schema_Dump(PolyReader_Get_Schema(ivars->polyreader));
@@ -249,12 +250,12 @@ S_maybe_merge(BackgroundMerger *self) {
 }
 
 static bool
-S_merge_updated_deletions(BackgroundMerger *self) {
+S_merge_updated_deletions(BackgroundMerger *self, Snapshot *latest_snapshot) {
     BackgroundMergerIVARS *const ivars = BGMerger_IVARS(self);
     Hash *updated_deletions = NULL;
 
     PolyReader *new_polyreader
-        = PolyReader_open((Obj*)ivars->folder, NULL, NULL);
+        = PolyReader_open((Obj*)ivars->folder, latest_snapshot, NULL);
     Vector *new_seg_readers
         = PolyReader_Get_Seg_Readers(new_polyreader);
     Vector *old_seg_readers
@@ -305,8 +306,6 @@ S_merge_updated_deletions(BackgroundMerger *self) {
             = PolyReader_open((Obj*)ivars->folder, ivars->snapshot, NULL);
         Vector *merge_seg_readers
             = PolyReader_Get_Seg_Readers(merge_polyreader);
-        Snapshot *latest_snapshot
-            = Snapshot_Read_File(Snapshot_new(), ivars->folder, NULL);
         int64_t new_seg_num
             = IxManager_Highest_Seg_Num(ivars->manager, latest_snapshot) + 1;
         Segment   *new_segment = Seg_new(new_seg_num);
@@ -360,7 +359,6 @@ S_merge_updated_deletions(BackgroundMerger *self) {
         SegWriter_Finish(seg_writer);
         DECREF(seg_writer);
         DECREF(new_segment);
-        DECREF(latest_snapshot);
         DECREF(merge_polyreader);
         DECREF(updated_deletions);
     }
@@ -411,33 +409,27 @@ BGMerger_Prepare_Commit_IMP(BackgroundMerger *self) {
             RETHROW(INCREF(Err_get_error()));
         }
 
-        // Write temporary snapshot file.
-        DECREF(ivars->snapfile);
-        String *snapfile = IxManager_Make_Snapshot_Filename(ivars->manager);
-        ivars->snapfile = Str_Cat_Trusted_Utf8(snapfile, ".temp", 5);
-        DECREF(snapfile);
-        Folder_Delete(folder, ivars->snapfile);
-        Snapshot_Write_File(snapshot, folder, ivars->snapfile);
-
         // Determine whether the index has been updated while this background
         // merge process was running.
 
         String *start_snapfile
             = Snapshot_Get_Path(PolyReader_Get_Snapshot(ivars->polyreader));
-        Snapshot *latest_snapshot
-            = Snapshot_Read_File(Snapshot_new(), ivars->folder, NULL);
-        String *latest_snapfile = Snapshot_Get_Path(latest_snapshot);
+        String *latest_snapfile = IxFileNames_latest_snapshot(ivars->folder);
         bool index_updated
             = !Str_Equals(start_snapfile, (Obj*)latest_snapfile);
 
         if (index_updated) {
+            Snapshot *latest_snapshot
+                = Snapshot_Read_File(Snapshot_new(), ivars->folder,
+                                     latest_snapfile);
+
             /* See if new deletions have been applied since this
              * background merge process started against any of the
              * segments we just merged away.  If that's true, we need to
              * write another segment which applies the deletions against
              * the new composite segment.
              */
-            S_merge_updated_deletions(self);
+            S_merge_updated_deletions(self, latest_snapshot);
 
             // Add the fresh content to our snapshot. (It's important to
             // run this AFTER S_merge_updated_deletions, because otherwise
@@ -452,16 +444,21 @@ BGMerger_Prepare_Commit_IMP(BackgroundMerger *self) {
                     }
                 }
             }
-            DECREF(files);
 
-            // Since the snapshot content has changed, we need to rewrite it.
-            Folder_Delete(folder, ivars->snapfile);
-            Snapshot_Write_File(snapshot, folder, ivars->snapfile);
+            DECREF(files);
+            DECREF(latest_snapshot);
         }
 
-        DECREF(latest_snapshot);
+        // Write temporary snapshot file.
+        DECREF(ivars->snapfile);
+        uint64_t gen = IxFileNames_extract_gen(latest_snapfile);
+        ivars->snapfile = IxFileNames_make_temp_snapshot(gen + 1);
+        Folder_Delete(folder, ivars->snapfile);
+        Snapshot_Write_File(snapshot, folder, ivars->snapfile);
 
         ivars->needs_commit = true;
+
+        DECREF(latest_snapfile);
     }
 
     // Close reader, so that we can delete its files if appropriate.

http://git-wip-us.apache.org/repos/asf/lucy/blob/291a3269/core/Lucy/Index/IndexManager.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/IndexManager.c b/core/Lucy/Index/IndexManager.c
index 15367fa..24eed1d 100644
--- a/core/Lucy/Index/IndexManager.c
+++ b/core/Lucy/Index/IndexManager.c
@@ -29,7 +29,6 @@
 #include "Lucy/Store/LockFactory.h"
 #include "Lucy/Util/IndexFileNames.h"
 #include "Lucy/Util/Json.h"
-#include "Lucy/Util/StringHelper.h"
 
 #include <stdlib.h>
 
@@ -133,32 +132,6 @@ IxManager_Highest_Seg_Num_IMP(IndexManager *self, Snapshot 
*snapshot) {
     return (int64_t)highest_seg_num;
 }
 
-String*
-IxManager_Make_Snapshot_Filename_IMP(IndexManager *self) {
-    IndexManagerIVARS *const ivars = IxManager_IVARS(self);
-    Folder *folder = (Folder*)CERTIFY(ivars->folder, FOLDER);
-    DirHandle *dh = Folder_Open_Dir(folder, NULL);
-    uint64_t max_gen = 0;
-
-    if (!dh) { RETHROW(INCREF(Err_get_error())); }
-    while (DH_Next(dh)) {
-        String *entry = DH_Get_Entry(dh);
-        if (Str_Starts_With_Utf8(entry, "snapshot_", 9)
-            && Str_Ends_With_Utf8(entry, ".json", 5)
-           ) {
-            uint64_t gen = IxFileNames_extract_gen(entry);
-            if (gen > max_gen) { max_gen = gen; }
-        }
-        DECREF(entry);
-    }
-    DECREF(dh);
-
-    uint64_t new_gen = max_gen + 1;
-    char  base36[StrHelp_MAX_BASE36_BYTES];
-    StrHelp_to_base36(new_gen, &base36);
-    return Str_newf("snapshot_%s.json", &base36);
-}
-
 static int
 S_compare_doc_count(const void *va, const void *vb) {
     SegReader *a = *(SegReader**)va;

http://git-wip-us.apache.org/repos/asf/lucy/blob/291a3269/core/Lucy/Index/IndexManager.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/IndexManager.cfh b/core/Lucy/Index/IndexManager.cfh
index be6389a..f00cdc5 100644
--- a/core/Lucy/Index/IndexManager.cfh
+++ b/core/Lucy/Index/IndexManager.cfh
@@ -145,13 +145,6 @@ public class Lucy::Index::IndexManager nickname IxManager
     int64_t
     Highest_Seg_Num(IndexManager *self, Snapshot *snapshot);
 
-    /** Return the name of a new snapshot file, which shall contain a base-36
-     * "generation" embedded inside it greater than the generation of any
-     * snapshot file currently in the index folder.
-     */
-    incremented String*
-    Make_Snapshot_Filename(IndexManager *self);
-
     /** Setter for write lock timeout.  Default: 1000 milliseconds.
      */
     public void

http://git-wip-us.apache.org/repos/asf/lucy/blob/291a3269/core/Lucy/Index/Indexer.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/Indexer.c b/core/Lucy/Index/Indexer.c
index 16b915b..05ee6d3 100644
--- a/core/Lucy/Index/Indexer.c
+++ b/core/Lucy/Index/Indexer.c
@@ -109,9 +109,9 @@ Indexer_init(Indexer *self, Schema *schema, Obj *index,
     }
 
     // Find the latest snapshot or create a new one.
-    String *latest_snapfile = IxFileNames_latest_snapshot(folder);
-    if (latest_snapfile) {
-        Snapshot_Read_File(latest_snapshot, folder, latest_snapfile);
+    ivars->latest_snapfile = IxFileNames_latest_snapshot(folder);
+    if (ivars->latest_snapfile) {
+        Snapshot_Read_File(latest_snapshot, folder, ivars->latest_snapfile);
     }
 
     // Look for an existing Schema if one wasn't supplied.
@@ -119,7 +119,7 @@ Indexer_init(Indexer *self, Schema *schema, Obj *index,
         ivars->schema = (Schema*)INCREF(schema);
     }
     else {
-        if (!latest_snapfile) {
+        if (!ivars->latest_snapfile) {
             S_release_write_lock(self);
             THROW(ERR, "No Schema supplied, and can't find one in the index");
         }
@@ -147,13 +147,13 @@ Indexer_init(Indexer *self, Schema *schema, Obj *index,
         ivars->truncate = true;
     }
     else {
-        // TODO: clone most recent snapshot rather than read it twice.
         ivars->snapshot = (Snapshot*)INCREF(latest_snapshot);
-        ivars->polyreader = latest_snapfile
-                           ? PolyReader_open((Obj*)folder, NULL, NULL)
+        ivars->polyreader = ivars->latest_snapfile
+                           ? PolyReader_open((Obj*)folder, latest_snapshot,
+                                             NULL)
                            : PolyReader_new(schema, folder, NULL, NULL, NULL);
 
-        if (latest_snapfile) {
+        if (ivars->latest_snapfile) {
             // Make sure than any existing fields which may have been
             // dynamically added during past indexing sessions get added.
             Schema *old_schema = PolyReader_Get_Schema(ivars->polyreader);
@@ -215,7 +215,6 @@ Indexer_init(Indexer *self, Schema *schema, Obj *index,
     ivars->del_writer = (DeletionsWriter*)INCREF(
                            SegWriter_Get_Del_Writer(ivars->seg_writer));
 
-    DECREF(latest_snapfile);
     DECREF(latest_snapshot);
 
     return self;
@@ -231,6 +230,7 @@ Indexer_Destroy_IMP(Indexer *self) {
     DECREF(ivars->segment);
     DECREF(ivars->manager);
     DECREF(ivars->stock_doc);
+    DECREF(ivars->latest_snapfile);
     DECREF(ivars->polyreader);
     DECREF(ivars->del_writer);
     DECREF(ivars->snapshot);
@@ -509,9 +509,10 @@ Indexer_Prepare_Commit_IMP(Indexer *self) {
 
         // Derive snapshot and schema file names.
         DECREF(ivars->snapfile);
-        String *snapfile = IxManager_Make_Snapshot_Filename(ivars->manager);
-        ivars->snapfile = Str_Cat_Trusted_Utf8(snapfile, ".temp", 5);
-        DECREF(snapfile);
+        uint64_t gen = ivars->latest_snapfile
+                       ? IxFileNames_extract_gen(ivars->latest_snapfile)
+                       : 0;
+        ivars->snapfile = IxFileNames_make_temp_snapshot(gen + 1);
         uint64_t schema_gen = IxFileNames_extract_gen(ivars->snapfile);
         char base36[StrHelp_MAX_BASE36_BYTES];
         StrHelp_to_base36(schema_gen, &base36);

http://git-wip-us.apache.org/repos/asf/lucy/blob/291a3269/core/Lucy/Index/Indexer.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/Indexer.cfh b/core/Lucy/Index/Indexer.cfh
index 5a01f8b..3f75901 100644
--- a/core/Lucy/Index/Indexer.cfh
+++ b/core/Lucy/Index/Indexer.cfh
@@ -43,6 +43,7 @@ public class Lucy::Index::Indexer inherits Clownfish::Obj {
     Folder            *folder;
     Segment           *segment;
     IndexManager      *manager;
+    String            *latest_snapfile;
     PolyReader        *polyreader;
     Snapshot          *snapshot;
     SegWriter         *seg_writer;

http://git-wip-us.apache.org/repos/asf/lucy/blob/291a3269/core/Lucy/Util/IndexFileNames.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Util/IndexFileNames.c b/core/Lucy/Util/IndexFileNames.c
index a1a063b..2298b3b 100644
--- a/core/Lucy/Util/IndexFileNames.c
+++ b/core/Lucy/Util/IndexFileNames.c
@@ -20,6 +20,7 @@
 #include "Lucy/Util/IndexFileNames.h"
 #include "Lucy/Store/DirHandle.h"
 #include "Lucy/Store/Folder.h"
+#include "Lucy/Util/StringHelper.h"
 
 String*
 IxFileNames_latest_snapshot(Folder *folder) {
@@ -69,6 +70,13 @@ IxFileNames_extract_gen(String *name) {
 }
 
 String*
+IxFileNames_make_temp_snapshot(uint64_t gen) {
+    char  base36[StrHelp_MAX_BASE36_BYTES];
+    StrHelp_to_base36(gen, &base36);
+    return Str_newf("snapshot_%s.json.temp", &base36);
+}
+
+String*
 IxFileNames_local_part(String *path) {
     StringIterator *top = Str_Tail(path);
     int32_t code_point = StrIter_Prev(top);

http://git-wip-us.apache.org/repos/asf/lucy/blob/291a3269/core/Lucy/Util/IndexFileNames.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Util/IndexFileNames.cfh 
b/core/Lucy/Util/IndexFileNames.cfh
index e1bbff3..ea85a9a 100644
--- a/core/Lucy/Util/IndexFileNames.cfh
+++ b/core/Lucy/Util/IndexFileNames.cfh
@@ -35,6 +35,12 @@ inert class Lucy::Util::IndexFileNames nickname IxFileNames {
     inert incremented nullable String*
     latest_snapshot(Folder *folder);
 
+    /** Return the name of a new temporary snapshot file, which shall contain
+     * a base-36 "generation" embedded inside it.
+     */
+    inert incremented String*
+    make_temp_snapshot(uint64_t gen);
+
     /** Split the `path` on '/' and return the last component.
      * Trailing slashes will be stripped.
      */

http://git-wip-us.apache.org/repos/asf/lucy/blob/291a3269/go/build.go
----------------------------------------------------------------------
diff --git a/go/build.go b/go/build.go
index b751ba5..553b7a9 100644
--- a/go/build.go
+++ b/go/build.go
@@ -259,7 +259,6 @@ func specClasses(parcel *cfc.Parcel) {
        managerBinding.SpecMethod("Write_Merge_Data", "WriteMergeData(int64) 
error")
        managerBinding.SpecMethod("Read_Merge_Data", "ReadMergeData() 
(map[string]interface{}, error)")
        managerBinding.SpecMethod("Remove_Merge_Data", "RemoveMergeData() 
error")
-       managerBinding.SpecMethod("Make_Snapshot_Filename", 
"MakeSnapshotFilename() (string, error)")
        managerBinding.SpecMethod("Recycle", "Recycle(PolyReader, 
DeletionsWriter, int64, bool) ([]SegReader, error)")
        managerBinding.Register()
 

http://git-wip-us.apache.org/repos/asf/lucy/blob/291a3269/go/lucy/index.go
----------------------------------------------------------------------
diff --git a/go/lucy/index.go b/go/lucy/index.go
index 6f627f0..6e8142e 100644
--- a/go/lucy/index.go
+++ b/go/lucy/index.go
@@ -396,18 +396,6 @@ func (im *IndexManagerIMP) RemoveMergeData() error {
        })
 }
 
-func (im *IndexManagerIMP) MakeSnapshotFilename() (retval string, err error) {
-       err = clownfish.TrapErr(func() {
-               self := (*C.lucy_IndexManager)(clownfish.Unwrap(im, "im"))
-               retvalC := C.LUCY_IxManager_Make_Snapshot_Filename(self)
-               if retvalC != nil {
-                       defer C.cfish_decref(unsafe.Pointer(retvalC))
-                       retval = 
clownfish.ToGo(unsafe.Pointer(retvalC)).(string)
-               }
-       })
-       return retval, err
-}
-
 func (im *IndexManagerIMP) Recycle(reader PolyReader, delWriter 
DeletionsWriter,
        cutoff int64, optimize bool) (retval []SegReader, err error) {
        err = clownfish.TrapErr(func() {

http://git-wip-us.apache.org/repos/asf/lucy/blob/291a3269/go/lucy/index_test.go
----------------------------------------------------------------------
diff --git a/go/lucy/index_test.go b/go/lucy/index_test.go
index b8823e8..0049ee4 100644
--- a/go/lucy/index_test.go
+++ b/go/lucy/index_test.go
@@ -19,7 +19,6 @@ package lucy
 import "testing"
 import "os"
 import "reflect"
-import "strings"
 
 import 
"git-wip-us.apache.org/repos/asf/lucy-clownfish.git/runtime/go/clownfish"
 
@@ -224,10 +223,6 @@ func TestIndexManagerMergeData(t *testing.T) {
 
 func TestIndexManagerMisc(t *testing.T) {
        manager := NewIndexManager("", nil)
-       manager.SetFolder(NewRAMFolder(""))
-       if got, err := manager.MakeSnapshotFilename(); !strings.Contains(got, 
"snapshot") || err != nil {
-               t.Errorf("MakeSnapshotFilename: %s, %v", got, err)
-       }
        snapshot := NewSnapshot()
        snapshot.AddEntry("seg_4")
        snapshot.AddEntry("seg_5")

Reply via email to