This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/clucene by this push:
     new d05cb815 [fix](index writer)fix max buffered docs not working (#167)
d05cb815 is described below

commit d05cb8154ef4368bd40c43c94e8e3c679e13c490
Author: qiye <[email protected]>
AuthorDate: Wed Dec 27 18:54:39 2023 +0800

    [fix](index writer)fix max buffered docs not working (#167)
---
 src/core/CLucene/index/SDocumentWriter.cpp |  7 +++++++
 src/core/CLucene/index/SDocumentWriter.h   | 11 +++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/core/CLucene/index/SDocumentWriter.cpp b/src/core/CLucene/index/SDocumentWriter.cpp
index 3c33a30b..d11e39bf 100644
--- a/src/core/CLucene/index/SDocumentWriter.cpp
+++ b/src/core/CLucene/index/SDocumentWriter.cpp
@@ -142,6 +142,13 @@ typename SDocumentsWriter<T>::ThreadState *SDocumentsWriter<T>::getThreadState(D
     // Only increment nextDocID & numDocsInRAM on successful init
     nextDocID++;
     numDocsInRAM++;
+    // We must at this point commit to flushing to ensure we
+    // always get N docs when we flush by doc count, even if
+    // > 1 thread is adding documents:
+    if (!flushPending && maxBufferedDocs != IndexWriter::DISABLE_AUTO_FLUSH && numDocsInRAM >= maxBufferedDocs) {
+        flushPending = true;
+        threadState->doFlushAfter = true;
+    }
 
     return threadState;
 }
diff --git a/src/core/CLucene/index/SDocumentWriter.h b/src/core/CLucene/index/SDocumentWriter.h
index 342089a7..b1217ba1 100644
--- a/src/core/CLucene/index/SDocumentWriter.h
+++ b/src/core/CLucene/index/SDocumentWriter.h
@@ -6,6 +6,7 @@
 #define _lucene_index_SDocumentsWriter_
 
 #include "CLucene/_ApiHeader.h"
+#include "CLucene/index/IndexWriter.h"
 #include "CLucene/store/Directory.h"
 #include "CLucene/store/_RAMDirectory.h"
 #include "CLucene/util/Array.h"
@@ -53,6 +54,9 @@ private:
     std::vector<uint32_t> freqBuffer;
     std::ostream* infoStream{};
     int64_t ramBufferSize;
+    // Flush @ this number of docs.  If ramBufferSize is
+    // non-zero we will flush by RAM usage instead.
+    int32_t maxBufferedDocs;
     bool hasProx_ = false;
     IndexVersion indexVersion_ = IndexVersion::kV1;
 
@@ -679,6 +683,7 @@ public:
         postingsFreeCountDW = postingsAllocCountDW = 0;
         docStoreOffset = nextDocID = numDocsInRAM = numDocsInStore = nextWriteDocID = 0;
         ramBufferSize = (int64_t) (256 * 1024 * 1024);
+        maxBufferedDocs = IndexWriter::DEFAULT_MAX_BUFFERED_DOCS;
     }
     virtual ~SDocumentsWriter();
     int32_t flush(bool closeDocStore) override ;
@@ -730,7 +735,7 @@ public:
         return segmentFileName(string(extension));
     }
     int32_t getMaxBufferedDocs() override {
-        return 0;
+        return maxBufferedDocs;
     }
 
     int32_t getNumDocsInRAM() override {
@@ -748,7 +753,9 @@ public:
         return 0;
     }
     void abort(AbortException *ae) override {}
-    void setMaxBufferedDocs(int32_t count) override {}
+    void setMaxBufferedDocs(int32_t count) override {
+        maxBufferedDocs = count;
+    }
     void setInfoStream(std::ostream *is) override {
         this->infoStream = is;
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to