This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/clucene by this push:
     new 7cf6cf410d4 [fix](index compaction)Skip writing terms with a doc frequency of 0 (#248)
7cf6cf410d4 is described below

commit 7cf6cf410d41d95456edba263cc55b7b6f5ab027
Author: qiye <[email protected]>
AuthorDate: Thu Oct 31 17:12:22 2024 +0800

    [fix](index compaction)Skip writing terms with a doc frequency of 0 (#248)
---
 src/core/CLucene/index/IndexWriter.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/core/CLucene/index/IndexWriter.cpp b/src/core/CLucene/index/IndexWriter.cpp
index 8a2c50431cc..98cbf9af897 100644
--- a/src/core/CLucene/index/IndexWriter.cpp
+++ b/src/core/CLucene/index/IndexWriter.cpp
@@ -1831,6 +1831,17 @@ void IndexWriter::mergeTerms(bool hasProx, IndexVersion indexVersion) {
         }
 
         for (int i = 0; i < numDestIndexes; ++i) {
+            if (dfs[i] == 0) {
+                if (infoStream != nullptr) {
+                    std::string name = lucene_wcstoutf8string(smallestTerm->text(), smallestTerm->textLength());
+                    std::string field = lucene_wcstoutf8string(smallestTerm->field(), wcslen(smallestTerm->field()));
+                    std::stringstream ss;
+                    ss << "term: " << name << ", field: " << field << ", doc frequency is zero[" << dfs[i] << "], skip it." << "\n";
+                    message(ss.str());
+                }
+                // if doc frequency is 0, it means the term is deleted. So we should not write it.
+                continue;
+            }
             DefaultSkipListWriter *skipListWriter = skipListWriterList[i];
             CL_NS(store)::IndexOutput *freqOutput = freqOutputList[i];
             CL_NS(store)::IndexOutput *proxOutput = proxOutputList[i];


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to