This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene by this push:
new 7cf6cf410d4 [fix](index compaction)Skip writing terms with a doc frequency of 0 (#248)
7cf6cf410d4 is described below
commit 7cf6cf410d41d95456edba263cc55b7b6f5ab027
Author: qiye <[email protected]>
AuthorDate: Thu Oct 31 17:12:22 2024 +0800
[fix](index compaction)Skip writing terms with a doc frequency of 0 (#248)
---
src/core/CLucene/index/IndexWriter.cpp | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/src/core/CLucene/index/IndexWriter.cpp b/src/core/CLucene/index/IndexWriter.cpp
index 8a2c50431cc..98cbf9af897 100644
--- a/src/core/CLucene/index/IndexWriter.cpp
+++ b/src/core/CLucene/index/IndexWriter.cpp
@@ -1831,6 +1831,17 @@ void IndexWriter::mergeTerms(bool hasProx, IndexVersion indexVersion) {
}
for (int i = 0; i < numDestIndexes; ++i) {
+ if (dfs[i] == 0) {
+ if (infoStream != nullptr) {
+ std::string name =
lucene_wcstoutf8string(smallestTerm->text(), smallestTerm->textLength());
+ std::string field =
lucene_wcstoutf8string(smallestTerm->field(), wcslen(smallestTerm->field()));
+ std::stringstream ss;
+ ss << "term: " << name << ", field: " << field << ", doc
frequency is zero[" << dfs[i] << "], skip it." << "\n";
+ message(ss.str());
+ }
+ // if doc frequency is 0, it means the term is deleted. So we
should not write it.
+ continue;
+ }
DefaultSkipListWriter *skipListWriter = skipListWriterList[i];
CL_NS(store)::IndexOutput *freqOutput = freqOutputList[i];
CL_NS(store)::IndexOutput *proxOutput = proxOutputList[i];
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]