This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch clucene-2.0
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene-2.0 by this push:
new 3526de75334 [fix](index compaction)Skip writing terms with a doc frequency of 0 (#248) (#249)
3526de75334 is described below
commit 3526de75334f64aea3f299d395c8a460a9054e37
Author: qiye <[email protected]>
AuthorDate: Fri Nov 1 09:36:17 2024 +0800
[fix](index compaction)Skip writing terms with a doc frequency of 0 (#248) (#249)
---
src/core/CLucene/index/IndexWriter.cpp | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/src/core/CLucene/index/IndexWriter.cpp b/src/core/CLucene/index/IndexWriter.cpp
index 10dfd68c60d..dbca5655f55 100644
--- a/src/core/CLucene/index/IndexWriter.cpp
+++ b/src/core/CLucene/index/IndexWriter.cpp
@@ -1815,6 +1815,17 @@ void IndexWriter::mergeTerms(bool hasProx) {
}
for (int i = 0; i < numDestIndexes; ++i) {
+ if (dfs[i] == 0) {
+ if (infoStream != nullptr) {
+ std::string name =
lucene_wcstoutf8string(smallestTerm->text(), smallestTerm->textLength());
+ std::string field =
lucene_wcstoutf8string(smallestTerm->field(), wcslen(smallestTerm->field()));
+ std::stringstream ss;
+ ss << "term: " << name << ", field: " << field << ", doc
frequency is zero[" << dfs[i] << "], skip it." << "\n";
+ message(ss.str());
+ }
+ // if doc frequency is 0, it means the term is deleted. So we
should not write it.
+ continue;
+ }
DefaultSkipListWriter *skipListWriter = skipListWriterList[i];
CL_NS(store)::IndexOutput *freqOutput = freqOutputList[i];
CL_NS(store)::IndexOutput *proxOutput = proxOutputList[i];
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]