This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 32964879f74 [opt](inverted index) Add null document interface to
optimize empty string indexing (#28661)
32964879f74 is described below
commit 32964879f74d1b3839e79c9a627fef18248d64a1
Author: qiye <[email protected]>
AuthorDate: Wed Dec 20 12:11:02 2023 +0800
[opt](inverted index) Add null document interface to optimize empty string
indexing (#28661)
---
be/src/clucene | 2 +-
.../olap/rowset/segment_v2/inverted_index_writer.cpp | 19 ++++++++++++++++---
2 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/be/src/clucene b/be/src/clucene
index d6adff12de6..4bd7d450173 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit d6adff12de6a687666edbc77ad8da6ec1ab32b88
+Subproject commit 4bd7d4501739c798c98b30d6350b243942d5f9bc
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index d397910891f..718d46f5e2b 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -230,6 +230,17 @@ public:
return Status::OK();
}
+ Status add_null_document() {
+ try {
+ _index_writer->addNullDocument(_doc.get());
+ } catch (const CLuceneError& e) {
+ _dir->deleteDirectory();
+ return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+ "CLuceneError add_null_document: {}", e.what());
+ }
+ return Status::OK();
+ }
+
Status add_nulls(uint32_t count) override {
_null_bitmap.addRange(_rid, _rid + count);
_rid += count;
@@ -242,7 +253,7 @@ public:
for (int i = 0; i < count; ++i) {
new_fulltext_field(empty_value.c_str(), 0);
- RETURN_IF_ERROR(add_document());
+ RETURN_IF_ERROR(add_null_document());
}
}
return Status::OK();
@@ -292,10 +303,11 @@ public:
VLOG_DEBUG << "fulltext index value length can be at most 256, but got "
<< "value length:" << v->get_size() << ", ignore this value";
new_fulltext_field(empty_value.c_str(), 0);
+ RETURN_IF_ERROR(add_null_document());
} else {
new_fulltext_field(v->get_data(), v->get_size());
+ RETURN_IF_ERROR(add_document());
}
- RETURN_IF_ERROR(add_document());
++v;
_rid++;
}
@@ -341,11 +353,12 @@ public:
VLOG_DEBUG << "fulltext index value length can be at most 256, but got "
<< "value length:" << value.length() << ", ignore this value";
new_fulltext_field(empty_value.c_str(), 0);
+ RETURN_IF_ERROR(add_null_document());
} else {
new_fulltext_field(value.c_str(), value.length());
+ RETURN_IF_ERROR(add_document());
}
_rid++;
- RETURN_IF_ERROR(add_document());
}
} else if constexpr (field_is_numeric_type(field_type)) {
for (int i = 0; i < count; ++i) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]