This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 5656addb6ac [Pick 2.0](inverted index) fix array inverted index builder error (#29869) (#30643)
5656addb6ac is described below
commit 5656addb6ac6c835ef553ff9b2a76a9bc97eac4d
Author: airborne12 <[email protected]>
AuthorDate: Sat Feb 3 16:16:04 2024 +0800
[Pick 2.0](inverted index) fix array inverted index builder error (#29869) (#30643)
---
be/src/olap/task/index_builder.cpp | 65 +++++++++++++++++++-------------------
1 file changed, 32 insertions(+), 33 deletions(-)
diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp
index cefeeda2ae2..ddfd027b34f 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -321,30 +321,26 @@ Status IndexBuilder::_add_nullable(const std::string& column_name,
}
return step;
};
+ // TODO: need to process null data for inverted index
if (field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
DCHECK(field->get_sub_field_count() == 1);
- BitmapIterator null_iter(null_map, num_rows);
- bool is_null = false;
- size_t this_run = 0;
- while ((this_run = null_iter.Next(&is_null)) > 0) {
- if (is_null) {
-
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_nulls(this_run));
- } else {
- // [size, offset_ptr, item_data_ptr, item_nullmap_ptr]
- auto data_ptr = reinterpret_cast<const uint64_t*>(*ptr);
- // total number length
- size_t element_cnt = size_t((unsigned long)(*data_ptr));
- auto offset_data = *(data_ptr + 1);
- const uint8_t* offsets_ptr = (const uint8_t*)offset_data;
- if (element_cnt > 0) {
- auto data = *(data_ptr + 2);
- auto nested_null_map = *(data_ptr + 3);
-
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
- field->get_sub_field(0)->size(),
reinterpret_cast<const void*>(data),
- reinterpret_cast<const uint8_t*>(nested_null_map),
offsets_ptr,
- num_rows));
- }
+ // [size, offset_ptr, item_data_ptr, item_nullmap_ptr]
+ auto data_ptr = reinterpret_cast<const uint64_t*>(*ptr);
+ // total number length
+ size_t element_cnt = size_t((unsigned long)(*data_ptr));
+ auto offset_data = *(data_ptr + 1);
+ const uint8_t* offsets_ptr = (const uint8_t*)offset_data;
+ try {
+ if (element_cnt > 0) {
+ auto data = *(data_ptr + 2);
+ auto nested_null_map = *(data_ptr + 3);
+
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
+ field->get_sub_field(0)->size(),
reinterpret_cast<const void*>(data),
+ reinterpret_cast<const uint8_t*>(nested_null_map),
offsets_ptr, num_rows));
}
+ } catch (const std::exception& e) {
+ return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+ "CLuceneError occured: {}", e.what());
}
return Status::OK();
}
@@ -355,15 +351,8 @@ Status IndexBuilder::_add_nullable(const std::string& column_name,
if (null_map[offset]) {
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_nulls(step));
} else {
- if (field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
- DCHECK(field->get_sub_field_count() == 1);
- const auto* col_cursor = reinterpret_cast<const
CollectionValue*>(*ptr);
-
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
- field->get_sub_field(0)->size(), col_cursor,
step));
- } else {
-
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_values(
- column_name, *ptr, step));
- }
+
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_values(
+ column_name, *ptr, step));
}
*ptr += field->size() * step;
offset += step;
@@ -382,9 +371,19 @@ Status IndexBuilder::_add_data(const std::string& column_name,
try {
if (field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
DCHECK(field->get_sub_field_count() == 1);
- const auto* col_cursor = reinterpret_cast<const
CollectionValue*>(*ptr);
-
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
- field->get_sub_field(0)->size(), col_cursor, num_rows));
+ // [size, offset_ptr, item_data_ptr, item_nullmap_ptr]
+ auto data_ptr = reinterpret_cast<const uint64_t*>(*ptr);
+ // total number length
+ size_t element_cnt = size_t((unsigned long)(*data_ptr));
+ auto offset_data = *(data_ptr + 1);
+ const uint8_t* offsets_ptr = (const uint8_t*)offset_data;
+ if (element_cnt > 0) {
+ auto data = *(data_ptr + 2);
+ auto nested_null_map = *(data_ptr + 3);
+
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
+ field->get_sub_field(0)->size(),
reinterpret_cast<const void*>(data),
+ reinterpret_cast<const uint8_t*>(nested_null_map),
offsets_ptr, num_rows));
+ }
} else {
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_values(
column_name, *ptr, num_rows));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]