This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 5656addb6ac [Pick 2.0](inverted index) fix array inverted index builder error (#29869) (#30643)
5656addb6ac is described below

commit 5656addb6ac6c835ef553ff9b2a76a9bc97eac4d
Author: airborne12 <[email protected]>
AuthorDate: Sat Feb 3 16:16:04 2024 +0800

    [Pick 2.0](inverted index) fix array inverted index builder error (#29869) (#30643)
---
 be/src/olap/task/index_builder.cpp | 65 +++++++++++++++++++-------------------
 1 file changed, 32 insertions(+), 33 deletions(-)

diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp
index cefeeda2ae2..ddfd027b34f 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -321,30 +321,26 @@ Status IndexBuilder::_add_nullable(const std::string& 
column_name,
         }
         return step;
     };
+    // TODO: need to process null data for inverted index
     if (field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
         DCHECK(field->get_sub_field_count() == 1);
-        BitmapIterator null_iter(null_map, num_rows);
-        bool is_null = false;
-        size_t this_run = 0;
-        while ((this_run = null_iter.Next(&is_null)) > 0) {
-            if (is_null) {
-                
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_nulls(this_run));
-            } else {
-                // [size, offset_ptr, item_data_ptr, item_nullmap_ptr]
-                auto data_ptr = reinterpret_cast<const uint64_t*>(*ptr);
-                // total number length
-                size_t element_cnt = size_t((unsigned long)(*data_ptr));
-                auto offset_data = *(data_ptr + 1);
-                const uint8_t* offsets_ptr = (const uint8_t*)offset_data;
-                if (element_cnt > 0) {
-                    auto data = *(data_ptr + 2);
-                    auto nested_null_map = *(data_ptr + 3);
-                    
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
-                            field->get_sub_field(0)->size(), 
reinterpret_cast<const void*>(data),
-                            reinterpret_cast<const uint8_t*>(nested_null_map), 
offsets_ptr,
-                            num_rows));
-                }
+        // [size, offset_ptr, item_data_ptr, item_nullmap_ptr]
+        auto data_ptr = reinterpret_cast<const uint64_t*>(*ptr);
+        // total number length
+        size_t element_cnt = size_t((unsigned long)(*data_ptr));
+        auto offset_data = *(data_ptr + 1);
+        const uint8_t* offsets_ptr = (const uint8_t*)offset_data;
+        try {
+            if (element_cnt > 0) {
+                auto data = *(data_ptr + 2);
+                auto nested_null_map = *(data_ptr + 3);
+                
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
+                        field->get_sub_field(0)->size(), 
reinterpret_cast<const void*>(data),
+                        reinterpret_cast<const uint8_t*>(nested_null_map), 
offsets_ptr, num_rows));
             }
+        } catch (const std::exception& e) {
+            return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+                    "CLuceneError occured: {}", e.what());
         }
         return Status::OK();
     }
@@ -355,15 +351,8 @@ Status IndexBuilder::_add_nullable(const std::string& 
column_name,
             if (null_map[offset]) {
                 
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_nulls(step));
             } else {
-                if (field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
-                    DCHECK(field->get_sub_field_count() == 1);
-                    const auto* col_cursor = reinterpret_cast<const 
CollectionValue*>(*ptr);
-                    
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
-                            field->get_sub_field(0)->size(), col_cursor, 
step));
-                } else {
-                    
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_values(
-                            column_name, *ptr, step));
-                }
+                
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_values(
+                        column_name, *ptr, step));
             }
             *ptr += field->size() * step;
             offset += step;
@@ -382,9 +371,19 @@ Status IndexBuilder::_add_data(const std::string& 
column_name,
     try {
         if (field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
             DCHECK(field->get_sub_field_count() == 1);
-            const auto* col_cursor = reinterpret_cast<const 
CollectionValue*>(*ptr);
-            
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
-                    field->get_sub_field(0)->size(), col_cursor, num_rows));
+            // [size, offset_ptr, item_data_ptr, item_nullmap_ptr]
+            auto data_ptr = reinterpret_cast<const uint64_t*>(*ptr);
+            // total number length
+            size_t element_cnt = size_t((unsigned long)(*data_ptr));
+            auto offset_data = *(data_ptr + 1);
+            const uint8_t* offsets_ptr = (const uint8_t*)offset_data;
+            if (element_cnt > 0) {
+                auto data = *(data_ptr + 2);
+                auto nested_null_map = *(data_ptr + 3);
+                
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_array_values(
+                        field->get_sub_field(0)->size(), 
reinterpret_cast<const void*>(data),
+                        reinterpret_cast<const uint8_t*>(nested_null_map), 
offsets_ptr, num_rows));
+            }
         } else {
             
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_values(
                     column_name, *ptr, num_rows));


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to