This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new da0138a412a [Pick 2.1](segment iterator) fix shrink non-char column coredump #36275 (#36468)
da0138a412a is described below

commit da0138a412a4d91c9d485347022d5fe6a1f447ab
Author: airborne12 <[email protected]>
AuthorDate: Tue Jun 18 21:59:15 2024 +0800

    [Pick 2.1](segment iterator) fix shrink non-char column coredump #36275 (#36468)
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 28 +++++-----
 be/src/olap/rowset/segment_v2/segment_iterator.h   |  2 +-
 .../data/inverted_index_p0/test_delete.out         |  4 ++
 .../suites/inverted_index_p0/test_delete.groovy    | 60 ++++++++++++++++++++++
 4 files changed, 81 insertions(+), 13 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 9702ef4a582..c3097440e08 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -321,10 +321,6 @@ Status SegmentIterator::_init_impl(const 
StorageReadOptions& opts) {
     }
 
     RETURN_IF_ERROR(init_iterators());
-    if (_char_type_idx.empty() && _char_type_idx_no_0.empty()) {
-        _is_char_type.resize(_schema->columns().size(), false);
-        _vec_init_char_column_id();
-    }
 
     if (opts.output_columns != nullptr) {
         _output_columns = *(opts.output_columns);
@@ -1735,19 +1731,23 @@ bool SegmentIterator::_has_char_type(const Field& 
column_desc) {
     }
 };
 
-void SegmentIterator::_vec_init_char_column_id() {
+void SegmentIterator::_vec_init_char_column_id(vectorized::Block* block) {
     for (size_t i = 0; i < _schema->num_column_ids(); i++) {
         auto cid = _schema->column_id(i);
         const Field* column_desc = _schema->column(cid);
 
-        if (_has_char_type(*column_desc)) {
-            _char_type_idx.emplace_back(i);
-            if (i != 0) {
-                _char_type_idx_no_0.emplace_back(i);
+        // The additional deleted filter condition will be in the materialized column at the end of the block.
+        // After _output_column_by_sel_idx, it will be erased, so we do not need to shrink it.
+        if (i < block->columns()) {
+            if (_has_char_type(*column_desc)) {
+                _char_type_idx.emplace_back(i);
+                if (i != 0) {
+                    _char_type_idx_no_0.emplace_back(i);
+                }
+            }
+            if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_CHAR) {
+                _is_char_type[cid] = true;
             }
-        }
-        if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_CHAR) {
-            _is_char_type[cid] = true;
         }
     }
 }
@@ -2180,6 +2180,10 @@ Status 
SegmentIterator::_next_batch_internal(vectorized::Block* block) {
         }
         _current_return_columns.resize(_schema->columns().size());
         _converted_column_ids.resize(_schema->columns().size(), 0);
+        if (_char_type_idx.empty() && _char_type_idx_no_0.empty()) {
+            _is_char_type.resize(_schema->columns().size(), false);
+            _vec_init_char_column_id(block);
+        }
         for (size_t i = 0; i < _schema->num_column_ids(); i++) {
             auto cid = _schema->column_id(i);
             auto column_desc = _schema->column(cid);
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h 
b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 50dafeac2f5..b03ed04603a 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -206,7 +206,7 @@ private:
     // TODO: Fix Me
     // CHAR type in storage layer padding the 0 in length. But query engine need ignore the padding 0.
     // so segment iterator need to shrink char column before output it. only use in vec query engine.
-    void _vec_init_char_column_id();
+    void _vec_init_char_column_id(vectorized::Block* block);
     bool _has_char_type(const Field& column_desc);
 
     uint32_t segment_id() const { return _segment->id(); }
diff --git a/regression-test/data/inverted_index_p0/test_delete.out 
b/regression-test/data/inverted_index_p0/test_delete.out
new file mode 100644
index 00000000000..4ee136aef2b
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_delete.out
@@ -0,0 +1,4 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+2
+
diff --git a/regression-test/suites/inverted_index_p0/test_delete.groovy 
b/regression-test/suites/inverted_index_p0/test_delete.groovy
new file mode 100644
index 00000000000..3d6100ca4fe
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_delete.groovy
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_delete"){
+    // prepare test table
+
+    def timeout = 60000
+    def delta_time = 1000
+    def alter_res = "null"
+    def useTime = 0
+
+    def indexTblName = "test_delete"
+
+    sql "DROP TABLE IF EXISTS ${indexTblName}"
+    // create 1 replica table
+    sql """
+       CREATE TABLE `${indexTblName}` (
+      `a` int NULL COMMENT '',
+      `b` varchar(60) NOT NULL COMMENT '',
+      `c` char(10) NULL COMMENT '',
+      INDEX index_b(b) USING INVERTED  COMMENT '',
+      INDEX index_c(c) USING INVERTED  COMMENT ''
+    ) ENGINE=OLAP
+    DUPLICATE KEY(`a`)
+    COMMENT 'OLAP'
+    DISTRIBUTED BY HASH(`a`) BUCKETS 1
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1",
+    "is_being_synced" = "false",
+    "storage_format" = "V2",
+    "light_schema_change" = "true",
+    "disable_auto_compaction" = "false",
+    "enable_single_replica_compaction" = "false"
+    );
+    """
+
+    sql """ INSERT INTO `${indexTblName}`(`a`, `b`, `c`) VALUES ('1', 
'6afef581285b6608bf80d5a4e46cf839', 'aaa'), ('2', 
'48a33ec3453a28bce84b8f96fe161956', 'bbb'),
+                                                                ('3', 
'021603e7dcfe65d44af0efd0e5aee154', 'ccc'), ('4', 
'ee27ee1da291e46403c408e220bed6e1', 'ddd'),
+                                                                ('5', 
'a648a447b8f71522f11632eba4b4adde', 'eee'), ('6', 
'a9fb5c985c90bf05f3bee5ca3ae95260', 'fff'),
+                                                                ('7', 
'0974e7a82e30d1af83205e474fadd0a2', 'ggg'); """
+
+
+    sql """ DELETE FROM ${indexTblName} WHERE c IN ('aaa','ccc'); """
+
+    qt_sql """ SELECT count(1) as cnt FROM ${indexTblName} WHERE a BETWEEN 1 
AND 6 AND b IN ('48a33ec3453a28bce84b8f96fe161956', 
'a9fb5c985c90bf05f3bee5ca3ae95260'); """
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to