[doris] 20/47: [FIX](filter) update for filter_by_select logic (#25007)

kxiao Wed, 11 Oct 2023 09:55:24 -0700

This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


commit e8e1e6178f6e6191876941d2137c393ed188333d
Author: amory <[email protected]>
AuthorDate: Mon Oct 9 21:27:40 2023 +0800

    [FIX](filter) update for filter_by_select logic  (#25007)
    
    this pr is aim to update for filter_by_select logic and change delete limit
    
    only support scala type in delete statement where condition
    only support column nullable and predict column support filter_by_select 
logic, because we can not push down non-scala type to storage layer to pack in 
predict column but do filter logic
---
 be/src/vec/columns/column.h                        |   6 +-
 be/src/vec/columns/column_array.cpp                |  32 -----
 be/src/vec/columns/column_array.h                  |   1 -
 be/src/vec/columns/column_map.cpp                  |  34 -----
 be/src/vec/columns/column_map.h                    |   1 -
 be/src/vec/columns/column_nullable.cpp             |  12 +-
 be/src/vec/columns/column_struct.cpp               |  10 --
 be/src/vec/columns/column_struct.h                 |   1 -
 be/src/vec/columns/column_vector.h                 |   6 -
 be/src/vec/exec/scan/vscan_node.cpp                |   6 +
 .../java/org/apache/doris/analysis/DeleteStmt.java |  10 +-
 .../data/delete_p0/test_array_column_delete.out    |   2 +
 .../data/delete_p0/test_map_column_delete.out      |   5 +-
 .../data/delete_p0/test_struct_column_delete.out   |   5 +-
 regression-test/data/query_p0/arr_max.orc          | Bin 0 -> 777600 bytes
 .../test_array_with_single_row_max_than_uint16.out |   8 ++
 .../delete_p0/test_array_column_delete.groovy      |   6 +-
 .../suites/delete_p0/test_map_column_delete.groovy |   6 +-
 .../delete_p0/test_struct_column_delete.groovy     |   6 +-
 .../index_change/test_index_change_7.groovy        | 141 ---------------------
 .../inverted_index_p0/test_array_index.groovy      |  73 -----------
 ...st_array_with_single_row_max_than_uint16.groovy |  62 +++++++++
 22 files changed, 125 insertions(+), 308 deletions(-)

diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index 4bf4660927b..23469e7e694 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -426,9 +426,13 @@ public:
      *  convert(convert MutablePtr to ImmutablePtr or convert ImmutablePtr to 
MutablePtr)
      *  happends in filter_by_selector because of mem-reuse logic or 
ColumnNullable, I think this is meaningless;
      *  So using raw ptr directly here.
+     *  NOTICE: only column_nullable and predict_column, column_dictionary now 
support filter_by_selector
      */
     virtual Status filter_by_selector(const uint16_t* sel, size_t sel_size, 
IColumn* col_ptr) {
-        LOG(FATAL) << "column not support filter_by_selector";
+        LOG(FATAL) << get_name()
+                   << " do not support filter_by_selector, only 
column_nullable, column_dictionary "
+                      "and predict_column "
+                      "support";
         __builtin_unreachable();
     }
 
diff --git a/be/src/vec/columns/column_array.cpp 
b/be/src/vec/columns/column_array.cpp
index c528c54e847..06e3e35ce0b 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -808,38 +808,6 @@ void ColumnArray::insert_indices_from(const IColumn& src, 
const int* indices_beg
     }
 }
 
-Status ColumnArray::filter_by_selector(const uint16_t* sel, size_t sel_size, 
IColumn* col_ptr) {
-    auto to = reinterpret_cast<vectorized::ColumnArray*>(col_ptr);
-    auto& to_offsets = to->get_offsets();
-
-    size_t element_size = 0;
-    size_t max_offset = 0;
-    for (size_t i = 0; i < sel_size; ++i) {
-        element_size += size_at(sel[i]);
-        max_offset = std::max(max_offset, offset_at(sel[i]));
-    }
-    if (max_offset > std::numeric_limits<uint16_t>::max()) {
-        return Status::Corruption("array elements too large than 
uint16_t::max");
-    }
-
-    to_offsets.reserve(to_offsets.size() + sel_size);
-    auto nested_sel = std::make_unique<uint16_t[]>(element_size);
-    size_t nested_sel_size = 0;
-    for (size_t i = 0; i < sel_size; ++i) {
-        auto row_off = offset_at(sel[i]);
-        auto row_size = size_at(sel[i]);
-        to_offsets.push_back(to_offsets.back() + row_size);
-        for (auto j = 0; j < row_size; ++j) {
-            nested_sel[nested_sel_size++] = row_off + j;
-        }
-    }
-
-    if (nested_sel_size > 0) {
-        return data->filter_by_selector(nested_sel.get(), nested_sel_size, 
&to->get_data());
-    }
-    return Status::OK();
-}
-
 ColumnPtr ColumnArray::replicate(const IColumn::Offsets& replicate_offsets) 
const {
     if (replicate_offsets.empty()) return clone_empty();
 
diff --git a/be/src/vec/columns/column_array.h 
b/be/src/vec/columns/column_array.h
index bf89d74ba5d..6f317f5380a 100644
--- a/be/src/vec/columns/column_array.h
+++ b/be/src/vec/columns/column_array.h
@@ -252,7 +252,6 @@ public:
         offsets->clear();
     }
 
-    Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* 
col_ptr) override;
     size_t get_number_of_dimensions() const {
         const auto* nested_array = check_and_get_column<ColumnArray>(*data);
         if (!nested_array) {
diff --git a/be/src/vec/columns/column_map.cpp 
b/be/src/vec/columns/column_map.cpp
index 58a253c52ca..e134073f82e 100644
--- a/be/src/vec/columns/column_map.cpp
+++ b/be/src/vec/columns/column_map.cpp
@@ -404,40 +404,6 @@ size_t ColumnMap::filter(const Filter& filter) {
     return get_offsets().size();
 }
 
-Status ColumnMap::filter_by_selector(const uint16_t* sel, size_t sel_size, 
IColumn* col_ptr) {
-    auto to = reinterpret_cast<vectorized::ColumnMap*>(col_ptr);
-
-    auto& to_offsets = to->get_offsets();
-
-    size_t element_size = 0;
-    size_t max_offset = 0;
-    for (size_t i = 0; i < sel_size; ++i) {
-        element_size += size_at(sel[i]);
-        max_offset = std::max(max_offset, offset_at(sel[i]));
-    }
-    if (max_offset > std::numeric_limits<uint16_t>::max()) {
-        return Status::Corruption("map elements too large than uint16_t::max");
-    }
-
-    to_offsets.reserve(to_offsets.size() + sel_size);
-    auto nested_sel = std::make_unique<uint16_t[]>(element_size);
-    size_t nested_sel_size = 0;
-    for (size_t i = 0; i < sel_size; ++i) {
-        auto row_off = offset_at(sel[i]);
-        auto row_size = size_at(sel[i]);
-        to_offsets.push_back(to_offsets.back() + row_size);
-        for (auto j = 0; j < row_size; ++j) {
-            nested_sel[nested_sel_size++] = row_off + j;
-        }
-    }
-
-    if (nested_sel_size > 0) {
-        keys_column->filter_by_selector(nested_sel.get(), nested_sel_size, 
&to->get_keys());
-        values_column->filter_by_selector(nested_sel.get(), nested_sel_size, 
&to->get_values());
-    }
-    return Status::OK();
-}
-
 ColumnPtr ColumnMap::permute(const Permutation& perm, size_t limit) const {
     // Make a temp column array
     auto k_arr =
diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h
index 5dc1c22aefc..c4edec97161 100644
--- a/be/src/vec/columns/column_map.h
+++ b/be/src/vec/columns/column_map.h
@@ -114,7 +114,6 @@ public:
 
     ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const 
override;
     size_t filter(const Filter& filter) override;
-    Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* 
col_ptr) override;
     ColumnPtr permute(const Permutation& perm, size_t limit) const override;
     ColumnPtr replicate(const Offsets& offsets) const override;
     void replicate(const uint32_t* indexs, size_t target_size, IColumn& 
column) const override;
diff --git a/be/src/vec/columns/column_nullable.cpp 
b/be/src/vec/columns/column_nullable.cpp
index 6ef951dcd42..342da451185 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -362,8 +362,16 @@ Status ColumnNullable::filter_by_selector(const uint16_t* 
sel, size_t sel_size,
     ColumnPtr null_map_ptr = nullable_col_ptr->null_map;
     RETURN_IF_ERROR(get_nested_column().filter_by_selector(
             sel, sel_size, 
const_cast<doris::vectorized::IColumn*>(nest_col_ptr.get())));
-    RETURN_IF_ERROR(get_null_map_column().filter_by_selector(
-            sel, sel_size, 
const_cast<doris::vectorized::IColumn*>(null_map_ptr.get())));
+    //insert cur nullmap into result nullmap which is empty
+    auto& res_nullmap = reinterpret_cast<vectorized::ColumnVector<UInt8>*>(
+                                
const_cast<doris::vectorized::IColumn*>(null_map_ptr.get()))
+                                ->get_data();
+    DCHECK(res_nullmap.empty());
+    res_nullmap.resize(sel_size);
+    auto& cur_nullmap = get_null_map_column().get_data();
+    for (size_t i = 0; i < sel_size; i++) {
+        res_nullmap[i] = cur_nullmap[sel[i]];
+    }
     return Status::OK();
 }
 
diff --git a/be/src/vec/columns/column_struct.cpp 
b/be/src/vec/columns/column_struct.cpp
index 78c438a5e1b..88391370398 100644
--- a/be/src/vec/columns/column_struct.cpp
+++ b/be/src/vec/columns/column_struct.cpp
@@ -262,16 +262,6 @@ size_t ColumnStruct::filter(const Filter& filter) {
     return result_size;
 }
 
-Status ColumnStruct::filter_by_selector(const uint16_t* sel, size_t sel_size, 
IColumn* col_ptr) {
-    auto to = reinterpret_cast<vectorized::ColumnStruct*>(col_ptr);
-    const size_t tuple_size = columns.size();
-    DCHECK_EQ(to->tuple_size(), tuple_size);
-    for (size_t i = 0; i < tuple_size; ++i) {
-        columns[i]->filter_by_selector(sel, sel_size, &to->get_column(i));
-    }
-    return Status::OK();
-}
-
 ColumnPtr ColumnStruct::permute(const Permutation& perm, size_t limit) const {
     const size_t tuple_size = columns.size();
     Columns new_columns(tuple_size);
diff --git a/be/src/vec/columns/column_struct.h 
b/be/src/vec/columns/column_struct.h
index 820c151a941..ba9c222b6a8 100644
--- a/be/src/vec/columns/column_struct.h
+++ b/be/src/vec/columns/column_struct.h
@@ -150,7 +150,6 @@ public:
     void insert_range_from(const IColumn& src, size_t start, size_t length) 
override;
     ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const 
override;
     size_t filter(const Filter& filter) override;
-    Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* 
col_ptr) override;
     ColumnPtr permute(const Permutation& perm, size_t limit) const override;
     ColumnPtr replicate(const Offsets& offsets) const override;
     void replicate(const uint32_t* counts, size_t target_size, IColumn& 
column) const override;
diff --git a/be/src/vec/columns/column_vector.h 
b/be/src/vec/columns/column_vector.h
index 04908d87116..fc4015e55b4 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -408,12 +408,6 @@ public:
     ColumnPtr filter(const IColumn::Filter& filt, ssize_t result_size_hint) 
const override;
     size_t filter(const IColumn::Filter& filter) override;
 
-    // note(wb) this method is only used in storage layer now
-    Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* 
col_ptr) override {
-        insert_res_column(sel, sel_size, 
reinterpret_cast<vectorized::ColumnVector<T>*>(col_ptr));
-        return Status::OK();
-    }
-
     ColumnPtr permute(const IColumn::Permutation& perm, size_t limit) const 
override;
 
     //    ColumnPtr index(const IColumn & indexes, size_t limit) const 
override;
diff --git a/be/src/vec/exec/scan/vscan_node.cpp 
b/be/src/vec/exec/scan/vscan_node.cpp
index 07c68d1b179..32bc5bbe88e 100644
--- a/be/src/vec/exec/scan/vscan_node.cpp
+++ b/be/src/vec/exec/scan/vscan_node.cpp
@@ -662,6 +662,12 @@ bool VScanNode::_is_predicate_acting_on_slot(
     if (_slot_id_to_value_range.end() == entry) {
         return false;
     }
+    // if the slot is a complex type(array/map/struct), we do not push down 
the predicate, because
+    // we delete pack these type into predict column, and origin pack action 
is wrong. we should
+    // make sense to push down this complex type after we delete predict 
column.
+    if (is_complex_type(remove_nullable(slot_ref->data_type()))) {
+        return false;
+    }
     *slot_desc = entry->second.first;
     DCHECK(child_contains_slot != nullptr);
     if (child_contains_slot->type().type != (*slot_desc)->type().type ||
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DeleteStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/DeleteStmt.java
index bbf09189c26..ff6be0f1d69 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DeleteStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DeleteStmt.java
@@ -131,7 +131,7 @@ public class DeleteStmt extends DdlStmt {
             try {
                 analyzePredicate(wherePredicate, analyzer);
                 checkDeleteConditions();
-            } catch (Exception e) {
+            } catch (AnalysisException e) {
                 if (!(((OlapTable) targetTable).getKeysType() == 
KeysType.UNIQUE_KEYS)) {
                     throw new AnalysisException(e.getMessage(), e.getCause());
                 }
@@ -333,6 +333,14 @@ public class DeleteStmt extends DdlStmt {
             }
 
             Column column = nameToColumn.get(columnName);
+            // TODO(Now we can not push down non-scala type like 
array/map/struct to storage layer because of
+            //  predict_column in be not support non-scala type, so we just 
should ban this type in delete predict, when
+            //  we delete predict_column in be we should delete this ban)
+            if (!column.getType().isScalarType()) {
+                throw new AnalysisException(String.format("Can not apply 
delete condition to column type: %s ",
+                        column.getType()));
+
+            }
             // Due to rounding errors, most floating-point numbers end up 
being slightly imprecise,
             // it also means that numbers expected to be equal often differ 
slightly, so we do not allow compare with
             // floating-point numbers, floating-point number not allowed in 
where clause
diff --git a/regression-test/data/delete_p0/test_array_column_delete.out 
b/regression-test/data/delete_p0/test_array_column_delete.out
index 8324b608c95..84ff69b5b04 100644
--- a/regression-test/data/delete_p0/test_array_column_delete.out
+++ b/regression-test/data/delete_p0/test_array_column_delete.out
@@ -2,4 +2,6 @@
 -- !sql --
 2      [12, 3]
 3      []
+4      \N
+5      \N
 
diff --git a/regression-test/data/delete_p0/test_map_column_delete.out 
b/regression-test/data/delete_p0/test_map_column_delete.out
index 1d3928c4372..4973cd95200 100644
--- a/regression-test/data/delete_p0/test_map_column_delete.out
+++ b/regression-test/data/delete_p0/test_map_column_delete.out
@@ -1,4 +1,7 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !sql --
-1      {1:"a", 2:"doris"}
 2      {}
+3      \N
+4      \N
+5      \N
+
diff --git a/regression-test/data/delete_p0/test_struct_column_delete.out 
b/regression-test/data/delete_p0/test_struct_column_delete.out
index 7fb3de0857a..8881e21102e 100644
--- a/regression-test/data/delete_p0/test_struct_column_delete.out
+++ b/regression-test/data/delete_p0/test_struct_column_delete.out
@@ -1,4 +1,7 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !sql --
-1      {1, "a"}
+2      \N
+3      \N
+4      \N
+5      \N
 
diff --git a/regression-test/data/query_p0/arr_max.orc 
b/regression-test/data/query_p0/arr_max.orc
new file mode 100644
index 00000000000..aa3f223f6db
Binary files /dev/null and b/regression-test/data/query_p0/arr_max.orc differ
diff --git 
a/regression-test/data/query_p0/test_array_with_single_row_max_than_uint16.out 
b/regression-test/data/query_p0/test_array_with_single_row_max_than_uint16.out
new file mode 100644
index 00000000000..69d8ad12f6c
--- /dev/null
+++ 
b/regression-test/data/query_p0/test_array_with_single_row_max_than_uint16.out
@@ -0,0 +1,8 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+2
+
+-- !select --
+0      0       [35628.35424, 69196.54527, 620089.83930, 718055.42897, 
666144.56004, 581966.78124, 960555.59487, 962358.97280, 597982.61447, 
599915.30318, 189368.03938, 248323.33975, 487719.23824, 702336.66719, 
600343.11009, 314861.06115, 795215.86424, 978363.53641, 571101.30698, 
589576.62694, 53490.33403, 922022.51763, 74276.42954, 247725.12397, 
180779.69115, 767651.87400, 753392.11864, 110595.83618, 860460.12897, 
663661.05048, 179403.42942, 16163.98453, 515465.38864, 185150.72287, 
170502.12778, 78 [...]
+1      1       [922791.22464, 817263.50495, 299456.68735, 633370.12575, 
931884.61968, 692539.95931, 316451.16242, 102829.12044, 699700.97677, 
949370.34560, 834343.57470, 373731.91394, 179862.35906, 640657.76137, 
317546.75233, 235735.47890, 132475.88172, 982764.27206, 338973.08592, 
104625.94936, 786039.68905, 22590.93853, 535615.61487, 333129.30953, 
335180.67243, 795984.08118, 948803.99582, 680666.10378, 24498.46734, 
459431.97536, 183492.21048, 502627.67817, 757266.89062, 167055.23662, 
775036.26129, [...]
+
diff --git a/regression-test/suites/delete_p0/test_array_column_delete.groovy 
b/regression-test/suites/delete_p0/test_array_column_delete.groovy
index 0d75c257644..442b4f59957 100644
--- a/regression-test/suites/delete_p0/test_array_column_delete.groovy
+++ b/regression-test/suites/delete_p0/test_array_column_delete.groovy
@@ -21,6 +21,10 @@ suite("test_array_column_delete") {
     sql """ DROP TABLE IF EXISTS ${tableName}; """
     sql """ CREATE TABLE IF NOT EXISTS ${tableName} (id INT NULL, c_array 
ARRAY<INT> NULL) ENGINE=OLAP DUPLICATE KEY(id) DISTRIBUTED BY HASH(id) BUCKETS 
4 PROPERTIES ( "replication_allocation" = "tag.location.default: 1","in_memory" 
= "false","storage_format" = "V2") """
     sql """ insert into ${tableName} values(1, 
NULL),(2,[12,3]),(3,[]),(4,NULL),(5,NULL) """
-    sql """ DELETE FROM ${tableName} WHERE c_array is NULL """
+    test {
+        sql """ DELETE FROM ${tableName} WHERE c_array is NULL """
+        exception("java.sql.SQLException: errCode = 2, detailMessage = errCode 
= 2, detailMessage = Can not apply delete condition to column type: ARRAY<INT>")
+    }
+    sql """ DELETE FROM ${tableName} WHERE id = 1; """
     qt_sql """ SELECT * FROM ${tableName} order by id """
 }
diff --git a/regression-test/suites/delete_p0/test_map_column_delete.groovy 
b/regression-test/suites/delete_p0/test_map_column_delete.groovy
index cb3a36e60ef..e82c6885cd5 100644
--- a/regression-test/suites/delete_p0/test_map_column_delete.groovy
+++ b/regression-test/suites/delete_p0/test_map_column_delete.groovy
@@ -22,6 +22,10 @@ suite("test_map_column_delete") {
 
     sql """ CREATE TABLE IF NOT EXISTS ${tableName} (id INT NULL, m_map 
MAP<INT, VARCHAR(30)> NULL) ENGINE=OLAP DUPLICATE KEY(id) DISTRIBUTED BY 
HASH(id) BUCKETS 4 PROPERTIES ( "replication_allocation" = 
"tag.location.default: 1","in_memory" = "false","storage_format" = "V2") """
     sql """ insert into ${tableName} values(1, {1:'a', 
2:"doris"}),(2,{}),(3,NULL),(4,NULL),(5,NULL) """
-    sql """ DELETE FROM ${tableName} WHERE m_map is NULL """
+    test {
+        sql """ DELETE FROM ${tableName} WHERE m_map is NULL """
+        exception("java.sql.SQLException: errCode = 2, detailMessage = errCode 
= 2, detailMessage = Can not apply delete condition to column type: 
MAP<INT,VARCHAR(30)>")
+    }
+    sql """ DELETE FROM ${tableName} WHERE id = 1; """
     qt_sql """ SELECT * FROM ${tableName} order by id """
 }
diff --git a/regression-test/suites/delete_p0/test_struct_column_delete.groovy 
b/regression-test/suites/delete_p0/test_struct_column_delete.groovy
index baaf60161c3..26f24837a53 100644
--- a/regression-test/suites/delete_p0/test_struct_column_delete.groovy
+++ b/regression-test/suites/delete_p0/test_struct_column_delete.groovy
@@ -21,6 +21,10 @@ suite("test_struct_column_delete") {
     sql """ DROP TABLE IF EXISTS ${tableName}; """
     sql """ CREATE TABLE IF NOT EXISTS ${tableName} (id INT NULL, s_struct 
STRUCT<f1:INT, f2:VARCHAR(30)> NULL) ENGINE=OLAP DUPLICATE KEY(id) DISTRIBUTED 
BY HASH(id) BUCKETS 4 PROPERTIES ( "replication_allocation" = 
"tag.location.default: 1","in_memory" = "false","storage_format" = "V2") """
     sql """ insert into ${tableName} values(1, {1, 
'a'}),(2,NULL),(3,NULL),(4,NULL),(5,NULL) """
-    sql """ DELETE FROM ${tableName} WHERE s_struct is NULL """
+    test {
+        sql """ DELETE FROM ${tableName} WHERE s_struct is NULL """
+        exception("java.sql.SQLException: errCode = 2, detailMessage = errCode 
= 2, detailMessage = Can not apply delete condition to column type: 
STRUCT<f1:INT,f2:VARCHAR(30)>")
+    }
+    sql """ DELETE FROM ${tableName} WHERE id = 1; """
     qt_sql """ SELECT * FROM ${tableName} order by id """
 }
diff --git 
a/regression-test/suites/inverted_index_p0/index_change/test_index_change_7.groovy
 
b/regression-test/suites/inverted_index_p0/index_change/test_index_change_7.groovy
deleted file mode 100644
index 6c08392b4d6..00000000000
--- 
a/regression-test/suites/inverted_index_p0/index_change/test_index_change_7.groovy
+++ /dev/null
@@ -1,141 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-import org.codehaus.groovy.runtime.IOGroovyMethods
-
-suite("test_index_change_7") {
-    def timeout = 60000
-    def delta_time = 1000
-    def alter_res = "null"
-    def useTime = 0
-
-    def wait_for_latest_op_on_table_finish = { table_name, OpTimeout ->
-        for(int t = delta_time; t <= OpTimeout; t += delta_time){
-            alter_res = sql """SHOW ALTER TABLE COLUMN WHERE TableName = 
"${table_name}" ORDER BY CreateTime DESC LIMIT 1;"""
-            alter_res = alter_res.toString()
-            if(alter_res.contains("FINISHED")) {
-                sleep(3000) // wait change table state to normal
-                logger.info(table_name + " latest alter job finished, detail: 
" + alter_res)
-                break
-            }
-            useTime = t
-            sleep(delta_time)
-        }
-        assertTrue(useTime <= OpTimeout, "wait_for_latest_op_on_table_finish 
timeout")
-    }
-
-    def wait_for_build_index_on_partition_finish = { table_name, OpTimeout ->
-        for(int t = delta_time; t <= OpTimeout; t += delta_time){
-            alter_res = sql """SHOW BUILD INDEX WHERE TableName = 
"${table_name}";"""
-            def expected_finished_num = alter_res.size();
-            def finished_num = 0;
-            for (int i = 0; i < expected_finished_num; i++) {
-                logger.info(table_name + " build index job state: " + 
alter_res[i][7] + i)
-                if (alter_res[i][7] == "FINISHED") {
-                    ++finished_num;
-                }
-            }
-            if (finished_num == expected_finished_num) {
-                logger.info(table_name + " all build index jobs finished, 
detail: " + alter_res)
-                break
-            }
-            useTime = t
-            sleep(delta_time)
-        }
-        assertTrue(useTime <= OpTimeout, 
"wait_for_latest_build_index_on_partition_finish timeout")
-    }
-    
-    def tableName = "test_index_change_7"
-
-    sql """ DROP TABLE IF EXISTS ${tableName} """
-
-    sql """
-           CREATE TABLE IF NOT EXISTS ${tableName}(
-               `id`int(11)NULL,
-               `int_array` array<int(20)> NULL,
-               `c_array` array<varchar(20)> NULL,
-               INDEX c_array_idx(`c_array`) USING INVERTED 
PROPERTIES("parser"="english") COMMENT 'c_array index',
-               INDEX int_array_idx(`int_array`) USING INVERTED COMMENT 
'int_array index'
-           ) ENGINE=OLAP
-           DUPLICATE KEY(`id`)
-           COMMENT 'OLAP'
-           DISTRIBUTED BY HASH(`id`) BUCKETS 1
-           PROPERTIES(
-               "replication_allocation" = "tag.location.default: 1"
-           );
-        """
-
-    sql "INSERT INTO ${tableName} VALUES (1, [10,20,30], 
['i','love','china']), (2, [20,30,40], ['i','love','north korea']), (3, 
[30,40,50], NULL);"
-    sql "INSERT INTO ${tableName} VALUES (4, [40,50,60], NULL);"
-    
-    qt_select1 """ SELECT * FROM ${tableName} t ORDER BY id; """
-    qt_select2 """ SELECT * FROM ${tableName} t WHERE c_array MATCH 'china' 
ORDER BY id; """
-    qt_select3 """ SELECT * FROM ${tableName} t WHERE c_array MATCH 'love' 
ORDER BY id; """
-    qt_select4 """ SELECT * FROM ${tableName} t WHERE c_array MATCH 'north' 
ORDER BY id; """
-    qt_select5 """ SELECT * FROM ${tableName} t WHERE c_array MATCH 'korea' 
ORDER BY id; """
-    qt_select6 """ SELECT * FROM ${tableName} t WHERE int_array element_ge 40 
ORDER BY id; """
-    qt_select7 """ SELECT * FROM ${tableName} t WHERE int_array element_le 40 
ORDER BY id; """
-    qt_select8 """ SELECT * FROM ${tableName} t WHERE int_array element_gt 40 
ORDER BY id; """
-    qt_select9 """ SELECT * FROM ${tableName} t WHERE int_array element_lt 40 
ORDER BY id; """
-    qt_select10 """ SELECT * FROM ${tableName} t WHERE int_array element_eq 10 
ORDER BY id; """
-    qt_select11 """ SELECT * FROM ${tableName} t WHERE int_array element_eq 20 
ORDER BY id; """
-    qt_select12 """ SELECT * FROM ${tableName} t WHERE int_array element_eq 30 
ORDER BY id; """
-    qt_select13 """ SELECT * FROM ${tableName} t WHERE int_array element_eq 40 
ORDER BY id; """
-    qt_select14 """ SELECT * FROM ${tableName} t WHERE int_array element_eq 50 
ORDER BY id; """
-    qt_select15 """ SELECT * FROM ${tableName} t WHERE int_array element_eq 60 
ORDER BY id; """
-
-    // drop inverted index c_array_idx, int_array_idx
-    sql """ DROP INDEX c_array_idx ON ${tableName} """
-    sql """ DROP INDEX int_array_idx ON ${tableName} """
-
-    // create inverted index
-    sql """ CREATE INDEX c_array_idx ON ${tableName}(`c_array`) USING INVERTED 
PROPERTIES("parser"="english") """
-    sql """ CREATE INDEX int_array_idx ON ${tableName}(`int_array`) USING 
INVERTED """
-
-    wait_for_latest_op_on_table_finish(tableName, timeout)
-
-    // build inverted index
-    sql """ BUILD INDEX c_array_idx ON ${tableName} """
-    sql """ BUILD INDEX int_array_idx ON ${tableName} """
-    wait_for_build_index_on_partition_finish(tableName, timeout)
-
-    def show_result = sql "show index from ${tableName}"
-    logger.info("show index from " + tableName + " result: " + show_result)
-    assertEquals(show_result.size(), 2)
-    assertEquals(show_result[0][2], "c_array_idx")
-    assertEquals(show_result[1][2], "int_array_idx")
-
-    qt_select16 """ SELECT * FROM ${tableName} t WHERE c_array MATCH 'china' 
ORDER BY id; """
-    qt_select17 """ SELECT * FROM ${tableName} t WHERE c_array MATCH 'love' 
ORDER BY id; """
-    qt_select18 """ SELECT * FROM ${tableName} t WHERE c_array MATCH 'north' 
ORDER BY id; """
-    qt_select19 """ SELECT * FROM ${tableName} t WHERE c_array MATCH 'korea' 
ORDER BY id; """
-
-    try {
-        qt_select20 """ SELECT * FROM ${tableName} t WHERE int_array 
element_ge 40 ORDER BY id; """
-        qt_select21 """ SELECT * FROM ${tableName} t WHERE int_array 
element_le 40 ORDER BY id; """
-        qt_select22 """ SELECT * FROM ${tableName} t WHERE int_array 
element_gt 40 ORDER BY id; """
-        qt_select23 """ SELECT * FROM ${tableName} t WHERE int_array 
element_lt 40 ORDER BY id; """
-        qt_select24 """ SELECT * FROM ${tableName} t WHERE int_array 
element_eq 10 ORDER BY id; """
-        qt_select25 """ SELECT * FROM ${tableName} t WHERE int_array 
element_eq 20 ORDER BY id; """
-        qt_select26 """ SELECT * FROM ${tableName} t WHERE int_array 
element_eq 30 ORDER BY id; """
-        qt_select27 """ SELECT * FROM ${tableName} t WHERE int_array 
element_eq 40 ORDER BY id; """
-        qt_select28 """ SELECT * FROM ${tableName} t WHERE int_array 
element_eq 50 ORDER BY id; """
-        qt_select29 """ SELECT * FROM ${tableName} t WHERE int_array 
element_eq 60 ORDER BY id; """
-    } catch(Exception ex) {
-        logger.info("execute array element query failed when build index not 
finished, result: " + ex)
-    }
-}
diff --git a/regression-test/suites/inverted_index_p0/test_array_index.groovy 
b/regression-test/suites/inverted_index_p0/test_array_index.groovy
deleted file mode 100644
index 7fd95f5ad12..00000000000
--- a/regression-test/suites/inverted_index_p0/test_array_index.groovy
+++ /dev/null
@@ -1,73 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-
-suite("test_array_index"){
-    // prepare test table
-
-
-    def timeout = 60000
-    def delta_time = 1000
-    def alter_res = "null"
-    def useTime = 0
-
-    def indexTblName = "array_test"
-
-    sql "DROP TABLE IF EXISTS ${indexTblName}"
-    // create 1 replica table
-    sql """
-       CREATE TABLE IF NOT EXISTS ${indexTblName}(
-               `id`int(11)NULL,
-               `int_array` array<int(20)> NULL,
-               `c_array` array<varchar(20)> NULL,
-               INDEX c_array_idx(`c_array`) USING INVERTED 
PROPERTIES("parser"="english") COMMENT 'c_array index',
-               INDEX int_array_idx(`int_array`) USING INVERTED COMMENT 
'int_array index'
-       ) ENGINE=OLAP
-       DUPLICATE KEY(`id`)
-       COMMENT 'OLAP'
-       DISTRIBUTED BY HASH(`id`) BUCKETS 1
-       PROPERTIES(
-               "replication_allocation" = "tag.location.default: 1"
-       );
-    """
-    
-    def var_result = sql "show variables"
-    logger.info("show variales result: " + var_result )
-
-    sql "INSERT INTO $indexTblName VALUES (1, [10,20,30], 
['i','love','china']), (2, [20,30,40], ['i','love','north korea']), (3, 
[30,40,50], NULL);"
-    sql "INSERT INTO $indexTblName VALUES (4, [40,50,60], NULL);"
-    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'china' ORDER BY 
id;"
-    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'love' ORDER BY 
id;"
-    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'north' ORDER BY 
id;"
-    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'korea' ORDER BY 
id;"
-    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_ge 40 ORDER BY 
id;"
-    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_le 40 ORDER BY 
id;"
-    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_gt 40 ORDER BY 
id;"
-    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_lt 40 ORDER BY 
id;"
-    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 10 ORDER BY 
id;"
-    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 20 ORDER BY 
id;"
-    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 30 ORDER BY 
id;"
-    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 40 ORDER BY 
id;"
-    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 50 ORDER BY 
id;"
-    qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 60 ORDER BY 
id;"
-
-    sql " ALTER TABLE $indexTblName drop index c_array_idx; "
-    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'china' ORDER BY 
id;"
-    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'love' ORDER BY 
id;"
-    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'north' ORDER BY 
id;"
-    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'korea' ORDER BY 
id;"
-}
diff --git 
a/regression-test/suites/query_p0/test_array_with_single_row_max_than_uint16.groovy
 
b/regression-test/suites/query_p0/test_array_with_single_row_max_than_uint16.groovy
new file mode 100644
index 00000000000..ab093df4438
--- /dev/null
+++ 
b/regression-test/suites/query_p0/test_array_with_single_row_max_than_uint16.groovy
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_array_with_single_row_max_than_uint16", "query") {
+    // define a sql table
+    sql """set enable_nereids_planner=false"""
+    List<List<Object>> backends =  sql """ select * from backends(); """
+    assertTrue(backends.size() > 0)
+    def be_id = backends[0][0]
+    def dataFilePath = context.config.dataPath + "/query_p0/"
+    def testTable = "arr_dd"
+    sql "DROP TABLE IF EXISTS ${testTable}"
+    def outFilePath= dataFilePath
+    if (backends.size() > 1) {
+        // cluster mode need to make sure all be has this data
+        outFilePath="/"
+        def transFile01 = "${dataFilePath}/arr_max.orc"
+        for (List<Object> backend : backends) {
+            def be_host = backend[1]
+            scpFiles("root", be_host, transFile01, outFilePath, false)
+        }
+    }
+
+    sql """
+            CREATE TABLE IF NOT EXISTS ${testTable} (
+              `k1` INT(11) NULL,
+              `k2` int(11) NULL,
+              `c_decimal` array<decimalv3(18, 5)> NULL DEFAULT "[]"
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`k1`)
+            COMMENT 'OLAP'
+            DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "in_memory" = "false",
+            "storage_format" = "V2",
+            "disable_auto_compaction" = "false"
+            )
+            """
+    // prepare data
+    qt_sql """
+            insert into ${testTable} select * from local(
+                "file_path" = "${outFilePath}/arr_max.orc",
+                "backend_id" = "${be_id}",
+                "format" = "orc");"""
+
+    qt_select """ select * from ${testTable} order by k1"""
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[doris] 20/47: [FIX](filter) update for filter_by_select logic (#25007)

Reply via email to