This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new 71f3c92400 [Bug](ColumnArray) Fix array column replicate `replicate_offsets` not matched (#17641)
71f3c92400 is described below

commit 71f3c92400abab87331a48f3f1c79382969c36cc
Author: lihangyu <[email protected]>
AuthorDate: Fri Mar 10 19:56:44 2023 +0800

    [Bug](ColumnArray) Fix array column replicate `replicate_offsets` not matched (#17641)
    
    The input `replicate_offsets` must be the same size as the ColumnArray's offsets.
    ```
    IColumn::Offsets replicate_offsets(get_offsets().size(), 0);
    // |---------------------|-------------------------|-------------------------|
    // [0, begin)             [begin, begin + count_sz)  [begin + count_sz, size())
    //  do not need to copy    copy counts[n] times       do not need to copy
    ```
    
    we should
---
 be/src/vec/columns/column_array.cpp                | 18 +++++++--
 .../data/load/insert/test_insert_nested_array.out  | 26 ++++++++++++
 .../set_operations/sql/array_nested_with_join.out  | 25 ++++++++++++
 .../load/insert/test_insert_nested_array.groovy    |  1 +
 .../suites/query_p0/set_operations/load.groovy     | 46 +++++++++++++++++++++-
 .../set_operations/sql/array_nested_with_join.sql  |  7 ++++
 6 files changed, 117 insertions(+), 6 deletions(-)

diff --git a/be/src/vec/columns/column_array.cpp 
b/be/src/vec/columns/column_array.cpp
index 4afa62e36d..4ae7ada9f7 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -566,14 +566,22 @@ void ColumnArray::replicate(const uint32_t* counts, 
size_t target_size, IColumn&
     if (col_size == 0) {
         return;
     }
-
-    IColumn::Offsets replicate_offsets(col_size);
+    // |---------------------|-------------------------|-------------------------|
+    // [0, begin)             [begin, begin + count_sz)  [begin + count_sz, size())
+    //  do not need to copy    copy counts[n] times       do not need to copy
+    IColumn::Offsets replicate_offsets(get_offsets().size(), 0);
     size_t cur_offset = 0;
     size_t end = begin + col_size;
+    // copy original data at offset n counts[n] times
     for (size_t i = begin; i < end; ++i) {
         cur_offset += counts[i];
-        replicate_offsets[i - begin] = cur_offset;
+        replicate_offsets[i] = cur_offset;
+    }
+    // ignored
+    for (size_t i = end; i < size(); ++i) {
+        replicate_offsets[i] = replicate_offsets[i - 1];
     }
+
     if (cur_offset != target_size) {
         LOG(WARNING) << "ColumnArray replicate input target_size:" << 
target_size
                      << " not equal SUM(counts):" << cur_offset;
@@ -765,7 +773,9 @@ ColumnPtr ColumnArray::replicate_generic(const 
IColumn::Offsets& replicate_offse
         size_t size_to_replicate = replicate_offsets[i] - prev_offset;
         prev_offset = replicate_offsets[i];
 
-        for (size_t j = 0; j < size_to_replicate; ++j) 
res_concrete.insert_from(*this, i);
+        for (size_t j = 0; j < size_to_replicate; ++j) {
+            res_concrete.insert_from(*this, i);
+        }
     }
 
     return res;
diff --git a/regression-test/data/load/insert/test_insert_nested_array.out 
b/regression-test/data/load/insert/test_insert_nested_array.out
index 9dc4355cfb..1278109ac1 100644
--- a/regression-test/data/load/insert/test_insert_nested_array.out
+++ b/regression-test/data/load/insert/test_insert_nested_array.out
@@ -55,3 +55,29 @@
 6      [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]
 6      [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]
 
+-- !select --
+1      []      1       []
+1      []      1       []
+1      []      1       []
+1      []      1       []
+2      [NULL]  2       [NULL]
+2      [NULL]  2       [NULL]
+2      [NULL]  2       [NULL]
+2      [NULL]  2       [NULL]
+3      [[]]    3       [[]]
+3      [[]]    3       [[]]
+3      [[]]    3       [[]]
+3      [[]]    3       [[]]
+4      [[NULL]]        4       [[NULL]]
+4      [[NULL]]        4       [[NULL]]
+4      [[NULL]]        4       [[NULL]]
+4      [[NULL]]        4       [[NULL]]
+5      [[[]]]  5       [[[]]]
+5      [[[]]]  5       [[[]]]
+5      [[[]]]  5       [[[]]]
+5      [[[]]]  5       [[[]]]
+6      [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]      6       
[[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]
+6      [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]      6       
[[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]
+6      [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]      6       
[[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]
+6      [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]      6       
[[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]
+
diff --git 
a/regression-test/data/query_p0/set_operations/sql/array_nested_with_join.out 
b/regression-test/data/query_p0/set_operations/sql/array_nested_with_join.out
new file mode 100644
index 0000000000..220c1f7bdb
--- /dev/null
+++ 
b/regression-test/data/query_p0/set_operations/sql/array_nested_with_join.out
@@ -0,0 +1,25 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !array_nested_with_join --
+1      F       AIR     1992-01-16      AIR     []
+1      F       AIR     1992-01-16      AIR     []
+1      F       AIR     1992-01-16      AIR     []
+1      F       AIR     1992-01-19      AIR     []
+1      F       AIR     1992-01-19      AIR     []
+1      F       AIR     1992-01-19      AIR     []
+1      F       AIR     1992-01-23      AIR     []
+1      F       AIR     1992-01-23      AIR     []
+1      F       AIR     1992-01-23      AIR     []
+1      F       AIR     1992-01-26      AIR     []
+
+-- !array_nested_with_join_2 --
+1      F       AIR     1992-01-16      AIR     []
+1      F       AIR     1992-01-16      AIR     []
+1      F       AIR     1992-01-16      AIR     []
+1      F       AIR     1992-01-19      AIR     []
+1      F       AIR     1992-01-19      AIR     []
+1      F       AIR     1992-01-19      AIR     []
+1      F       AIR     1992-01-23      AIR     []
+1      F       AIR     1992-01-23      AIR     []
+1      F       AIR     1992-01-23      AIR     []
+1      F       AIR     1992-01-26      AIR     []
+
diff --git a/regression-test/suites/load/insert/test_insert_nested_array.groovy 
b/regression-test/suites/load/insert/test_insert_nested_array.groovy
index bbfb615d61..5e1cf93272 100644
--- a/regression-test/suites/load/insert/test_insert_nested_array.groovy
+++ b/regression-test/suites/load/insert/test_insert_nested_array.groovy
@@ -88,6 +88,7 @@ suite("test_insert_nested_array", "load") {
                 (6, [[[null]], [[1], [2, 3]], [[4, 5, 6], null, null]])
         """
         qt_select "select * from ${tableName} order by `key`"
+        qt_select "select * from ${tableName} as t1 right join ${tableName} as 
t2 on t1.`key` = t2.`key` order by t1.`key`"
     }
 
     test_nested_array_2_depths.call(false)
diff --git a/regression-test/suites/query_p0/set_operations/load.groovy 
b/regression-test/suites/query_p0/set_operations/load.groovy
index 0086ed87a5..b374ab1e9c 100644
--- a/regression-test/suites/query_p0/set_operations/load.groovy
+++ b/regression-test/suites/query_p0/set_operations/load.groovy
@@ -28,7 +28,7 @@ suite("load") {
     for (String table in tables) {
         sql new File("""${context.file.parent}/ddl/${table}.sql""").text
     }
-
+    
     for (String tableName in tables) {
         streamLoad {
             // you can skip declare db, because a default db already specify 
in ${DORIS_HOME}/conf/regression-conf.groovy
@@ -59,4 +59,46 @@ suite("load") {
             }
         }
     }
-}
\ No newline at end of file
+
+    // nested array with join
+    def test_nested_array_2_depths = {
+        def tableName = "nested_array_test_2_vectorized"
+
+        sql "DROP TABLE IF EXISTS ${tableName}"
+        sql """
+            CREATE TABLE IF NOT EXISTS ${tableName} (
+                `key` INT,
+                value ARRAY<ARRAY<INT>>
+            ) DUPLICATE KEY (`key`) DISTRIBUTED BY HASH (`key`) BUCKETS 1
+            PROPERTIES ('replication_num' = '1')
+        """
+
+        sql "INSERT INTO ${tableName} VALUES (1, [])"
+        sql "INSERT INTO ${tableName} VALUES (2, [null])"
+        sql "INSERT INTO ${tableName} VALUES (3, [[]])"
+        sql "INSERT INTO ${tableName} VALUES (4, [[1, 2, 3], [4, 5, 6]])"
+        sql "INSERT INTO ${tableName} VALUES (5, [[1, 2, 3], null, [4, 5, 6]])"
+        sql "INSERT INTO ${tableName} VALUES (6, [[1, 2, null], null, [4, 
null, 6], null, [null, 8, 9]])"
+       
+        sql """
+            INSERT INTO ${tableName} VALUES
+                (1, []),
+                (2, [null]),
+                (3, [[]]),
+                (4, [[1, 2, 3], [4, 5, 6]]),
+                (5, [[1, 2, 3], null, [4, 5, 6]]),
+                (6, [[1, 2, null], null, [4, null, 6], null, [null, 8, 9]])
+        """ 
+
+         sql """
+            INSERT INTO ${tableName} VALUES
+                (1, []),
+                (2, [null]),
+                (3, [[]]),
+                (4, [[1, 2, 3], [4, 5, 6]]),
+                (5, [[1, 2, 3], null, [4, 5, 6]]),
+                (6, [[1, 2, null], null, [4, null, 6], null, [null, 8, 9]])
+        """ 
+    }
+    test_nested_array_2_depths.call()
+    }
diff --git 
a/regression-test/suites/query_p0/set_operations/sql/array_nested_with_join.sql 
b/regression-test/suites/query_p0/set_operations/sql/array_nested_with_join.sql
new file mode 100644
index 0000000000..d552ca552c
--- /dev/null
+++ 
b/regression-test/suites/query_p0/set_operations/sql/array_nested_with_join.sql
@@ -0,0 +1,7 @@
+select /*+ SET_VAR(query_timeout = 600) */ ref_52.`key` as k, 
ref_54.`linestatus` as c0,  ref_54.`shipmode` as c1,    ref_54.`shipdate` as 
c2,    ref_54.`shipmode` as c3, ref_52.`value`  as c4 
+from    nested_array_test_2_vectorized as ref_52  right join 
tpch_tiny_lineitem as ref_54  on (ref_52.`key` = ref_54.`linenumber` )
+where ref_52.`value` is not NULL order by 1, 2, 3, 4, 5 limit 10;
+
+select /*+ SET_VAR(query_timeout = 600) */ ref_52.`key`  as k, 
ref_54.`linestatus` as c0,  ref_54.`shipmode` as c1,    ref_54.`shipdate` as 
c2,    ref_54.`shipmode` as c3, ref_52.`value`  as c4 
+from    nested_array_test_2_vectorized as ref_52  right join 
tpch_tiny_lineitem as ref_54  on (ref_52.`key` = ref_54.`linenumber` )
+where ref_52.`value` is not NULL order by 1, 2, 3, 4, 5 limit 10;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to