This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new a8cc1fd75bc branch-3.1: [fix](array) Fix array distance functions 
#54348 (#54372)
a8cc1fd75bc is described below

commit a8cc1fd75bca319c1f6acc232615ea3b801fd27c
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Aug 7 20:21:39 2025 +0800

    branch-3.1: [fix](array) Fix array distance functions #54348 (#54372)
    
    Cherry-picked from #54348
    
    Co-authored-by: zhiqiang <[email protected]>
---
 .../vec/functions/array/function_array_distance.h  |  20 +++++--
 .../functions/scalar/CosineDistance.java           |   5 +-
 .../expressions/functions/scalar/InnerProduct.java |   5 +-
 .../expressions/functions/scalar/L1Distance.java   |   5 +-
 .../expressions/functions/scalar/L2Distance.java   |   5 +-
 .../test_array_distance_functions.out              | Bin 235 -> 654 bytes
 .../test_array_distance_functions.groovy           |  64 +++++++++++++++++++++
 7 files changed, 87 insertions(+), 17 deletions(-)

diff --git a/be/src/vec/functions/array/function_array_distance.h 
b/be/src/vec/functions/array/function_array_distance.h
index 4349e93c97c..5a855c04988 100644
--- a/be/src/vec/functions/array/function_array_distance.h
+++ b/be/src/vec/functions/array/function_array_distance.h
@@ -141,25 +141,35 @@ public:
             }
 
             dst_null_data[row] = false;
-            if (offsets1[row] != offsets2[row]) [[unlikely]] {
+
+            // Calculate actual array sizes for current row.
+            // For nullable arrays, we cannot compare absolute offset values 
directly because:
+            // 1. When a row is null, its offset might equal the previous 
offset (no elements added)
+            // 2. Or it might include the array size even if the row is null 
(implementation dependent)
+            // Therefore, we must calculate the actual array size as: 
offsets[row] - offsets[row-1]
+            ssize_t size1 = offsets1[row] - offsets1[row - 1];
+            ssize_t size2 = offsets2[row] - offsets2[row - 1];
+
+            if (size1 != size2) [[unlikely]] {
                 return Status::InvalidArgument(
                         "function {} have different input element sizes of 
array: {} and {}",
-                        get_name(), offsets1[row] - offsets1[row - 1],
-                        offsets2[row] - offsets2[row - 1]);
+                        get_name(), size1, size2);
             }
 
             typename DistanceImpl::State st;
             for (ssize_t pos = offsets1[row - 1]; pos < offsets1[row]; ++pos) {
+                // Calculate corresponding position in the second array
+                ssize_t pos2 = offsets2[row - 1] + (pos - offsets1[row - 1]);
                 if (arr1.nested_nullmap_data && arr1.nested_nullmap_data[pos]) 
{
                     dst_null_data[row] = true;
                     break;
                 }
-                if (arr2.nested_nullmap_data && arr2.nested_nullmap_data[pos]) 
{
+                if (arr2.nested_nullmap_data && 
arr2.nested_nullmap_data[pos2]) {
                     dst_null_data[row] = true;
                     break;
                 }
                 DistanceImpl::accumulate(st, nested_col1->get_element(pos),
-                                         nested_col2->get_element(pos));
+                                         nested_col2->get_element(pos2));
             }
             if (!dst_null_data[row]) {
                 dst_data[row] = DistanceImpl::finalize(st);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CosineDistance.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CosineDistance.java
index 9761f949297..14c388bb933 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CosineDistance.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CosineDistance.java
@@ -20,9 +20,8 @@ package 
org.apache.doris.nereids.trees.expressions.functions.scalar;
 import org.apache.doris.catalog.FunctionSignature;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
-import 
org.apache.doris.nereids.trees.expressions.functions.ComputePrecisionForArrayItemAgg;
 import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
-import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.nereids.types.ArrayType;
 import org.apache.doris.nereids.types.DoubleType;
@@ -36,7 +35,7 @@ import java.util.List;
  * cosine_distance function
  */
 public class CosineDistance extends ScalarFunction implements 
ExplicitlyCastableSignature,
-        ComputePrecisionForArrayItemAgg, UnaryExpression, AlwaysNullable {
+        BinaryExpression, AlwaysNullable {
 
     public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
             FunctionSignature.ret(DoubleType.INSTANCE)
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/InnerProduct.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/InnerProduct.java
index 5d226951cb1..a56d5d5a522 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/InnerProduct.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/InnerProduct.java
@@ -20,9 +20,8 @@ package 
org.apache.doris.nereids.trees.expressions.functions.scalar;
 import org.apache.doris.catalog.FunctionSignature;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
-import 
org.apache.doris.nereids.trees.expressions.functions.ComputePrecisionForArrayItemAgg;
 import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
-import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.nereids.types.ArrayType;
 import org.apache.doris.nereids.types.DoubleType;
@@ -36,7 +35,7 @@ import java.util.List;
  * inner_product function
  */
 public class InnerProduct extends ScalarFunction implements 
ExplicitlyCastableSignature,
-        ComputePrecisionForArrayItemAgg, UnaryExpression, AlwaysNullable {
+        BinaryExpression, AlwaysNullable {
 
     public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
             FunctionSignature.ret(DoubleType.INSTANCE)
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/L1Distance.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/L1Distance.java
index 7423702fc7b..66a6ebd2bf4 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/L1Distance.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/L1Distance.java
@@ -20,9 +20,8 @@ package 
org.apache.doris.nereids.trees.expressions.functions.scalar;
 import org.apache.doris.catalog.FunctionSignature;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
-import 
org.apache.doris.nereids.trees.expressions.functions.ComputePrecisionForArrayItemAgg;
 import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
-import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.nereids.types.ArrayType;
 import org.apache.doris.nereids.types.DoubleType;
@@ -36,7 +35,7 @@ import java.util.List;
  * l1_distance function
  */
 public class L1Distance extends ScalarFunction implements 
ExplicitlyCastableSignature,
-        ComputePrecisionForArrayItemAgg, UnaryExpression, AlwaysNullable {
+        BinaryExpression, AlwaysNullable {
 
     public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
             FunctionSignature.ret(DoubleType.INSTANCE)
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/L2Distance.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/L2Distance.java
index 14ffee389ae..a9775f59ad7 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/L2Distance.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/L2Distance.java
@@ -20,9 +20,8 @@ package 
org.apache.doris.nereids.trees.expressions.functions.scalar;
 import org.apache.doris.catalog.FunctionSignature;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
-import 
org.apache.doris.nereids.trees.expressions.functions.ComputePrecisionForArrayItemAgg;
 import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
-import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.nereids.types.ArrayType;
 import org.apache.doris.nereids.types.DoubleType;
@@ -36,7 +35,7 @@ import java.util.List;
  * l2_distance function
  */
 public class L2Distance extends ScalarFunction implements 
ExplicitlyCastableSignature,
-        ComputePrecisionForArrayItemAgg, UnaryExpression, AlwaysNullable {
+        BinaryExpression, AlwaysNullable {
 
     public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
             FunctionSignature.ret(DoubleType.INSTANCE)
diff --git 
a/regression-test/data/query_p0/sql_functions/array_functions/test_array_distance_functions.out
 
b/regression-test/data/query_p0/sql_functions/array_functions/test_array_distance_functions.out
index 10c9f2ae306..4a14b8c9a40 100644
Binary files 
a/regression-test/data/query_p0/sql_functions/array_functions/test_array_distance_functions.out
 and 
b/regression-test/data/query_p0/sql_functions/array_functions/test_array_distance_functions.out
 differ
diff --git 
a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_distance_functions.groovy
 
b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_distance_functions.groovy
index cf196e0d61a..9010750a2ec 100644
--- 
a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_distance_functions.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_distance_functions.groovy
@@ -25,6 +25,21 @@ suite("test_array_distance_functions") {
     qt_sql "SELECT l2_distance([1, 2, 3], NULL)"
     qt_sql "SELECT cosine_distance([1, 2, 3], [0, NULL, 0])"
 
+    // Test cases for nullable arrays with different null distributions
+    // These test the fix for correct array size comparison when nulls are 
present
+    qt_sql "SELECT l1_distance(NULL, NULL)"
+    qt_sql "SELECT l2_distance([1.0, 2.0], [3.0, 4.0])"
+    qt_sql "SELECT cosine_distance([1.0, 2.0, 3.0], [4.0, 5.0, 6.0])"
+    qt_sql "SELECT inner_product([2.0, 3.0], [4.0, 5.0])"
+    
+    // Test arrays with NULL elements inside
+    qt_sql "SELECT l1_distance([1.0, NULL, 3.0], [4.0, NULL, 6.0])"
+    qt_sql "SELECT l2_distance([NULL, 2.0], [NULL, 5.0])"
+    
+    // Test mixed nullable scenarios - these should work correctly after the 
fix
+    qt_sql "SELECT l1_distance([1.0, 2.0], [3.0, 4.0]) as result1, 
l1_distance(NULL, [5.0, 6.0]) as result2"
+    qt_sql "SELECT cosine_distance([1.0], [2.0]) as result1, 
cosine_distance([3.0], NULL) as result2"
+
     // abnormal test cases
     try {
         sql "SELECT l2_distance([0, 0], [1])"
@@ -37,4 +52,53 @@ suite("test_array_distance_functions") {
     } catch (Exception ex) {
         assert("${ex}".contains("function cosine_distance have different input 
element sizes"))
     }
+    
+    // Test cases for the nullable array offset fix
+    // These cases specifically test scenarios where absolute offsets might 
differ
+    // but actual array sizes are the same (should pass) or different (should 
fail)
+    try {
+        sql "SELECT l1_distance([1.0, 2.0, 3.0], [4.0, 5.0])"
+    } catch (Exception ex) {
+        assert("${ex}".contains("function l1_distance have different input 
element sizes"))
+    }
+    
+    try {
+        sql "SELECT inner_product([1.0], [2.0, 3.0, 4.0])"
+    } catch (Exception ex) {
+        assert("${ex}".contains("function inner_product have different input 
element sizes"))
+    }
+    
+    // Edge case: empty arrays should work
+    qt_sql "SELECT l1_distance(CAST([] as ARRAY<DOUBLE>), CAST([] as 
ARRAY<DOUBLE>))"
+    qt_sql "SELECT l2_distance(CAST([] as ARRAY<DOUBLE>), CAST([] as 
ARRAY<DOUBLE>))"
+    
+    // Comprehensive test for the offset fix: test with table data containing 
mixed nulls
+    // This specifically tests the scenario where offsets might differ due to 
null distribution
+    // but actual array sizes are the same
+    sql """
+        DROP TABLE IF EXISTS test_array_distance_nullable
+    """
+    sql """
+        CREATE TABLE test_array_distance_nullable (
+            id INT,
+            arr1 ARRAY<DOUBLE>,
+            arr2 ARRAY<DOUBLE>
+        ) PROPERTIES (
+            "replication_num" = "1"
+        )
+    """
+    sql """
+        INSERT INTO test_array_distance_nullable VALUES
+        (1, [1.0, 2.0], [3.0, 4.0]),
+        (2, NULL, [5.0, 6.0]),
+        (3, [7.0, 8.0], NULL),
+        (4, [9.0, 10.0], [11.0, 12.0]),
+        (5, NULL, NULL)
+    """
+    
+    // These queries should work correctly after the fix
+    qt_sql "SELECT id, l1_distance(arr1, arr2) FROM 
test_array_distance_nullable ORDER BY id"
+    qt_sql "SELECT id, l2_distance(arr1, arr2) FROM 
test_array_distance_nullable ORDER BY id"
+    qt_sql "SELECT id, cosine_distance(arr1, arr2) FROM 
test_array_distance_nullable ORDER BY id"
+    qt_sql "SELECT id, inner_product(arr1, arr2) FROM 
test_array_distance_nullable ORDER BY id"
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to