This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 6513f9e01f7 [cherry-pick](branch-21)add some check for udf when result
is null (#51084) (#51196)
6513f9e01f7 is described below
commit 6513f9e01f7d0881df5b6c63b58295646846742a
Author: zhangstar333 <[email protected]>
AuthorDate: Fri May 23 20:43:31 2025 +0800
[cherry-pick](branch-21)add some check for udf when result is null (#51084)
(#51196)
---
.../apache/doris/common/jni/vec/VectorColumn.java | 44 ++++++++++++++++++++++
.../main/java/org/apache/doris/udf/StringTest.java | 3 ++
.../suites/javaudf_p0/test_javaudf_array.groovy | 11 ++++++
.../suites/javaudf_p0/test_javaudf_int.groovy | 26 +++++++++++++
.../suites/javaudf_p0/test_javaudf_string.groovy | 13 +++++++
5 files changed, 97 insertions(+)
diff --git
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java
b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java
index 596f3a15472..6589558ea2a 100644
---
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java
+++
b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java
@@ -305,6 +305,16 @@ public class VectorColumn {
}
}
+ public void checkNullable(Object[] batch, int rows) {
+ for (int i = 0; i < rows; ++i) {
+ if (batch[i] == null) {
+ throw new RuntimeException(
+ "the result of " + i + " row is null, but the return
type is not nullable, please check "
+ + "the always_nullable property in create
function statement, it's should be true");
+ }
+ }
+ }
+
public final boolean isNullAt(int rowId) {
if (numNulls == 0 || nullMap == 0) {
return false;
@@ -405,6 +415,7 @@ public class VectorColumn {
}
OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET,
null, nullMap + appendIndex, rows);
} else {
+ checkNullable(batch, rows);
for (int i = 0; i < rows; ++i) {
batchData[i] = (byte) (batch[i] ? 1 : 0);
}
@@ -462,6 +473,7 @@ public class VectorColumn {
}
OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET,
null, nullMap + appendIndex, rows);
} else {
+ checkNullable(batch, rows);
for (int i = 0; i < rows; ++i) {
batchData[i] = batch[i];
}
@@ -519,6 +531,7 @@ public class VectorColumn {
}
OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET,
null, nullMap + appendIndex, rows);
} else {
+ checkNullable(batch, rows);
for (int i = 0; i < rows; ++i) {
batchData[i] = batch[i];
}
@@ -576,6 +589,7 @@ public class VectorColumn {
}
OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET,
null, nullMap + appendIndex, rows);
} else {
+ checkNullable(batch, rows);
for (int i = 0; i < rows; ++i) {
batchData[i] = batch[i];
}
@@ -633,6 +647,7 @@ public class VectorColumn {
}
OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET,
null, nullMap + appendIndex, rows);
} else {
+ checkNullable(batch, rows);
for (int i = 0; i < rows; ++i) {
batchData[i] = batch[i];
}
@@ -690,6 +705,7 @@ public class VectorColumn {
}
OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET,
null, nullMap + appendIndex, rows);
} else {
+ checkNullable(batch, rows);
for (int i = 0; i < rows; ++i) {
batchData[i] = batch[i];
}
@@ -747,6 +763,7 @@ public class VectorColumn {
}
OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET,
null, nullMap + appendIndex, rows);
} else {
+ checkNullable(batch, rows);
for (int i = 0; i < rows; ++i) {
batchData[i] = batch[i];
}
@@ -788,6 +805,9 @@ public class VectorColumn {
}
public void appendBigInteger(BigInteger[] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
for (BigInteger v : batch) {
if (v == null) {
@@ -834,6 +854,9 @@ public class VectorColumn {
}
public void appendDecimal(BigDecimal[] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
for (BigDecimal v : batch) {
if (v == null) {
@@ -880,6 +903,9 @@ public class VectorColumn {
}
public void appendDate(LocalDate[] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
for (LocalDate v : batch) {
if (v == null) {
@@ -946,6 +972,9 @@ public class VectorColumn {
}
public void appendDateTime(LocalDateTime[] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
for (LocalDateTime v : batch) {
if (v == null) {
@@ -1047,6 +1076,9 @@ public class VectorColumn {
}
public void appendStringAndOffset(String[] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
for (String v : batch) {
byte[] bytes;
@@ -1063,6 +1095,9 @@ public class VectorColumn {
}
public void appendBinaryAndOffset(byte[][] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
for (byte[] v : batch) {
byte[] bytes = v;
@@ -1116,6 +1151,9 @@ public class VectorColumn {
}
public void appendArray(List<Object>[] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
int offset = childColumns[0].appendIndex;
for (List<Object> v : batch) {
@@ -1175,6 +1213,9 @@ public class VectorColumn {
}
public void appendMap(Map<Object, Object>[] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
int offset = childColumns[0].appendIndex;
for (Map<Object, Object> v : batch) {
@@ -1239,6 +1280,9 @@ public class VectorColumn {
}
public void appendStruct(Map<String, Object>[] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
Object[][] columnData = new Object[childColumns.length][];
for (int j = 0; j < childColumns.length; ++j) {
diff --git
a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/StringTest.java
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/StringTest.java
index cc1a6a2bca7..822c484c706 100644
---
a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/StringTest.java
+++
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/StringTest.java
@@ -22,6 +22,9 @@ import org.apache.hadoop.hive.ql.exec.UDF;
public class StringTest extends UDF {
public String evaluate(String field, Integer a, Integer b) {
+ if (field == null || a == null || b == null) {
+ return null;
+ }
return field.substring(0, a) + StringUtils.repeat("*", field.length()
- a -b) + field.substring(field.length()-b);
}
}
diff --git a/regression-test/suites/javaudf_p0/test_javaudf_array.groovy
b/regression-test/suites/javaudf_p0/test_javaudf_array.groovy
index ee02ca17cc3..ef2a43b51cc 100644
--- a/regression-test/suites/javaudf_p0/test_javaudf_array.groovy
+++ b/regression-test/suites/javaudf_p0/test_javaudf_array.groovy
@@ -117,8 +117,19 @@ suite("test_javaudf_array") {
); """
qt_select_13 """ SELECT java_udf_array_date_test(array(datev2_col)),
tinyint_col as result FROM ${tableName} ORDER BY result; """
+ sql """ CREATE FUNCTION
java_udf_array_list_test_not_nullable(array<string>) RETURNS array<string>
PROPERTIES (
+ "file"="file://${jarPath}",
+ "symbol"="org.apache.doris.udf.ArrayReturnArrayStringTest",
+ "always_nullable"="false",
+ "type"="JAVA_UDF"
+ ); """
+ test {
+ sql """ SELECT java_udf_array_list_test_not_nullable(NULL); """
+ exception "but the return type is not nullable"
+ }
} finally {
try_sql("DROP FUNCTION IF EXISTS java_udf_array_int_test(array<int>);")
+ try_sql("DROP FUNCTION IF EXISTS
java_udf_array_list_test_not_nullable(array<string>);")
try_sql("DROP FUNCTION IF EXISTS
java_udf_array_return_int_test(array<int>);")
try_sql("DROP FUNCTION IF EXISTS
java_udf_array_return_string_test(array<string>);")
try_sql("DROP FUNCTION IF EXISTS
java_udf_array_string_test(array<string>);")
diff --git a/regression-test/suites/javaudf_p0/test_javaudf_int.groovy
b/regression-test/suites/javaudf_p0/test_javaudf_int.groovy
index cb9b87b7bf0..7433866a9f2 100644
--- a/regression-test/suites/javaudf_p0/test_javaudf_int.groovy
+++ b/regression-test/suites/javaudf_p0/test_javaudf_int.groovy
@@ -123,6 +123,30 @@ suite("test_javaudf_int") {
qt_select_global_3 """ SELECT java_udf_int_test_global(3) result FROM
${tableName} ORDER BY result; """
qt_select_global_4 """ SELECT abs(java_udf_int_test_global(3)) result
FROM ${tableName} ORDER BY result; """
+ sql """ CREATE FUNCTION java_udf_int_test_not_nullable(int) RETURNS
int PROPERTIES (
+ "file"="file://${jarPath}",
+ "symbol"="org.apache.doris.udf.IntTest",
+ "always_nullable"="false",
+ "type"="JAVA_UDF"
+ ); """
+
+ test {
+ sql """ SELECT java_udf_int_test_not_nullable(NULL); """
+ exception "but the return type is not nullable"
+ }
+
+ sql """ CREATE FUNCTION java_udf_largeint_test_not_nullable(largeint)
RETURNS largeint PROPERTIES (
+ "file"="file://${jarPath}",
+ "symbol"="org.apache.doris.udf.LargeintTest",
+ "always_nullable"="false",
+ "type"="JAVA_UDF"
+ ); """
+
+ test {
+ sql """ SELECT java_udf_largeint_test_not_nullable(NULL); """
+ exception "but the return type is not nullable"
+ }
+
} finally {
try_sql("DROP GLOBAL FUNCTION IF EXISTS
java_udf_int_test_global(int);")
try_sql("DROP FUNCTION IF EXISTS java_udf_tinyint_test(tinyint);")
@@ -130,6 +154,8 @@ suite("test_javaudf_int") {
try_sql("DROP FUNCTION IF EXISTS java_udf_bigint_test(bigint);")
try_sql("DROP FUNCTION IF EXISTS java_udf_largeint_test(largeint);")
try_sql("DROP FUNCTION IF EXISTS java_udf_int_test(int);")
+ try_sql("DROP FUNCTION IF EXISTS java_udf_int_test_not_nullable(int);")
+ try_sql("DROP FUNCTION IF EXISTS
java_udf_largeint_test_not_nullable(largeint);")
try_sql("DROP TABLE IF EXISTS ${tableName}")
}
}
diff --git a/regression-test/suites/javaudf_p0/test_javaudf_string.groovy
b/regression-test/suites/javaudf_p0/test_javaudf_string.groovy
index e6484a1fde1..f309b88feb8 100644
--- a/regression-test/suites/javaudf_p0/test_javaudf_string.groovy
+++ b/regression-test/suites/javaudf_p0/test_javaudf_string.groovy
@@ -98,8 +98,21 @@ suite("test_javaudf_string") {
}
sql """ insert into tbl1 select random()%10000 * 10000, "5" from
tbl1;"""
qt_select_5 """ select count(0) from (select k1, max(k2) as k2 from
tbl1 group by k1)v where java_udf_string_test(k2, 0, 1) = "asd" """;
+
+ sql """ CREATE FUNCTION java_udf_string_test_not_nullabel(string, int,
int) RETURNS string PROPERTIES (
+ "file"="file://${jarPath}",
+ "symbol"="org.apache.doris.udf.StringTest",
+ "always_nullable"="false",
+ "type"="JAVA_UDF"
+ ); """
+
+ test {
+ sql """ SELECT java_udf_string_test_not_nullabel(NULL,NULL,NULL);
"""
+ exception "but the return type is not nullable"
+ }
} finally {
try_sql("DROP FUNCTION IF EXISTS java_udf_string_test(string, int,
int);")
+ try_sql("DROP FUNCTION IF EXISTS
java_udf_string_test_not_nullabel(string, int, int);")
try_sql("DROP TABLE IF EXISTS ${tableName}")
try_sql("DROP TABLE IF EXISTS tbl1")
try_sql("DROP TABLE IF EXISTS test_javaudf_string_2")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]