This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c07e893d87e [improve](udf)add some check for udf when result is null
(#51084)
c07e893d87e is described below
commit c07e893d87ec6689ff2ed8f3d2ede0e8ce3e25a6
Author: zhangstar333 <[email protected]>
AuthorDate: Thu May 22 17:52:42 2025 +0800
[improve](udf)add some check for udf when result is null (#51084)
### What problem does this PR solve?
Problem Summary:
Sometimes a user creates a UDF with "always_nullable"="false",
but the UDF's own logic can still return null.
This change adds checks so that the user gets a clearer error message in that case.
---
.../apache/doris/common/jni/vec/VectorColumn.java | 47 ++++++++++++++++++++++
.../main/java/org/apache/doris/udf/StringTest.java | 3 ++
.../suites/javaudf_p0/test_javaudf_array.groovy | 12 +++++-
.../suites/javaudf_p0/test_javaudf_int.groovy | 26 ++++++++++++
.../suites/javaudf_p0/test_javaudf_string.groovy | 13 ++++++
5 files changed, 100 insertions(+), 1 deletion(-)
diff --git
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java
b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java
index d2f2e42f866..46d99191adb 100644
---
a/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java
+++
b/fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jni/vec/VectorColumn.java
@@ -350,6 +350,16 @@ public class VectorColumn {
}
}
+ public void checkNullable(Object[] batch, int rows) {
+ for (int i = 0; i < rows; ++i) {
+ if (batch[i] == null) {
+ throw new RuntimeException(
+ "the result of " + i + " row is null, but the return
type is not nullable, please check "
+ + "the always_nullable property in create
function statement, it's should be true");
+ }
+ }
+ }
+
public final boolean isNullAt(int rowId) {
if (numNulls == 0 || nullMap == 0) {
return false;
@@ -454,6 +464,7 @@ public class VectorColumn {
}
OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET,
null, nullMap + appendIndex, rows);
} else {
+ checkNullable(batch, rows);
for (int i = 0; i < rows; ++i) {
batchData[i] = (byte) (batch[i] ? 1 : 0);
}
@@ -511,6 +522,7 @@ public class VectorColumn {
}
OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET,
null, nullMap + appendIndex, rows);
} else {
+ checkNullable(batch, rows);
for (int i = 0; i < rows; ++i) {
batchData[i] = batch[i];
}
@@ -568,6 +580,7 @@ public class VectorColumn {
}
OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET,
null, nullMap + appendIndex, rows);
} else {
+ checkNullable(batch, rows);
for (int i = 0; i < rows; ++i) {
batchData[i] = batch[i];
}
@@ -625,6 +638,7 @@ public class VectorColumn {
}
OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET,
null, nullMap + appendIndex, rows);
} else {
+ checkNullable(batch, rows);
for (int i = 0; i < rows; ++i) {
batchData[i] = batch[i];
}
@@ -682,6 +696,7 @@ public class VectorColumn {
}
OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET,
null, nullMap + appendIndex, rows);
} else {
+ checkNullable(batch, rows);
for (int i = 0; i < rows; ++i) {
batchData[i] = batch[i];
}
@@ -739,6 +754,7 @@ public class VectorColumn {
}
OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET,
null, nullMap + appendIndex, rows);
} else {
+ checkNullable(batch, rows);
for (int i = 0; i < rows; ++i) {
batchData[i] = batch[i];
}
@@ -796,6 +812,7 @@ public class VectorColumn {
}
OffHeap.UNSAFE.copyMemory(batchNulls, OffHeap.BYTE_ARRAY_OFFSET,
null, nullMap + appendIndex, rows);
} else {
+ checkNullable(batch, rows);
for (int i = 0; i < rows; ++i) {
batchData[i] = batch[i];
}
@@ -837,6 +854,9 @@ public class VectorColumn {
}
public void appendBigInteger(BigInteger[] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
for (BigInteger v : batch) {
if (v == null) {
@@ -904,6 +924,9 @@ public class VectorColumn {
}
public void appendInetAddress(InetAddress[] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
for (InetAddress v : batch) {
if (v == null) {
@@ -933,6 +956,9 @@ public class VectorColumn {
}
public void appendDecimal(BigDecimal[] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
for (BigDecimal v : batch) {
if (v == null) {
@@ -979,6 +1005,9 @@ public class VectorColumn {
}
public void appendDate(LocalDate[] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
for (LocalDate v : batch) {
if (v == null) {
@@ -1045,6 +1074,9 @@ public class VectorColumn {
}
public void appendDateTime(LocalDateTime[] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
for (LocalDateTime v : batch) {
if (v == null) {
@@ -1146,6 +1178,9 @@ public class VectorColumn {
}
public void appendStringAndOffset(String[] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
for (String v : batch) {
byte[] bytes;
@@ -1162,6 +1197,9 @@ public class VectorColumn {
}
public void appendBinaryAndOffset(byte[][] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
for (byte[] v : batch) {
byte[] bytes = v;
@@ -1215,6 +1253,9 @@ public class VectorColumn {
}
public void appendArray(List<Object>[] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
int offset = childColumns[0].appendIndex;
for (List<Object> v : batch) {
@@ -1275,6 +1316,9 @@ public class VectorColumn {
}
public void appendMap(Map<Object, Object>[] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
int offset = childColumns[0].appendIndex;
for (Map<Object, Object> v : batch) {
@@ -1341,6 +1385,9 @@ public class VectorColumn {
}
public void appendStruct(Map<String, Object>[] batch, boolean isNullable) {
+ if (!isNullable) {
+ checkNullable(batch, batch.length);
+ }
reserve(appendIndex + batch.length);
Object[][] columnData = new Object[childColumns.length][];
Preconditions.checkArgument(this.getColumnType().getChildNames().size() ==
childColumns.length);
diff --git
a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/StringTest.java
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/StringTest.java
index cc1a6a2bca7..822c484c706 100644
---
a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/StringTest.java
+++
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/StringTest.java
@@ -22,6 +22,9 @@ import org.apache.hadoop.hive.ql.exec.UDF;
public class StringTest extends UDF {
public String evaluate(String field, Integer a, Integer b) {
+ if (field == null || a == null || b == null) {
+ return null;
+ }
return field.substring(0, a) + StringUtils.repeat("*", field.length()
- a -b) + field.substring(field.length()-b);
}
}
diff --git a/regression-test/suites/javaudf_p0/test_javaudf_array.groovy
b/regression-test/suites/javaudf_p0/test_javaudf_array.groovy
index 0d782c036b4..4cae4e872f9 100644
--- a/regression-test/suites/javaudf_p0/test_javaudf_array.groovy
+++ b/regression-test/suites/javaudf_p0/test_javaudf_array.groovy
@@ -124,9 +124,19 @@ suite("test_javaudf_array") {
"type"="JAVA_UDF"
); """
qt_select_14 """ SELECT java_udf_array_list_test(array(string_col)),
string_col, tinyint_col as result FROM ${tableName} ORDER BY result; """
-
+ sql """ CREATE FUNCTION
java_udf_array_list_test_not_nullable(array<string>) RETURNS array<string>
PROPERTIES (
+ "file"="file://${jarPath}",
+ "symbol"="org.apache.doris.udf.ArrayListTest",
+ "always_nullable"="false",
+ "type"="JAVA_UDF"
+ ); """
+ test {
+ sql """ SELECT java_udf_array_list_test_not_nullable(NULL); """
+ exception "but the return type is not nullable"
+ }
} finally {
try_sql("DROP FUNCTION IF EXISTS java_udf_array_int_test(array<int>);")
+ try_sql("DROP FUNCTION IF EXISTS
java_udf_array_list_test_not_nullable(array<string>);")
try_sql("DROP FUNCTION IF EXISTS
java_udf_array_return_int_test(array<int>);")
try_sql("DROP FUNCTION IF EXISTS
java_udf_array_return_string_test(array<string>);")
try_sql("DROP FUNCTION IF EXISTS
java_udf_array_string_test(array<string>);")
diff --git a/regression-test/suites/javaudf_p0/test_javaudf_int.groovy
b/regression-test/suites/javaudf_p0/test_javaudf_int.groovy
index cb9b87b7bf0..7433866a9f2 100644
--- a/regression-test/suites/javaudf_p0/test_javaudf_int.groovy
+++ b/regression-test/suites/javaudf_p0/test_javaudf_int.groovy
@@ -123,6 +123,30 @@ suite("test_javaudf_int") {
qt_select_global_3 """ SELECT java_udf_int_test_global(3) result FROM
${tableName} ORDER BY result; """
qt_select_global_4 """ SELECT abs(java_udf_int_test_global(3)) result
FROM ${tableName} ORDER BY result; """
+ sql """ CREATE FUNCTION java_udf_int_test_not_nullable(int) RETURNS
int PROPERTIES (
+ "file"="file://${jarPath}",
+ "symbol"="org.apache.doris.udf.IntTest",
+ "always_nullable"="false",
+ "type"="JAVA_UDF"
+ ); """
+
+ test {
+ sql """ SELECT java_udf_int_test_not_nullable(NULL); """
+ exception "but the return type is not nullable"
+ }
+
+ sql """ CREATE FUNCTION java_udf_largeint_test_not_nullable(largeint)
RETURNS largeint PROPERTIES (
+ "file"="file://${jarPath}",
+ "symbol"="org.apache.doris.udf.LargeintTest",
+ "always_nullable"="false",
+ "type"="JAVA_UDF"
+ ); """
+
+ test {
+ sql """ SELECT java_udf_largeint_test_not_nullable(NULL); """
+ exception "but the return type is not nullable"
+ }
+
} finally {
try_sql("DROP GLOBAL FUNCTION IF EXISTS
java_udf_int_test_global(int);")
try_sql("DROP FUNCTION IF EXISTS java_udf_tinyint_test(tinyint);")
@@ -130,6 +154,8 @@ suite("test_javaudf_int") {
try_sql("DROP FUNCTION IF EXISTS java_udf_bigint_test(bigint);")
try_sql("DROP FUNCTION IF EXISTS java_udf_largeint_test(largeint);")
try_sql("DROP FUNCTION IF EXISTS java_udf_int_test(int);")
+ try_sql("DROP FUNCTION IF EXISTS java_udf_int_test_not_nullable(int);")
+ try_sql("DROP FUNCTION IF EXISTS
java_udf_largeint_test_not_nullable(largeint);")
try_sql("DROP TABLE IF EXISTS ${tableName}")
}
}
diff --git a/regression-test/suites/javaudf_p0/test_javaudf_string.groovy
b/regression-test/suites/javaudf_p0/test_javaudf_string.groovy
index 48e98b0c5b6..2158c50e432 100644
--- a/regression-test/suites/javaudf_p0/test_javaudf_string.groovy
+++ b/regression-test/suites/javaudf_p0/test_javaudf_string.groovy
@@ -114,8 +114,21 @@ suite("test_javaudf_string") {
}
sql """ insert into tbl1 select random()%10000 * 10000, "5" from
tbl1;"""
qt_select_5 """ select count(0) from (select k1, max(k2) as k2 from
tbl1 group by k1)v where java_udf_string_test(k2, 0, 1) = "asd" """;
+
+ sql """ CREATE FUNCTION java_udf_string_test_not_nullabel(string, int,
int) RETURNS string PROPERTIES (
+ "file"="file://${jarPath}",
+ "symbol"="org.apache.doris.udf.StringTest",
+ "always_nullable"="false",
+ "type"="JAVA_UDF"
+ ); """
+
+ test {
+ sql """ SELECT java_udf_string_test_not_nullabel(NULL,NULL,NULL);
"""
+ exception "but the return type is not nullable"
+ }
} finally {
try_sql("DROP FUNCTION IF EXISTS java_udf_string_test(string, int,
int);")
+ try_sql("DROP FUNCTION IF EXISTS
java_udf_string_test_not_nullabel(string, int, int);")
try_sql("DROP TABLE IF EXISTS ${tableName}")
try_sql("DROP TABLE IF EXISTS tbl1")
try_sql("DROP TABLE IF EXISTS test_javaudf_string_2")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]