This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 5291f14aff [vectorized](udf) java udf support array type (#16841)
5291f14aff is described below
commit 5291f14aff410c46af6838677003873d6121d8ba
Author: zhangstar333 <[email protected]>
AuthorDate: Mon Feb 20 10:00:25 2023 +0800
[vectorized](udf) java udf support array type (#16841)
---
be/src/vec/functions/function_java_udf.cpp | 101 +++-
be/src/vec/functions/function_java_udf.h | 12 +
.../ecosystem/udf/java-user-defined-function.md | 2 +
.../ecosystem/udf/java-user-defined-function.md | 2 +
.../main/java/org/apache/doris/catalog/Type.java | 1 +
.../apache/doris/analysis/CreateFunctionStmt.java | 17 +-
.../java/org/apache/doris/udf/BaseExecutor.java | 658 ++++++++++++++++++++-
.../java/org/apache/doris/udf/UdfExecutor.java | 9 +-
.../main/java/org/apache/doris/udf/UdfUtils.java | 35 +-
gensrc/thrift/Types.thrift | 12 +
.../data/javaudf_p0/test_javaudf_array.out | 133 +++++
.../java/org/apache/doris/udf/ArrayDateTest.java | 29 +
.../org/apache/doris/udf/ArrayDateTimeTest.java | 29 +
.../java/org/apache/doris/udf/ArrayIntTest.java | 38 ++
.../apache/doris/udf/ArrayReturnArrayIntTest.java | 40 ++
.../doris/udf/ArrayReturnArrayStringTest.java | 41 ++
.../java/org/apache/doris/udf/ArrayStringTest.java | 39 ++
.../suites/javaudf_p0/test_javaudf_array.groovy | 122 ++++
run-regression-test.sh | 5 +-
19 files changed, 1293 insertions(+), 32 deletions(-)
diff --git a/be/src/vec/functions/function_java_udf.cpp
b/be/src/vec/functions/function_java_udf.cpp
index d5e3751ef5..abffb8d8bc 100644
--- a/be/src/vec/functions/function_java_udf.cpp
+++ b/be/src/vec/functions/function_java_udf.cpp
@@ -27,6 +27,8 @@
#include "runtime/exec_env.h"
#include "runtime/user_function_cache.h"
#include "util/jni-util.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_nullable.h"
#include "vec/columns/column_vector.h"
#include "vec/core/block.h"
#include "vec/data_types/data_type_bitmap.h"
@@ -78,9 +80,16 @@ Status JavaFunctionCall::prepare(FunctionContext* context,
ctor_params.__set_input_offsets_ptrs((int64_t)jni_ctx->input_offsets_ptrs.get());
ctor_params.__set_input_buffer_ptrs((int64_t)jni_ctx->input_values_buffer_ptr.get());
ctor_params.__set_input_nulls_ptrs((int64_t)jni_ctx->input_nulls_buffer_ptr.get());
+ ctor_params.__set_input_array_nulls_buffer_ptr(
+ (int64_t)jni_ctx->input_array_nulls_buffer_ptr.get());
+ ctor_params.__set_input_array_string_offsets_ptrs(
+ (int64_t)jni_ctx->input_array_string_offsets_ptrs.get());
ctor_params.__set_output_buffer_ptr((int64_t)jni_ctx->output_value_buffer.get());
ctor_params.__set_output_null_ptr((int64_t)jni_ctx->output_null_value.get());
ctor_params.__set_output_offsets_ptr((int64_t)jni_ctx->output_offsets_ptr.get());
+
ctor_params.__set_output_array_null_ptr((int64_t)jni_ctx->output_array_null_ptr.get());
+ ctor_params.__set_output_array_string_offsets_ptr(
+ (int64_t)jni_ctx->output_array_string_offsets_ptr.get());
ctor_params.__set_output_intermediate_state_ptr(
(int64_t)jni_ctx->output_intermediate_state_ptr.get());
ctor_params.__set_batch_size_ptr((int64_t)jni_ctx->batch_size_ptr.get());
@@ -142,6 +151,31 @@ Status JavaFunctionCall::execute(FunctionContext* context,
Block& block,
} else if (data_cols[arg_idx]->is_numeric() ||
data_cols[arg_idx]->is_column_decimal()) {
jni_ctx->input_values_buffer_ptr.get()[arg_idx] =
reinterpret_cast<int64_t>(data_cols[arg_idx]->get_raw_data().data);
+ } else if (data_cols[arg_idx]->is_column_array()) {
+ const ColumnArray* array_col =
+ assert_cast<const ColumnArray*>(data_cols[arg_idx].get());
+ jni_ctx->input_offsets_ptrs.get()[arg_idx] =
+
reinterpret_cast<int64_t>(array_col->get_offsets_column().get_raw_data().data);
+ const ColumnNullable& array_nested_nullable =
+ assert_cast<const ColumnNullable&>(array_col->get_data());
+ auto data_column_null_map =
array_nested_nullable.get_null_map_column_ptr();
+ auto data_column = array_nested_nullable.get_nested_column_ptr();
+ jni_ctx->input_array_nulls_buffer_ptr.get()[arg_idx] =
reinterpret_cast<int64_t>(
+
check_and_get_column<ColumnVector<UInt8>>(data_column_null_map)
+ ->get_data()
+ .data());
+
+ // need to pass to FE: null map, offsets and chars
+ if (data_column->is_column_string()) {
+ const ColumnString* col = assert_cast<const
ColumnString*>(data_column.get());
+ jni_ctx->input_values_buffer_ptr.get()[arg_idx] =
+ reinterpret_cast<int64_t>(col->get_chars().data());
+ jni_ctx->input_array_string_offsets_ptrs.get()[arg_idx] =
+ reinterpret_cast<int64_t>(col->get_offsets().data());
+ } else {
+ jni_ctx->input_values_buffer_ptr.get()[arg_idx] =
+
reinterpret_cast<int64_t>(data_column->get_raw_data().data);
+ }
} else {
return Status::InvalidArgument(
strings::Substitute("Java UDF doesn't support type $0 now
!",
@@ -155,7 +189,6 @@ Status JavaFunctionCall::execute(FunctionContext* context,
Block& block,
auto null_type = std::reinterpret_pointer_cast<const
DataTypeNullable>(return_type);
auto data_col = null_type->get_nested_type()->create_column();
auto null_col = ColumnUInt8::create(data_col->size(), 0);
- null_col->reserve(num_rows);
null_col->resize(num_rows);
*(jni_ctx->output_null_value) =
reinterpret_cast<int64_t>(null_col->get_data().data());
@@ -168,9 +201,7 @@ Status JavaFunctionCall::execute(FunctionContext* context,
Block& block,
const_cast<ColumnString::Offsets&>(str_col->get_offsets());
\
int increase_buffer_size = 0;
\
int32_t buffer_size =
JniUtil::IncreaseReservedBufferSize(increase_buffer_size); \
- chars.reserve(buffer_size);
\
chars.resize(buffer_size);
\
- offsets.reserve(num_rows);
\
offsets.resize(num_rows);
\
*(jni_ctx->output_value_buffer) =
reinterpret_cast<int64_t>(chars.data()); \
*(jni_ctx->output_offsets_ptr) =
reinterpret_cast<int64_t>(offsets.data()); \
@@ -188,12 +219,74 @@ Status JavaFunctionCall::execute(FunctionContext*
context, Block& block,
nullptr);
\
}
\
} else if (data_col->is_numeric() || data_col->is_column_decimal()) {
\
- data_col->reserve(num_rows);
\
data_col->resize(num_rows);
\
*(jni_ctx->output_value_buffer) =
\
reinterpret_cast<int64_t>(data_col->get_raw_data().data);
\
env->CallNonvirtualVoidMethodA(jni_ctx->executor, executor_cl_,
executor_evaluate_id_, \
nullptr);
\
+ } else if (data_col->is_column_array()) {
\
+ ColumnArray* array_col = assert_cast<ColumnArray*>(data_col.get());
\
+ ColumnNullable& array_nested_nullable =
\
+ assert_cast<ColumnNullable&>(array_col->get_data());
\
+ auto data_column_null_map =
array_nested_nullable.get_null_map_column_ptr(); \
+ auto data_column = array_nested_nullable.get_nested_column_ptr();
\
+ auto& offset_column = array_col->get_offsets_column();
\
+ int increase_buffer_size = 0;
\
+ int32_t buffer_size =
JniUtil::IncreaseReservedBufferSize(increase_buffer_size); \
+ offset_column.resize(num_rows);
\
+ *(jni_ctx->output_offsets_ptr) =
\
+ reinterpret_cast<int64_t>(offset_column.get_raw_data().data);
\
+ data_column_null_map->resize(buffer_size);
\
+ auto& null_map_data =
\
+
assert_cast<ColumnVector<UInt8>*>(data_column_null_map.get())->get_data();
\
+ *(jni_ctx->output_array_null_ptr) =
reinterpret_cast<int64_t>(null_map_data.data()); \
+ jni_ctx->output_intermediate_state_ptr->row_idx = 0;
\
+ jni_ctx->output_intermediate_state_ptr->buffer_size = buffer_size;
\
+ if (data_column->is_column_string()) {
\
+ ColumnString* str_col =
assert_cast<ColumnString*>(data_column.get()); \
+ ColumnString::Chars& chars =
assert_cast<ColumnString::Chars&>(str_col->get_chars()); \
+ ColumnString::Offsets& offsets =
\
+
assert_cast<ColumnString::Offsets&>(str_col->get_offsets()); \
+ chars.resize(buffer_size);
\
+ offsets.resize(buffer_size);
\
+ *(jni_ctx->output_value_buffer) =
reinterpret_cast<int64_t>(chars.data()); \
+ *(jni_ctx->output_array_string_offsets_ptr) =
\
+ reinterpret_cast<int64_t>(offsets.data());
\
+ env->CallNonvirtualVoidMethodA(jni_ctx->executor, executor_cl_,
executor_evaluate_id_, \
+ nullptr);
\
+ while (jni_ctx->output_intermediate_state_ptr->row_idx < num_rows)
{ \
+ increase_buffer_size++;
\
+ buffer_size =
JniUtil::IncreaseReservedBufferSize(increase_buffer_size); \
+ null_map_data.resize(buffer_size);
\
+ chars.resize(buffer_size);
\
+ offsets.resize(buffer_size);
\
+ *(jni_ctx->output_array_null_ptr) =
\
+ reinterpret_cast<int64_t>(null_map_data.data());
\
+ *(jni_ctx->output_value_buffer) =
reinterpret_cast<int64_t>(chars.data()); \
+ jni_ctx->output_intermediate_state_ptr->buffer_size =
buffer_size; \
+ env->CallNonvirtualVoidMethodA(jni_ctx->executor,
executor_cl_, \
+ executor_evaluate_id_,
nullptr); \
+ }
\
+ } else {
\
+ data_column->resize(buffer_size);
\
+ *(jni_ctx->output_value_buffer) =
\
+
reinterpret_cast<int64_t>(data_column->get_raw_data().data); \
+ env->CallNonvirtualVoidMethodA(jni_ctx->executor, executor_cl_,
executor_evaluate_id_, \
+ nullptr);
\
+ while (jni_ctx->output_intermediate_state_ptr->row_idx < num_rows)
{ \
+ increase_buffer_size++;
\
+ buffer_size =
JniUtil::IncreaseReservedBufferSize(increase_buffer_size); \
+ null_map_data.resize(buffer_size);
\
+ data_column->resize(buffer_size);
\
+ *(jni_ctx->output_array_null_ptr) =
\
+ reinterpret_cast<int64_t>(null_map_data.data());
\
+ *(jni_ctx->output_value_buffer) =
\
+
reinterpret_cast<int64_t>(data_column->get_raw_data().data); \
+ jni_ctx->output_intermediate_state_ptr->buffer_size =
buffer_size; \
+ env->CallNonvirtualVoidMethodA(jni_ctx->executor,
executor_cl_, \
+ executor_evaluate_id_,
nullptr); \
+ }
\
+ }
\
} else {
\
return Status::InvalidArgument(strings::Substitute(
\
"Java UDF doesn't support return type $0 now !",
return_type->get_name())); \
diff --git a/be/src/vec/functions/function_java_udf.h
b/be/src/vec/functions/function_java_udf.h
index 9d9a4f8062..7ffa456d82 100644
--- a/be/src/vec/functions/function_java_udf.h
+++ b/be/src/vec/functions/function_java_udf.h
@@ -89,9 +89,17 @@ private:
std::unique_ptr<int64_t[]> input_values_buffer_ptr;
std::unique_ptr<int64_t[]> input_nulls_buffer_ptr;
std::unique_ptr<int64_t[]> input_offsets_ptrs;
+ //used for array type nested column null map, because array nested
column must be nullable
+ std::unique_ptr<int64_t[]> input_array_nulls_buffer_ptr;
+ //used for array type of nested string column offset, not the array
column offset
+ std::unique_ptr<int64_t[]> input_array_string_offsets_ptrs;
std::unique_ptr<int64_t> output_value_buffer;
std::unique_ptr<int64_t> output_null_value;
std::unique_ptr<int64_t> output_offsets_ptr;
+ //used for array type nested column null map
+ std::unique_ptr<int64_t> output_array_null_ptr;
+ //used for array type of nested string column offset
+ std::unique_ptr<int64_t> output_array_string_offsets_ptr;
std::unique_ptr<int32_t> batch_size_ptr;
// intermediate_state includes two parts: reserved / used buffer size
and rows
std::unique_ptr<IntermediateState> output_intermediate_state_ptr;
@@ -101,9 +109,13 @@ private:
input_values_buffer_ptr(new int64_t[num_args]),
input_nulls_buffer_ptr(new int64_t[num_args]),
input_offsets_ptrs(new int64_t[num_args]),
+ input_array_nulls_buffer_ptr(new int64_t[num_args]),
+ input_array_string_offsets_ptrs(new int64_t[num_args]),
output_value_buffer(new int64_t()),
output_null_value(new int64_t()),
output_offsets_ptr(new int64_t()),
+ output_array_null_ptr(new int64_t()),
+ output_array_string_offsets_ptr(new int64_t()),
batch_size_ptr(new int32_t()),
output_intermediate_state_ptr(new IntermediateState()) {}
diff --git a/docs/en/docs/ecosystem/udf/java-user-defined-function.md
b/docs/en/docs/ecosystem/udf/java-user-defined-function.md
index c70a6c6a12..06c421f39c 100644
--- a/docs/en/docs/ecosystem/udf/java-user-defined-function.md
+++ b/docs/en/docs/ecosystem/udf/java-user-defined-function.md
@@ -58,7 +58,9 @@ Java UDF provides users with a Java interface written in UDF
to facilitate the e
|Datetime|LocalDateTime|
|String|String|
|Decimal|BigDecimal|
+|```array<Type>```|```ArrayList<Type>```|
+* Array types can nest basic types. For example, ```array<int>``` in Doris
corresponds to the Java UDF argument type ```ArrayList<Integer>```; other element types follow the same pattern.
## Write UDF functions
This section mainly introduces how to develop a Java UDF. Samples for the Java
version are provided under `samples/doris-demo/java-udf-demo/` for your
reference, Check it out
[here](https://github.com/apache/incubator-doris/tree/master/samples/doris-demo/java-udf-demo)
diff --git a/docs/zh-CN/docs/ecosystem/udf/java-user-defined-function.md
b/docs/zh-CN/docs/ecosystem/udf/java-user-defined-function.md
index 805e8ddddd..fa2fd6ec56 100644
--- a/docs/zh-CN/docs/ecosystem/udf/java-user-defined-function.md
+++ b/docs/zh-CN/docs/ecosystem/udf/java-user-defined-function.md
@@ -56,7 +56,9 @@ Java UDF 为用户提供UDF编写的Java接口,以方便用户使用Java语言
|Datetime|LocalDateTime|
|String|String|
|Decimal|BigDecimal|
+|```array<Type>```|```ArrayList<Type>```|
+* array类型可以嵌套基本类型,例如Doris: ```array<int>```对应JAVA UDF Argument Type:
```ArrayList<Integer>```, 其他依此类推
## 编写 UDF 函数
本小节主要介绍如何开发一个 Java UDF。在 `samples/doris-demo/java-udf-demo/`
下提供了示例,可供参考,查看点击[这里](https://github.com/apache/doris/tree/master/samples/doris-demo/java-udf-demo)
diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
index 61e22fb32a..8829fe8687 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
@@ -238,6 +238,7 @@ public abstract class Type {
.put(PrimitiveType.DECIMAL32,
Sets.newHashSet(BigDecimal.class))
.put(PrimitiveType.DECIMAL64,
Sets.newHashSet(BigDecimal.class))
.put(PrimitiveType.DECIMAL128,
Sets.newHashSet(BigDecimal.class))
+ .put(PrimitiveType.ARRAY, Sets.newHashSet(ArrayList.class))
.build();
public static ArrayList<ScalarType> getIntegerTypes() {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateFunctionStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateFunctionStmt.java
index 1a9e90fb91..179b2f32ed 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateFunctionStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateFunctionStmt.java
@@ -19,6 +19,7 @@ package org.apache.doris.analysis;
import org.apache.doris.catalog.AggregateFunction;
import org.apache.doris.catalog.AliasFunction;
+import org.apache.doris.catalog.ArrayType;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.Function;
import org.apache.doris.catalog.Function.NullableMode;
@@ -541,17 +542,23 @@ public class CreateFunctionStmt extends DdlStmt {
private void checkUdfType(Class clazz, Method method, Type expType, Class
pType, String pname)
throws AnalysisException {
- if (!(expType instanceof ScalarType)) {
+ Set<Class> javaTypes;
+ if (expType instanceof ScalarType) {
+ ScalarType scalarType = (ScalarType) expType;
+ javaTypes =
Type.PrimitiveTypeToJavaClassType.get(scalarType.getPrimitiveType());
+ } else if (expType instanceof ArrayType) {
+ ArrayType arrayType = (ArrayType) expType;
+ javaTypes =
Type.PrimitiveTypeToJavaClassType.get(arrayType.getPrimitiveType());
+ } else {
throw new AnalysisException(
- String.format("Method '%s' in class '%s' does not support
non-scalar type '%s'",
+ String.format("Method '%s' in class '%s' does not support
type '%s'",
method.getName(), clazz.getCanonicalName(),
expType));
}
- ScalarType scalarType = (ScalarType) expType;
- Set<Class> javaTypes =
Type.PrimitiveTypeToJavaClassType.get(scalarType.getPrimitiveType());
+
if (javaTypes == null) {
throw new AnalysisException(
String.format("Method '%s' in class '%s' does not support
type '%s'",
- method.getName(), clazz.getCanonicalName(),
scalarType));
+ method.getName(), clazz.getCanonicalName(),
expType.toString()));
}
if (!javaTypes.contains(pType)) {
throw new AnalysisException(
diff --git a/fe/java-udf/src/main/java/org/apache/doris/udf/BaseExecutor.java
b/fe/java-udf/src/main/java/org/apache/doris/udf/BaseExecutor.java
index 9e481b521a..69a132315e 100644
--- a/fe/java-udf/src/main/java/org/apache/doris/udf/BaseExecutor.java
+++ b/fe/java-udf/src/main/java/org/apache/doris/udf/BaseExecutor.java
@@ -17,6 +17,7 @@
package org.apache.doris.udf;
+import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.Type;
import org.apache.doris.thrift.TJavaUdfExecutorCtorParams;
import org.apache.doris.udf.UdfUtils.JavaUdfDataType;
@@ -32,6 +33,9 @@ import java.math.BigInteger;
import java.math.RoundingMode;
import java.net.URLClassLoader;
import java.nio.charset.StandardCharsets;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.util.ArrayList;
import java.util.Arrays;
public abstract class BaseExecutor {
@@ -48,33 +52,40 @@ public abstract class BaseExecutor {
public static final String UDAF_RESULT_FUNCTION = "getValue";
// Object to deserialize ctor params from BE.
- protected static final TBinaryProtocol.Factory PROTOCOL_FACTORY =
- new TBinaryProtocol.Factory();
+ protected static final TBinaryProtocol.Factory PROTOCOL_FACTORY = new
TBinaryProtocol.Factory();
protected Object udf;
// setup by init() and cleared by close()
protected URLClassLoader classLoader;
- // Return and argument types of the function inferred from the udf method
signature.
+ // Return and argument types of the function inferred from the udf method
+ // signature.
// The JavaUdfDataType enum maps it to corresponding primitive type.
protected JavaUdfDataType[] argTypes;
protected JavaUdfDataType retType;
- // Input buffer from the backend. This is valid for the duration of an
evaluate() call.
+ // Input buffer from the backend. This is valid for the duration of an
+ // evaluate() call.
// These buffers are allocated in the BE.
protected final long inputBufferPtrs;
protected final long inputNullsPtrs;
protected final long inputOffsetsPtrs;
+ protected final long inputArrayNullsPtrs;
+ protected final long inputArrayStringOffsetsPtrs;
- // Output buffer to return non-string values. These buffers are allocated
in the BE.
+ // Output buffer to return non-string values. These buffers are allocated
in the
+ // BE.
protected final long outputBufferPtr;
protected final long outputNullPtr;
protected final long outputOffsetsPtr;
+ protected final long outputArrayNullPtr;
+ protected final long outputArrayStringOffsetsPtr;
protected final long outputIntermediateStatePtr;
protected Class[] argClass;
/**
- * Create a UdfExecutor, using parameters from a serialized thrift object.
Used by
+ * Create a UdfExecutor, using parameters from a serialized thrift object.
Used
+ * by
* the backend.
*/
public BaseExecutor(byte[] thriftParams) throws Exception {
@@ -88,11 +99,14 @@ public abstract class BaseExecutor {
inputBufferPtrs = request.input_buffer_ptrs;
inputNullsPtrs = request.input_nulls_ptrs;
inputOffsetsPtrs = request.input_offsets_ptrs;
-
+ inputArrayNullsPtrs = request.input_array_nulls_buffer_ptr;
+ inputArrayStringOffsetsPtrs = request.input_array_string_offsets_ptrs;
outputBufferPtr = request.output_buffer_ptr;
outputNullPtr = request.output_null_ptr;
outputOffsetsPtr = request.output_offsets_ptr;
outputIntermediateStatePtr = request.output_intermediate_state_ptr;
+ outputArrayNullPtr = request.output_array_null_ptr;
+ outputArrayStringOffsetsPtr = request.output_array_string_offsets_ptr;
Type[] parameterTypes = new Type[request.fn.arg_types.size()];
for (int i = 0; i < request.fn.arg_types.size(); ++i) {
@@ -206,18 +220,24 @@ public abstract class BaseExecutor {
case STRING: {
long offset =
Integer.toUnsignedLong(UdfUtils.UNSAFE.getInt(null,
UdfUtils.UNSAFE.getLong(null,
UdfUtils.getAddressAtOffset(inputOffsetsPtrs, i))
+ 4L * row));
- long numBytes = row == 0 ? offset : offset -
Integer.toUnsignedLong(UdfUtils.UNSAFE.getInt(null,
- UdfUtils.UNSAFE.getLong(null,
-
UdfUtils.getAddressAtOffset(inputOffsetsPtrs, i)) + 4L * (row - 1)));
- long base =
- row == 0 ? UdfUtils.UNSAFE.getLong(null,
UdfUtils.getAddressAtOffset(inputBufferPtrs, i)) :
- UdfUtils.UNSAFE.getLong(null,
UdfUtils.getAddressAtOffset(inputBufferPtrs, i))
- + offset - numBytes;
+ long numBytes = row == 0 ? offset
+ : offset -
Integer.toUnsignedLong(UdfUtils.UNSAFE.getInt(null,
+ UdfUtils.UNSAFE.getLong(null,
+
UdfUtils.getAddressAtOffset(inputOffsetsPtrs, i)) + 4L * (row - 1)));
+ long base = row == 0
+ ? UdfUtils.UNSAFE.getLong(null,
UdfUtils.getAddressAtOffset(inputBufferPtrs, i))
+ : UdfUtils.UNSAFE.getLong(null,
UdfUtils.getAddressAtOffset(inputBufferPtrs, i))
+ + offset - numBytes;
byte[] bytes = new byte[(int) numBytes];
UdfUtils.copyMemory(null, base, bytes,
UdfUtils.BYTE_ARRAY_OFFSET, numBytes);
inputObjects[i] = new String(bytes,
StandardCharsets.UTF_8);
break;
}
+ case ARRAY_TYPE: {
+ Type type = argTypes[i].getItemType();
+ inputObjects[i] = arrayTypeInputData(type, i, row);
+ break;
+ }
default:
throw new UdfRuntimeException("Unsupported argument type:
" + argTypes[i]);
}
@@ -225,6 +245,233 @@ public abstract class BaseExecutor {
return inputObjects;
}
+ public ArrayList<?> arrayTypeInputData(Type type, int argIdx, long row)
+ throws UdfRuntimeException {
+ long offsetStart = (row == 0) ? 0
+ : Integer.toUnsignedLong(UdfUtils.UNSAFE.getInt(null,
UdfUtils.UNSAFE.getLong(null,
+ UdfUtils.getAddressAtOffset(inputOffsetsPtrs, argIdx))
+ 8L * (row - 1)));
+ long offsetEnd = Integer.toUnsignedLong(UdfUtils.UNSAFE.getInt(null,
UdfUtils.UNSAFE.getLong(null,
+ UdfUtils.getAddressAtOffset(inputOffsetsPtrs, argIdx)) + 8L *
row));
+ long arrayNullMapBase = UdfUtils.UNSAFE.getLong(null,
UdfUtils.getAddressAtOffset(inputArrayNullsPtrs, argIdx));
+ long arrayInputBufferBase = UdfUtils.UNSAFE.getLong(null,
UdfUtils.getAddressAtOffset(inputBufferPtrs, argIdx));
+
+ switch (type.getPrimitiveType()) {
+ case BOOLEAN: {
+ ArrayList<Boolean> data = new ArrayList<>();
+ for (long offsetRow = offsetStart; offsetRow < offsetEnd;
++offsetRow) {
+ if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase +
offsetRow) == 1)) {
+ data.add(null);
+ } else {
+ boolean value = UdfUtils.UNSAFE.getBoolean(null,
arrayInputBufferBase + offsetRow);
+ data.add(value);
+ }
+ }
+ return data;
+ }
+ case TINYINT: {
+ ArrayList<Byte> data = new ArrayList<>();
+ for (long offsetRow = offsetStart; offsetRow < offsetEnd;
++offsetRow) {
+ if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase +
offsetRow) == 1)) {
+ data.add(null);
+ } else {
+ byte value = UdfUtils.UNSAFE.getByte(null,
arrayInputBufferBase + offsetRow);
+ data.add(value);
+ }
+ }
+ return data;
+ }
+ case SMALLINT: {
+ ArrayList<Short> data = new ArrayList<>();
+ for (long offsetRow = offsetStart; offsetRow < offsetEnd;
++offsetRow) {
+ if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase +
offsetRow) == 1)) {
+ data.add(null);
+ } else {
+ short value = UdfUtils.UNSAFE.getShort(null,
arrayInputBufferBase + 2L * offsetRow);
+ data.add(value);
+ }
+ }
+ return data;
+ }
+ case INT: {
+ ArrayList<Integer> data = new ArrayList<>();
+ for (long offsetRow = offsetStart; offsetRow < offsetEnd;
++offsetRow) {
+ if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase +
offsetRow) == 1)) {
+ data.add(null);
+ } else {
+ int value = UdfUtils.UNSAFE.getInt(null,
arrayInputBufferBase + 4L * offsetRow);
+ data.add(value);
+ }
+ }
+ return data;
+ }
+ case BIGINT: {
+ ArrayList<Long> data = new ArrayList<>();
+ for (long offsetRow = offsetStart; offsetRow < offsetEnd;
++offsetRow) {
+ if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase +
offsetRow) == 1)) {
+ data.add(null);
+ } else {
+ long value = UdfUtils.UNSAFE.getLong(null,
arrayInputBufferBase + 8L * offsetRow);
+ data.add(value);
+ }
+ }
+ return data;
+ }
+ case FLOAT: {
+ ArrayList<Float> data = new ArrayList<>();
+ for (long offsetRow = offsetStart; offsetRow < offsetEnd;
++offsetRow) {
+ if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase +
offsetRow) == 1)) {
+ data.add(null);
+ } else {
+ float value = UdfUtils.UNSAFE.getFloat(null,
arrayInputBufferBase + 4L * offsetRow);
+ data.add(value);
+ }
+ }
+ return data;
+ }
+ case DOUBLE: {
+ ArrayList<Double> data = new ArrayList<>();
+ for (long offsetRow = offsetStart; offsetRow < offsetEnd;
++offsetRow) {
+ if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase +
offsetRow) == 1)) {
+ data.add(null);
+ } else {
+ double value = UdfUtils.UNSAFE.getDouble(null,
arrayInputBufferBase + 8L * offsetRow);
+ data.add(value);
+ }
+ }
+ return data;
+ }
+ case DATE: {
+ ArrayList<LocalDate> data = new ArrayList<>();
+ for (long offsetRow = offsetStart; offsetRow < offsetEnd;
++offsetRow) {
+ if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase +
offsetRow) == 1)) {
+ data.add(null);
+ } else {
+ long value = UdfUtils.UNSAFE.getLong(null,
arrayInputBufferBase + 8L * offsetRow);
+ // TODO: now argClass[argIdx + argClassOffset] is
java.util.ArrayList, can't get
+ // nested class type
+ // LocalDate obj =
UdfUtils.convertDateToJavaDate(value, argClass[argIdx +
+ // argClassOffset]);
+ LocalDate obj = (LocalDate)
UdfUtils.convertDateToJavaDate(value, LocalDate.class);
+ data.add(obj);
+ }
+ }
+ return data;
+ }
+ case DATETIME: {
+ ArrayList<LocalDateTime> data = new ArrayList<>();
+ for (long offsetRow = offsetStart; offsetRow < offsetEnd;
++offsetRow) {
+ if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase +
offsetRow) == 1)) {
+ data.add(null);
+ } else {
+ long value = UdfUtils.UNSAFE.getLong(null,
arrayInputBufferBase + 8L * offsetRow);
+ // Object obj =
UdfUtils.convertDateTimeToJavaDateTime(value, argClass[argIdx +
+ // argClassOffset]);
+ LocalDateTime obj = (LocalDateTime)
UdfUtils.convertDateTimeToJavaDateTime(value,
+ LocalDateTime.class);
+ data.add(obj);
+ }
+ }
+ return data;
+ }
+ case DATEV2: {
+ ArrayList<LocalDate> data = new ArrayList<>();
+ for (long offsetRow = offsetStart; offsetRow < offsetEnd;
++offsetRow) {
+ if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase +
offsetRow) == 1)) {
+ data.add(null);
+ } else {
+ int value = UdfUtils.UNSAFE.getInt(null,
arrayInputBufferBase + 4L * offsetRow);
+ // Object obj =
UdfUtils.convertDateV2ToJavaDate(value, argClass[argIdx +
+ // argClassOffset]);
+ LocalDate obj = (LocalDate)
UdfUtils.convertDateV2ToJavaDate(value, LocalDate.class);
+ data.add(obj);
+ }
+ }
+ return data;
+ }
+ case DATETIMEV2: {
+ ArrayList<LocalDateTime> data = new ArrayList<>();
+ for (long offsetRow = offsetStart; offsetRow < offsetEnd;
++offsetRow) {
+ if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase +
offsetRow) == 1)) {
+ data.add(null);
+ } else {
+ long value = UdfUtils.UNSAFE.getLong(null,
arrayInputBufferBase + 8L * offsetRow);
+ LocalDateTime obj = (LocalDateTime)
UdfUtils.convertDateTimeV2ToJavaDateTime(value,
+ LocalDateTime.class);
+ data.add(obj);
+ }
+ }
+ return data;
+ }
+ case LARGEINT: {
+ ArrayList<BigInteger> data = new ArrayList<>();
+ byte[] bytes = new byte[16];
+ for (long offsetRow = offsetStart; offsetRow < offsetEnd;
++offsetRow) {
+ if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase +
offsetRow) == 1)) {
+ data.add(null);
+ } else {
+ long value = UdfUtils.UNSAFE.getLong(null,
arrayInputBufferBase + 16L * offsetRow);
+ UdfUtils.copyMemory(null, value, bytes,
UdfUtils.BYTE_ARRAY_OFFSET, 16);
+ data.add(new
BigInteger(UdfUtils.convertByteOrder(bytes)));
+ }
+ }
+ return data;
+ }
+ case DECIMALV2:
+ case DECIMAL32:
+ case DECIMAL64:
+ case DECIMAL128: {
+ int len;
+ if (type.getPrimitiveType() == PrimitiveType.DECIMAL32) {
+ len = 4;
+ } else if (type.getPrimitiveType() == PrimitiveType.DECIMAL64)
{
+ len = 8;
+ } else {
+ len = 16;
+ }
+ ArrayList<BigDecimal> data = new ArrayList<>();
+ byte[] bytes = new byte[len];
+ for (long offsetRow = offsetStart; offsetRow < offsetEnd;
++offsetRow) {
+ if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase +
offsetRow) == 1)) {
+ data.add(null);
+ } else {
+ long value = UdfUtils.UNSAFE.getLong(null,
arrayInputBufferBase + len * offsetRow);
+ UdfUtils.copyMemory(null, value, bytes,
UdfUtils.BYTE_ARRAY_OFFSET, len);
+ BigInteger bigInteger = new
BigInteger(UdfUtils.convertByteOrder(bytes));
+ data.add(new BigDecimal(bigInteger,
argTypes[argIdx].getScale()));
+ }
+ }
+ return data;
+ }
+ case CHAR:
+ case VARCHAR:
+ case STRING: {
+ ArrayList<String> data = new ArrayList<>();
+ long strOffsetBase = UdfUtils.UNSAFE
+ .getLong(null,
UdfUtils.getAddressAtOffset(inputArrayStringOffsetsPtrs, argIdx));
+ for (long offsetRow = offsetStart; offsetRow < offsetEnd;
++offsetRow) {
+ if ((UdfUtils.UNSAFE.getByte(null, arrayNullMapBase +
offsetRow) == 1)) {
+ data.add(null);
+ } else {
+ long stringOffsetStart = (offsetRow == 0) ? 0
+ : Integer.toUnsignedLong(
+ UdfUtils.UNSAFE.getInt(null,
strOffsetBase + 4L * (offsetRow - 1)));
+ long stringOffsetEnd = Integer
+ .toUnsignedLong(UdfUtils.UNSAFE.getInt(null,
strOffsetBase + 4L * offsetRow));
+
+ long numBytes = stringOffsetEnd - stringOffsetStart;
+ long base = arrayInputBufferBase + stringOffsetStart;
+ byte[] bytes = new byte[(int) numBytes];
+ UdfUtils.copyMemory(null, base, bytes,
UdfUtils.BYTE_ARRAY_OFFSET, numBytes);
+ data.add(new String(bytes, StandardCharsets.UTF_8));
+ }
+ }
+ return data;
+ }
+ default:
+ throw new UdfRuntimeException("Unsupported argument type in
nested array: " + type);
+ }
+ }
+
protected abstract long getCurrentOutputOffset(long row);
/**
@@ -380,11 +627,394 @@ public abstract class BaseExecutor {
updateOutputOffset(offset);
return true;
}
+ case ARRAY_TYPE: {
+ Type type = retType.getItemType();
+ return arrayTypeOutputData(obj, type, row);
+ }
default:
throw new UdfRuntimeException("Unsupported return type: " +
retType);
}
}
+ public boolean arrayTypeOutputData(Object obj, Type type, long row) throws
UdfRuntimeException {
+ long offset = getCurrentOutputOffset(row);
+ long bufferSize = UdfUtils.UNSAFE.getLong(null,
outputIntermediateStatePtr);
+ long outputNullMapBase = UdfUtils.UNSAFE.getLong(null,
outputArrayNullPtr);
+ long outputBufferBase = UdfUtils.UNSAFE.getLong(null, outputBufferPtr);
+ switch (type.getPrimitiveType()) {
+ case BOOLEAN: {
+ ArrayList<Boolean> data = (ArrayList<Boolean>) obj;
+ int num = data.size();
+ if (offset + num > bufferSize) {
+ return false;
+ }
+ for (int i = 0; i < num; ++i) {
+ Boolean value = data.get(i);
+ if (value == null) {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 1);
+ } else {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 0);
+ UdfUtils.UNSAFE.putByte(outputBufferBase + (offset +
i), value ? (byte) 1 : 0);
+ }
+ }
+ offset += num;
+ UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null,
outputOffsetsPtr) + 8L * row,
+ Long.parseUnsignedLong(String.valueOf(offset)));
+ updateOutputOffset(offset);
+ return true;
+ }
+ case TINYINT: {
+ ArrayList<Byte> data = (ArrayList<Byte>) obj;
+ int num = data.size();
+ if (offset + num > bufferSize) {
+ return false;
+ }
+ for (int i = 0; i < num; ++i) {
+ Byte value = data.get(i);
+ if (value == null) {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 1);
+ } else {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 0);
+ UdfUtils.UNSAFE.putByte(outputBufferBase + (offset +
i), value);
+ }
+ }
+ offset += num;
+ UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null,
outputOffsetsPtr) + 8L * row,
+ Long.parseUnsignedLong(String.valueOf(offset)));
+ updateOutputOffset(offset);
+ return true;
+ }
+ case SMALLINT: {
+ ArrayList<Short> data = (ArrayList<Short>) obj;
+ int num = data.size();
+ if (offset + num > bufferSize) {
+ return false;
+ }
+ for (int i = 0; i < num; ++i) {
+ Short value = data.get(i);
+ if (value == null) {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 1);
+ } else {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 0);
+ UdfUtils.UNSAFE.putShort(outputBufferBase + ((offset +
i) * 2L), value);
+ }
+ }
+ offset += num;
+ UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null,
outputOffsetsPtr) + 8L * row,
+ Long.parseUnsignedLong(String.valueOf(offset)));
+ updateOutputOffset(offset);
+ return true;
+ }
+ case INT: {
+ ArrayList<Integer> data = (ArrayList<Integer>) obj;
+ int num = data.size();
+ if (offset + num > bufferSize) {
+ return false;
+ }
+ for (int i = 0; i < num; ++i) {
+ Integer value = data.get(i);
+ if (value == null) {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 1);
+ } else {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 0);
+ UdfUtils.UNSAFE.putInt(outputBufferBase + ((offset +
i) * 4L), value);
+ }
+ }
+ offset += num;
+ UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null,
outputOffsetsPtr) + 8L * row,
+ Long.parseUnsignedLong(String.valueOf(offset)));
+ updateOutputOffset(offset);
+ return true;
+ }
+ case BIGINT: {
+ ArrayList<Long> data = (ArrayList<Long>) obj;
+ int num = data.size();
+ if (offset + num > bufferSize) {
+ return false;
+ }
+ for (int i = 0; i < num; ++i) {
+ Long value = data.get(i);
+ if (value == null) {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 1);
+ } else {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 0);
+ UdfUtils.UNSAFE.putLong(outputBufferBase + ((offset +
i) * 8L), value);
+ }
+ }
+ offset += num;
+ UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null,
outputOffsetsPtr) + 8L * row,
+ Long.parseUnsignedLong(String.valueOf(offset)));
+ updateOutputOffset(offset);
+ return true;
+ }
+ case FLOAT: {
+ ArrayList<Float> data = (ArrayList<Float>) obj;
+ int num = data.size();
+ if (offset + num > bufferSize) {
+ return false;
+ }
+ for (int i = 0; i < num; ++i) {
+ Float value = data.get(i);
+ if (value == null) {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 1);
+ } else {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 0);
+ UdfUtils.UNSAFE.putFloat(outputBufferBase + ((offset +
i) * 4L), value);
+ }
+ }
+ offset += num;
+ UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null,
outputOffsetsPtr) + 8L * row,
+ Long.parseUnsignedLong(String.valueOf(offset)));
+ updateOutputOffset(offset);
+ return true;
+ }
+ case DOUBLE: {
+ ArrayList<Double> data = (ArrayList<Double>) obj;
+ int num = data.size();
+ if (offset + num > bufferSize) {
+ return false;
+ }
+ for (int i = 0; i < num; ++i) {
+ Double value = data.get(i);
+ if (value == null) {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 1);
+ } else {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 0);
+ UdfUtils.UNSAFE.putDouble(outputBufferBase + ((offset
+ i) * 8L), value);
+ }
+ }
+ offset += num;
+ UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null,
outputOffsetsPtr) + 8L * row,
+ Long.parseUnsignedLong(String.valueOf(offset)));
+ updateOutputOffset(offset);
+ return true;
+ }
+ case DATE: {
+ ArrayList<LocalDate> data = (ArrayList<LocalDate>) obj;
+ int num = data.size();
+ if (offset + num > bufferSize) {
+ return false;
+ }
+ for (int i = 0; i < num; ++i) {
+ LocalDate value = data.get(i);
+ if (value == null) {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 1);
+ } else {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 0);
+ long time = UdfUtils.convertToDate(value,
LocalDate.class);
+ UdfUtils.UNSAFE.putLong(outputBufferBase + ((offset +
i) * 8L), time);
+ }
+ }
+ offset += num;
+ UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null,
outputOffsetsPtr) + 8L * row,
+ Long.parseUnsignedLong(String.valueOf(offset)));
+ updateOutputOffset(offset);
+ return true;
+ }
+ case DATETIME: {
+ ArrayList<LocalDateTime> data = (ArrayList<LocalDateTime>) obj;
+ int num = data.size();
+ if (offset + num > bufferSize) {
+ return false;
+ }
+ for (int i = 0; i < num; ++i) {
+ LocalDateTime value = data.get(i);
+ if (value == null) {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 1);
+ } else {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 0);
+ long time = UdfUtils.convertToDateTime(value,
LocalDateTime.class);
+ UdfUtils.UNSAFE.putLong(outputBufferBase + ((offset +
i) * 8L), time);
+ }
+ }
+ offset += num;
+ UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null,
outputOffsetsPtr) + 8L * row,
+ Long.parseUnsignedLong(String.valueOf(offset)));
+ updateOutputOffset(offset);
+ return true;
+ }
+ case DATEV2: {
+ ArrayList<LocalDate> data = (ArrayList<LocalDate>) obj;
+ int num = data.size();
+ if (offset + num > bufferSize) {
+ return false;
+ }
+ for (int i = 0; i < num; ++i) {
+ LocalDate value = data.get(i);
+ if (value == null) {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 1);
+ } else {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 0);
+ int time = UdfUtils.convertToDateV2(value,
LocalDate.class);
+ UdfUtils.UNSAFE.putInt(outputBufferBase + ((offset +
i) * 4L), time);
+ }
+ }
+ offset += num;
+ UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null,
outputOffsetsPtr) + 8L * row,
+ Long.parseUnsignedLong(String.valueOf(offset)));
+ updateOutputOffset(offset);
+ return true;
+ }
+ case DATETIMEV2: {
+ ArrayList<LocalDateTime> data = (ArrayList<LocalDateTime>) obj;
+ int num = data.size();
+ if (offset + num > bufferSize) {
+ return false;
+ }
+ for (int i = 0; i < num; ++i) {
+ LocalDateTime value = data.get(i);
+ if (value == null) {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 1);
+ } else {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 0);
+ long time = UdfUtils.convertToDateTimeV2(value,
LocalDateTime.class);
+ UdfUtils.UNSAFE.putLong(outputBufferBase + ((offset +
i) * 8L), time);
+ }
+ }
+ offset += num;
+ UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null,
outputOffsetsPtr) + 8L * row,
+ Long.parseUnsignedLong(String.valueOf(offset)));
+ updateOutputOffset(offset);
+ return true;
+ }
+ case LARGEINT: {
+ ArrayList<BigInteger> data = (ArrayList<BigInteger>) obj;
+ int num = data.size();
+ if (offset + num > bufferSize) {
+ return false;
+ }
+ for (int i = 0; i < num; ++i) {
+ BigInteger bigInteger = data.get(i);
+ if (bigInteger == null) {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 1);
+ } else {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 0);
+ byte[] bytes =
UdfUtils.convertByteOrder(bigInteger.toByteArray());
+ byte[] value = new byte[16];
+ // check data is negative
+ if (bigInteger.signum() == -1) {
+ Arrays.fill(value, (byte) -1);
+ }
+ for (int index = 0; index < Math.min(bytes.length,
value.length); ++index) {
+ value[index] = bytes[index];
+ }
+ UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET,
null,
+ outputBufferBase + ((offset + i) * 16L),
value.length);
+ }
+ }
+ offset += num;
+ UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null,
outputOffsetsPtr) + 8L * row,
+ Long.parseUnsignedLong(String.valueOf(offset)));
+ updateOutputOffset(offset);
+ return true;
+ }
+ case DECIMALV2: {
+ ArrayList<BigDecimal> data = (ArrayList<BigDecimal>) obj;
+ int num = data.size();
+ if (offset + num > bufferSize) {
+ return false;
+ }
+ for (int i = 0; i < num; ++i) {
+ BigDecimal bigDecimal = data.get(i);
+ if (bigDecimal == null) {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 1);
+ } else {
+ BigInteger bigInteger = bigDecimal.setScale(9,
RoundingMode.HALF_EVEN).unscaledValue();
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 0);
+ byte[] bytes =
UdfUtils.convertByteOrder(bigInteger.toByteArray());
+ byte[] value = new byte[16];
+ // check data is negative
+ if (bigInteger.signum() == -1) {
+ Arrays.fill(value, (byte) -1);
+ }
+ for (int index = 0; index < Math.min(bytes.length,
value.length); ++index) {
+ value[index] = bytes[index];
+ }
+ UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET,
null,
+ outputBufferBase + ((offset + i) * 16L),
value.length);
+ }
+ }
+ offset += num;
+ UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null,
outputOffsetsPtr) + 8L * row,
+ Long.parseUnsignedLong(String.valueOf(offset)));
+ updateOutputOffset(offset);
+ return true;
+ }
+ case DECIMAL32:
+ case DECIMAL64:
+ case DECIMAL128: {
+ ArrayList<BigDecimal> data = (ArrayList<BigDecimal>) obj;
+ int num = data.size();
+ if (offset + num > bufferSize) {
+ return false;
+ }
+ for (int i = 0; i < num; ++i) {
+ BigDecimal bigDecimal = data.get(i);
+ if (bigDecimal == null) {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 1);
+ } else {
+ BigInteger bigInteger =
bigDecimal.setScale(retType.getScale(), RoundingMode.HALF_EVEN)
+ .unscaledValue();
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 0);
+ byte[] bytes =
UdfUtils.convertByteOrder(bigInteger.toByteArray());
+ byte[] value = new byte[16];
+ // check data is negative
+ if (bigInteger.signum() == -1) {
+ Arrays.fill(value, (byte) -1);
+ }
+ for (int index = 0; index < Math.min(bytes.length,
value.length); ++index) {
+ value[index] = bytes[index];
+ }
+ UdfUtils.copyMemory(value, UdfUtils.BYTE_ARRAY_OFFSET,
null,
+ outputBufferBase + ((offset + i) * 16L),
value.length);
+ }
+ }
+ offset += num;
+ UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null,
outputOffsetsPtr) + 8L * row,
+ Long.parseUnsignedLong(String.valueOf(offset)));
+ updateOutputOffset(offset);
+ return true;
+ }
+ case CHAR:
+ case VARCHAR:
+ case STRING: {
+ ArrayList<String> data = (ArrayList<String>) obj;
+ int num = data.size();
+ if (offset + num > bufferSize) {
+ return false;
+ }
+ long outputStrOffsetBase = UdfUtils.UNSAFE.getLong(null,
outputArrayStringOffsetsPtr);
+ for (int i = 0; i < num; ++i) {
+ String value = data.get(i);
+ if (value == null) {
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 1);
+ } else {
+ byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
+ long strOffset = (offset + i == 0) ? 0
+ :
Integer.toUnsignedLong(UdfUtils.UNSAFE.getInt(null,
+ outputStrOffsetBase + ((offset + i -
1) * 4L)));
+ if (strOffset + bytes.length > bufferSize) {
+ return false;
+ }
+ UdfUtils.UNSAFE.putByte(outputNullMapBase + (offset +
i), (byte) 0);
+ strOffset += bytes.length;
+ UdfUtils.UNSAFE.putInt(null, outputStrOffsetBase + 4L
* (offset + i),
+
Integer.parseUnsignedInt(String.valueOf(strOffset)));
+ UdfUtils.copyMemory(bytes, UdfUtils.BYTE_ARRAY_OFFSET,
null,
+ outputBufferBase + strOffset - bytes.length,
bytes.length);
+ }
+ }
+ offset += num;
+ UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null,
outputOffsetsPtr) + 8L * row,
+ Long.parseUnsignedLong(String.valueOf(offset)));
+ updateOutputOffset(offset);
+ return true;
+ }
+ default:
+ throw new UdfRuntimeException("Unsupported argument type in
nested array: " + type);
+ }
+ }
+
protected void updateOutputOffset(long offset) {
}
}
diff --git a/fe/java-udf/src/main/java/org/apache/doris/udf/UdfExecutor.java
b/fe/java-udf/src/main/java/org/apache/doris/udf/UdfExecutor.java
index 5f043f64a8..5cc295dd8e 100644
--- a/fe/java-udf/src/main/java/org/apache/doris/udf/UdfExecutor.java
+++ b/fe/java-udf/src/main/java/org/apache/doris/udf/UdfExecutor.java
@@ -72,7 +72,7 @@ public class UdfExecutor extends BaseExecutor {
int batchSize = UdfUtils.UNSAFE.getInt(null, batchSizePtr);
try {
if (retType.equals(JavaUdfDataType.STRING) ||
retType.equals(JavaUdfDataType.VARCHAR)
- || retType.equals(JavaUdfDataType.CHAR)) {
+ || retType.equals(JavaUdfDataType.CHAR) ||
retType.equals(JavaUdfDataType.ARRAY_TYPE)) {
// If this udf return variable-size type (e.g.) String, we
have to allocate output
// buffer multiple times until buffer size is enough to store
output column. So we
// always begin with the last evaluated row instead of
beginning of this batch.
@@ -95,12 +95,12 @@ public class UdfExecutor extends BaseExecutor {
}
}
} catch (Exception e) {
- if (retType.equals(JavaUdfDataType.STRING)) {
+ if (retType.equals(JavaUdfDataType.STRING) ||
retType.equals(JavaUdfDataType.ARRAY_TYPE)) {
UdfUtils.UNSAFE.putLong(null, outputIntermediateStatePtr + 8,
batchSize);
}
throw new UdfRuntimeException("UDF::evaluate() ran into a
problem.", e);
}
- if (retType.equals(JavaUdfDataType.STRING)) {
+ if (retType.equals(JavaUdfDataType.STRING) ||
retType.equals(JavaUdfDataType.ARRAY_TYPE)) {
UdfUtils.UNSAFE.putLong(null, outputIntermediateStatePtr + 8,
rowIdx);
}
}
@@ -131,6 +131,9 @@ public class UdfExecutor extends BaseExecutor {
if (retType.equals(JavaUdfDataType.STRING)) {
UdfUtils.UNSAFE.putInt(null, UdfUtils.UNSAFE.getLong(null,
outputOffsetsPtr)
+ 4L * row,
Integer.parseUnsignedInt(String.valueOf(outputOffset)));
+ } else if (retType.equals(JavaUdfDataType.ARRAY_TYPE)) {
+ UdfUtils.UNSAFE.putLong(null, UdfUtils.UNSAFE.getLong(null,
outputOffsetsPtr) + 8L * row,
+ Long.parseUnsignedLong(String.valueOf(outputOffset)));
}
return true;
}
diff --git a/fe/java-udf/src/main/java/org/apache/doris/udf/UdfUtils.java
b/fe/java-udf/src/main/java/org/apache/doris/udf/UdfUtils.java
index 4746350cff..9df2174636 100644
--- a/fe/java-udf/src/main/java/org/apache/doris/udf/UdfUtils.java
+++ b/fe/java-udf/src/main/java/org/apache/doris/udf/UdfUtils.java
@@ -17,6 +17,7 @@
package org.apache.doris.udf;
+import org.apache.doris.catalog.ArrayType;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.catalog.Type;
@@ -89,13 +90,15 @@ public class UdfUtils {
DATETIMEV2("DATETIMEV2", TPrimitiveType.DATETIMEV2, 8),
DECIMAL32("DECIMAL32", TPrimitiveType.DECIMAL32, 4),
DECIMAL64("DECIMAL64", TPrimitiveType.DECIMAL64, 8),
- DECIMAL128("DECIMAL128", TPrimitiveType.DECIMAL128I, 16);
+ DECIMAL128("DECIMAL128", TPrimitiveType.DECIMAL128I, 16),
+ ARRAY_TYPE("ARRAY_TYPE", TPrimitiveType.ARRAY, 0);
private final String description;
private final TPrimitiveType thriftType;
private final int len;
private int precision;
private int scale;
+ private Type itemType;
JavaUdfDataType(String description, TPrimitiveType thriftType, int
len) {
this.description = description;
@@ -144,6 +147,8 @@ public class UdfUtils {
} else if (c == BigDecimal.class) {
return Sets.newHashSet(JavaUdfDataType.DECIMALV2,
JavaUdfDataType.DECIMAL32, JavaUdfDataType.DECIMAL64,
JavaUdfDataType.DECIMAL128);
+ } else if (c == java.util.ArrayList.class) {
+ return Sets.newHashSet(JavaUdfDataType.ARRAY_TYPE);
}
return Sets.newHashSet(JavaUdfDataType.INVALID_TYPE);
}
@@ -175,6 +180,14 @@ public class UdfUtils {
public void setScale(int scale) {
this.scale = scale;
}
+
+ public Type getItemType() {
+ return itemType;
+ }
+
+ public void setItemType(Type type) {
+ this.itemType = type;
+ }
}
protected static Pair<Type, Integer> fromThrift(TTypeDesc typeDesc, int
nodeIdx) throws InternalException {
@@ -201,6 +214,14 @@ public class UdfUtils {
}
break;
}
+ case ARRAY: {
+ Preconditions.checkState(nodeIdx + 1 <
typeDesc.getTypesSize());
+ Pair<Type, Integer> childType = fromThrift(typeDesc, nodeIdx +
1);
+ type = new ArrayType(childType.first);
+ nodeIdx = childType.second;
+ break;
+ }
+
default:
throw new InternalException("Return type " + node.getType() +
" is not supported now!");
}
@@ -238,7 +259,7 @@ public class UdfUtils {
}
public static URLClassLoader getClassLoader(String jarPath, ClassLoader
parent)
- throws MalformedURLException,
FileNotFoundException {
+ throws MalformedURLException, FileNotFoundException {
File file = new File(jarPath);
if (!file.exists()) {
throw new FileNotFoundException("Can not find local file: " +
jarPath);
@@ -268,6 +289,13 @@ public class UdfUtils {
if (retType.isDecimalV3() || retType.isDatetimeV2()) {
result.setPrecision(retType.getPrecision());
result.setScale(((ScalarType) retType).getScalarScale());
+ } else if (retType.isArrayType()) {
+ ArrayType arrType = (ArrayType) retType;
+ result.setItemType(arrType.getItemType());
+ if (arrType.getItemType().isDatetimeV2() ||
arrType.getItemType().isDecimalV3()) {
+ result.setPrecision(arrType.getItemType().getPrecision());
+ result.setScale(((ScalarType)
arrType.getItemType()).getScalarScale());
+ }
}
return Pair.of(res.length != 0, result);
}
@@ -290,6 +318,9 @@ public class UdfUtils {
if (parameterTypes[finalI].isDecimalV3() ||
parameterTypes[finalI].isDatetimeV2()) {
inputArgTypes[i].setPrecision(parameterTypes[finalI].getPrecision());
inputArgTypes[i].setScale(((ScalarType)
parameterTypes[finalI]).getScalarScale());
+ } else if (parameterTypes[finalI].isArrayType()) {
+ ArrayType arrType = (ArrayType) parameterTypes[finalI];
+ inputArgTypes[i].setItemType(arrType.getItemType());
}
if (res.length == 0) {
return Pair.of(false, inputArgTypes);
diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift
index d1c9304eb7..5407952daa 100644
--- a/gensrc/thrift/Types.thrift
+++ b/gensrc/thrift/Types.thrift
@@ -426,6 +426,18 @@ struct TJavaUdfExecutorCtorParams {
// this is used to pass place or places to FE, which could help us call jni
// only once and can process a batch size data in JAVA-Udaf
11: optional i64 input_places_ptr
+
+ // for array type input: null map of the nested column
+ 12: optional i64 input_array_nulls_buffer_ptr
+
+ // for array type input: offsets of the nested string column
+ 13: optional i64 input_array_string_offsets_ptrs
+
+ // for array type output: null map of the nested column
+ 14: optional i64 output_array_null_ptr
+
+ // for array type output: offsets of the nested string column
+ 15: optional i64 output_array_string_offsets_ptr
}
// Contains all interesting statistics from a single 'memory pool' in the JVM.
diff --git a/regression-test/data/javaudf_p0/test_javaudf_array.out
b/regression-test/data/javaudf_p0/test_javaudf_array.out
new file mode 100644
index 0000000000..ad5bda0b92
--- /dev/null
+++ b/regression-test/data/javaudf_p0/test_javaudf_array.out
@@ -0,0 +1,133 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select_default --
+1 2 2022-01-01 2022-01-01T11:11:11 a1b
+2 4 2022-01-01 2022-01-01T11:11:11 a2b
+3 6 2022-01-01 2022-01-01T11:11:11 a3b
+4 8 2022-01-01 2022-01-01T11:11:11 a4b
+5 10 2022-01-01 2022-01-01T11:11:11 a5b
+6 12 2022-01-01 2022-01-01T11:11:11 a6b
+7 14 2022-01-01 2022-01-01T11:11:11 a7b
+8 16 2022-01-01 2022-01-01T11:11:11 a8b
+9 18 2022-01-01 2022-01-01T11:11:11 a9b
+10 20 2022-06-06 2022-01-01T12:12:12 a10b
+
+-- !select_1 --
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+
+-- !select_2 --
+\N
+
+-- !select_3 --
+[1] 2
+[2] 4
+[3] 6
+[4] 8
+[5] 10
+[6] 12
+[7] 14
+[8] 16
+[9] 18
+[10] 20
+
+-- !select_4 --
+[2] 2
+[4] 4
+[6] 6
+[8] 8
+[10] 10
+[12] 12
+[14] 14
+[16] 16
+[18] 18
+[20] 20
+
+-- !select_5 --
+\N
+
+-- !select_6 --
+['a1b'] 2
+['a2b'] 4
+['a3b'] 6
+['a4b'] 8
+['a5b'] 10
+['a6b'] 12
+['a7b'] 14
+['a8b'] 16
+['a9b'] 18
+['a10b'] 20
+
+-- !select_7 --
+['a1b1'] 2
+['a2b2'] 4
+['a3b3'] 6
+['a4b4'] 8
+['a5b5'] 10
+['a6b6'] 12
+['a7b7'] 14
+['a8b8'] 16
+['a9b9'] 18
+['a10b10'] 20
+
+-- !select_8 --
+\N
+
+-- !select_9 --
+a1b 2
+a2b 4
+a3b 6
+a4b 8
+a5b 10
+a6b 12
+a7b 14
+a8b 16
+a9b 18
+a10b 20
+
+-- !select_10 --
+a1b1 2
+a2b2 4
+a3b3 6
+a4b4 8
+a5b5 10
+a6b6 12
+a7b7 14
+a8b8 16
+a9b9 18
+a10b10 20
+
+-- !select_11 --
+\N
+
+-- !select_12 --
+[2022-01-01 11:11:11] 2
+[2022-01-01 11:11:11] 4
+[2022-01-01 11:11:11] 6
+[2022-01-01 11:11:11] 8
+[2022-01-01 11:11:11] 10
+[2022-01-01 11:11:11] 12
+[2022-01-01 11:11:11] 14
+[2022-01-01 11:11:11] 16
+[2022-01-01 11:11:11] 18
+[2022-01-01 12:12:12] 20
+
+-- !select_13 --
+[2022-01-01] 2
+[2022-01-01] 4
+[2022-01-01] 6
+[2022-01-01] 8
+[2022-01-01] 10
+[2022-01-01] 12
+[2022-01-01] 14
+[2022-01-01] 16
+[2022-01-01] 18
+[2022-06-06] 20
+
diff --git
a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayDateTest.java
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayDateTest.java
new file mode 100644
index 0000000000..8660faea8c
--- /dev/null
+++
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayDateTest.java
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.udf;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+
+import java.time.LocalDate;
+import java.util.ArrayList;
+
+/**
+ * Regression-test Java UDF that returns its ArrayList<LocalDate> argument
+ * unchanged.
+ */
+public class ArrayDateTest extends UDF {
+ /**
+ * @param date input list; returned as-is (including when it is null)
+ * @return the same list instance that was passed in
+ */
+ public ArrayList<LocalDate> evaluate(ArrayList<LocalDate> date) {
+ return date;
+ }
+}
diff --git
a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayDateTimeTest.java
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayDateTimeTest.java
new file mode 100644
index 0000000000..ed51ccc292
--- /dev/null
+++
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayDateTimeTest.java
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.udf;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+
+import java.time.LocalDateTime;
+import java.util.ArrayList;
+
+/**
+ * Regression-test Java UDF that returns its ArrayList<LocalDateTime> argument
+ * unchanged.
+ */
+public class ArrayDateTimeTest extends UDF {
+ /**
+ * @param date input list; returned as-is (including when it is null)
+ * @return the same list instance that was passed in
+ */
+ public ArrayList<LocalDateTime> evaluate(ArrayList<LocalDateTime> date) {
+ return date;
+ }
+}
diff --git
a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayIntTest.java
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayIntTest.java
new file mode 100644
index 0000000000..4d7f2c636e
--- /dev/null
+++
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayIntTest.java
@@ -0,0 +1,38 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.udf;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+
+import java.util.ArrayList;
+
+public class ArrayIntTest extends UDF {
+ public Integer evaluate(ArrayList<Integer> res) {
+ Integer value = 0;
+ if (res == null) {
+ return null;
+ }
+ for (int i =0; i < res.size(); ++i) {
+ Integer data = res.get(i);
+ if (data != null) {
+ value = value + data;
+ }
+ }
+ return value;
+ }
+}
diff --git
a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayReturnArrayIntTest.java
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayReturnArrayIntTest.java
new file mode 100644
index 0000000000..ed9235f911
--- /dev/null
+++
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayReturnArrayIntTest.java
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.udf;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+
+import java.util.ArrayList;
+
+public class ArrayReturnArrayIntTest extends UDF {
+ public ArrayList<Integer> evaluate(ArrayList<Integer> res) {
+ Integer value = 0;
+ if (res == null) {
+ return null;
+ }
+ for (int i =0; i < res.size(); ++i) {
+ Integer data = res.get(i);
+ if (data != null) {
+ value = value + data;
+ }
+ }
+ ArrayList<Integer> result = new ArrayList<Integer>();
+ result.add(value);
+ return result;
+ }
+}
diff --git
a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayReturnArrayStringTest.java
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayReturnArrayStringTest.java
new file mode 100644
index 0000000000..0f0d73db86
--- /dev/null
+++
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayReturnArrayStringTest.java
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.udf;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+
+import java.util.ArrayList;
+
+/**
+ * Regression-test Java UDF that concatenates the non-null elements of a
+ * string list and returns the result wrapped in a single-element list.
+ */
+public class ArrayReturnArrayStringTest extends UDF {
+    /**
+     * @param res input list; may be null and may contain null elements
+     * @return null for a null list, otherwise a new one-element list holding
+     *         the concatenation of the non-null elements in list order
+     */
+    public ArrayList<String> evaluate(ArrayList<String> res) {
+        if (res == null) {
+            return null;
+        }
+        // StringBuilder avoids re-copying the accumulated text per element;
+        // the per-element debug print to stdout was dropped.
+        StringBuilder joined = new StringBuilder();
+        for (String element : res) {
+            if (element != null) {
+                joined.append(element);
+            }
+        }
+        ArrayList<String> result = new ArrayList<String>();
+        result.add(joined.toString());
+        return result;
+    }
+}
diff --git
a/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayStringTest.java
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayStringTest.java
new file mode 100644
index 0000000000..ea860babac
--- /dev/null
+++
b/regression-test/java-udf-src/src/main/java/org/apache/doris/udf/ArrayStringTest.java
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.udf;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+
+import java.util.ArrayList;
+
+/**
+ * Regression-test Java UDF that concatenates the non-null elements of a
+ * string list.
+ */
+public class ArrayStringTest extends UDF {
+    /**
+     * @param res input list; may be null and may contain null elements
+     * @return null for a null list, otherwise the concatenation of the
+     *         non-null elements in list order ("" for an empty list)
+     */
+    public String evaluate(ArrayList<String> res) {
+        if (res == null) {
+            return null;
+        }
+        // StringBuilder avoids re-copying the accumulated text per element;
+        // the per-element debug print to stdout was dropped.
+        StringBuilder joined = new StringBuilder();
+        for (String element : res) {
+            if (element != null) {
+                joined.append(element);
+            }
+        }
+        return joined.toString();
+    }
+}
diff --git a/regression-test/suites/javaudf_p0/test_javaudf_array.groovy
b/regression-test/suites/javaudf_p0/test_javaudf_array.groovy
new file mode 100644
index 0000000000..15ecb239fc
--- /dev/null
+++ b/regression-test/suites/javaudf_p0/test_javaudf_array.groovy
@@ -0,0 +1,122 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import java.nio.file.Paths
+
+// Regression suite for Java UDFs whose arguments and/or return values are ARRAY types.
+// It builds a 10-row table, registers six UDFs from the java-udf-case jar, runs the
+// qt_select_* queries against them, and finally drops the UDFs and the table.
+suite("test_javaudf_array") {
+ def tableName = "test_javaudf_array"
+ // Jar that holds the classes named by the "symbol" properties below.
+ def jarPath =
"""${context.file.parent}/jars/java-udf-case-jar-with-dependencies.jar"""
+
+ log.info("Jar path: ${jarPath}".toString())
+ try {
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName} (
+ `user_id` INT NOT NULL COMMENT "",
+ `tinyint_col` TINYINT NOT NULL COMMENT "",
+ `datev2_col` datev2 NOT NULL COMMENT "",
+ `datetimev2_col` datetimev2 NOT NULL COMMENT "",
+ `string_col` STRING NOT NULL COMMENT ""
+ )
+ DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
+ """
+ // Build the VALUES list: rows 1..9 each end with a comma, and the 10th row is
+ // appended separately without one. tinyint_col is written as the SQL expression i*2.
+ StringBuilder sb = new StringBuilder()
+ int i = 1
+ for (; i < 10; i ++) {
+ sb.append("""
+ (${i},${i}*2,'2022-01-01','2022-01-01 11:11:11','a${i}b'),
+ """)
+ }
+ sb.append("""
+ (${i},${i}*2,'2022-06-06','2022-01-01 12:12:12','a${i}b')
+ """)
+ sql """ INSERT INTO ${tableName} VALUES
+ ${sb.toString()}
+ """
+ qt_select_default """ SELECT * FROM ${tableName} t ORDER BY user_id;
"""
+
+ // Fail early with a clear message when the UDF jar is not present at jarPath.
+ File path = new File(jarPath)
+ if (!path.exists()) {
+ throw new IllegalStateException("""${jarPath} doesn't exist! """)
+ }
+
+ // array<int> -> int (ArrayIntTest); select_2 passes a NULL argument.
+ sql """ DROP FUNCTION IF EXISTS java_udf_array_int_test(array<int>);
"""
+ sql """ CREATE FUNCTION java_udf_array_int_test(array<int>) RETURNS
int PROPERTIES (
+ "file"="file://${jarPath}",
+ "symbol"="org.apache.doris.udf.ArrayIntTest",
+ "type"="JAVA_UDF"
+ ); """
+ qt_select_1 """ SELECT java_udf_array_int_test(array(user_id)) result
FROM ${tableName} ORDER BY result; """
+ qt_select_2 """ SELECT java_udf_array_int_test(null) result ; """
+
+
+ // array<int> -> array<int> (ArrayReturnArrayIntTest); select_5 passes a NULL argument.
+ sql """ DROP FUNCTION IF EXISTS
java_udf_array_return_int_test(array<int>); """
+ sql """ CREATE FUNCTION java_udf_array_return_int_test(array<int>)
RETURNS array<int> PROPERTIES (
+ "file"="file://${jarPath}",
+ "symbol"="org.apache.doris.udf.ArrayReturnArrayIntTest",
+ "type"="JAVA_UDF"
+ ); """
+ qt_select_3 """ SELECT java_udf_array_return_int_test(array(user_id)),
tinyint_col as result FROM ${tableName} ORDER BY result; """
+ qt_select_4 """ SELECT
java_udf_array_return_int_test(array(user_id,user_id)), tinyint_col as result
FROM ${tableName} ORDER BY result; """
+ qt_select_5 """ SELECT java_udf_array_return_int_test(null) result ;
"""
+
+
+ // array<string> -> array<string> (ArrayReturnArrayStringTest); select_8 passes a NULL argument.
+ sql """ DROP FUNCTION IF EXISTS
java_udf_array_return_string_test(array<string>); """
+ sql """ CREATE FUNCTION
java_udf_array_return_string_test(array<string>) RETURNS array<string>
PROPERTIES (
+ "file"="file://${jarPath}",
+ "symbol"="org.apache.doris.udf.ArrayReturnArrayStringTest",
+ "type"="JAVA_UDF"
+ ); """
+ qt_select_6 """ SELECT
java_udf_array_return_string_test(array(string_col)), tinyint_col as result
FROM ${tableName} ORDER BY result; """
+ qt_select_7 """ SELECT
java_udf_array_return_string_test(array(string_col, cast(user_id as string))),
tinyint_col as result FROM ${tableName} ORDER BY result; """
+ qt_select_8 """ SELECT java_udf_array_return_string_test(null) result
; """
+
+ // array<string> -> string (ArrayStringTest); select_11 passes a NULL argument.
+ sql """ DROP FUNCTION IF EXISTS
java_udf_array_string_test(array<string>); """
+ sql """ CREATE FUNCTION java_udf_array_string_test(array<string>)
RETURNS string PROPERTIES (
+ "file"="file://${jarPath}",
+ "symbol"="org.apache.doris.udf.ArrayStringTest",
+ "type"="JAVA_UDF"
+ ); """
+ qt_select_9 """ SELECT java_udf_array_string_test(array(string_col)),
tinyint_col as result FROM ${tableName} ORDER BY result; """
+ qt_select_10 """ SELECT java_udf_array_string_test(array(string_col,
cast(user_id as string))), tinyint_col as result FROM ${tableName} ORDER BY
result; """
+ qt_select_11 """ SELECT java_udf_array_string_test(null) result ; """
+
+ // array<datetime> -> array<datetime> (ArrayDateTimeTest); the query feeds it the
+ // datetimev2 column. The function name is spelled "datatime" throughout.
+ sql """ DROP FUNCTION IF EXISTS
java_udf_array_datatime_test(array<datetime>); """
+ sql """ CREATE FUNCTION java_udf_array_datatime_test(array<datetime>)
RETURNS array<datetime> PROPERTIES (
+ "file"="file://${jarPath}",
+ "symbol"="org.apache.doris.udf.ArrayDateTimeTest",
+ "type"="JAVA_UDF"
+ ); """
+ qt_select_12 """ SELECT
java_udf_array_datatime_test(array(datetimev2_col)), tinyint_col as result FROM
${tableName} ORDER BY result; """
+
+ // array<date> -> array<date> (ArrayDateTest); the query feeds it the datev2 column.
+ sql """ DROP FUNCTION IF EXISTS java_udf_array_date_test(array<date>);
"""
+ sql """ CREATE FUNCTION java_udf_array_date_test(array<date>) RETURNS
array<date> PROPERTIES (
+ "file"="file://${jarPath}",
+ "symbol"="org.apache.doris.udf.ArrayDateTest",
+ "type"="JAVA_UDF"
+ ); """
+ qt_select_13 """ SELECT java_udf_array_date_test(array(datev2_col)),
tinyint_col as result FROM ${tableName} ORDER BY result; """
+
+ } finally {
+ // Drop every UDF this suite registers so none is left behind after the run.
+ // IF EXISTS keeps these statements valid when the suite aborted before a
+ // given function was created.
+ try_sql("DROP FUNCTION IF EXISTS java_udf_array_int_test(array<int>);")
+ try_sql("DROP FUNCTION IF EXISTS java_udf_array_return_int_test(array<int>);")
+ try_sql("DROP FUNCTION IF EXISTS java_udf_array_return_string_test(array<string>);")
+ try_sql("DROP FUNCTION IF EXISTS java_udf_array_string_test(array<string>);")
+ try_sql("DROP FUNCTION IF EXISTS java_udf_array_datatime_test(array<datetime>);")
+ try_sql("DROP FUNCTION IF EXISTS java_udf_array_date_test(array<date>);")
+ try_sql("DROP TABLE IF EXISTS ${tableName}")
+ }
+}
diff --git a/run-regression-test.sh b/run-regression-test.sh
index 1f6d316dae..a8ea5f976c 100755
--- a/run-regression-test.sh
+++ b/run-regression-test.sh
@@ -159,11 +159,8 @@ if ! test -f ${RUN_JAR:+${RUN_JAR}}; then
mkdir -p "${OUTPUT_DIR}"/{lib,log}
cp -r "${REGRESSION_TEST_BUILD_DIR}"/regression-test-*.jar
"${OUTPUT_DIR}/lib"
-fi
-# build jar needed by java-udf case
-JAVAUDF_JAR="${DORIS_HOME}/regression-test/java-udf-src/target/java-udf-case-jar-with-dependencies.jar"
-if ! test -f ${JAVAUDF_JAR:+${JAVAUDF_JAR}}; then
+ echo "===== BUILD JAVA_UDF_SRC TO GENERATE JAR ====="
mkdir -p "${DORIS_HOME}"/regression-test/suites/javaudf_p0/jars
cd "${DORIS_HOME}"/regression-test/java-udf-src
"${MVN_CMD}" package
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]