This is an automated email from the ASF dual-hosted git repository.

zhaoc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 63fa260  Support prepare/close in UDF (#1985)
63fa260 is described below

commit 63fa260d3f0265df2d7f59fe320aab2b07b00186
Author: xy720 <[email protected]>
AuthorDate: Wed Oct 16 07:19:20 2019 +0800

    Support prepare/close in UDF (#1985)
    
    The prepare/close step of scalar functions is already supported in the 
execution framework; we only need to support it in the syntax and metadata in the frontend.
    
    In addition, the 'Hive' binary type of scalar function does NOT support the 
prepare/close step, so we need to make it support it.
---
 be/src/exprs/scalar_fn_call.cpp                    |  3 +-
 be/src/udf_samples/udf_sample.cpp                  |  5 ++
 be/src/udf_samples/udf_sample.h                    | 11 ++++
 .../Data Definition/create-function.md             | 45 +++++++++-----
 .../Data Definition/create-function_EN.md          | 70 +++++++++++++---------
 .../apache/doris/analysis/CreateFunctionStmt.java  |  6 +-
 .../org/apache/doris/catalog/ScalarFunction.java   |  4 +-
 7 files changed, 98 insertions(+), 46 deletions(-)

diff --git a/be/src/exprs/scalar_fn_call.cpp b/be/src/exprs/scalar_fn_call.cpp
index 27b678d..7b75ea6 100644
--- a/be/src/exprs/scalar_fn_call.cpp
+++ b/be/src/exprs/scalar_fn_call.cpp
@@ -540,7 +540,8 @@ Status ScalarFnCall::get_udf(RuntimeState* state, 
Function** udf) {
 
 Status ScalarFnCall::get_function(RuntimeState* state, const std::string& 
symbol, void** fn) {
     if (_fn.binary_type == TFunctionBinaryType::NATIVE 
-            || _fn.binary_type == TFunctionBinaryType::BUILTIN) {
+            || _fn.binary_type == TFunctionBinaryType::BUILTIN
+            || _fn.binary_type == TFunctionBinaryType::HIVE) {
         return UserFunctionCache::instance()->get_function_ptr(
             _fn.id, symbol, _fn.hdfs_location, _fn.checksum, fn, 
&_cache_entry);
     } else {
diff --git a/be/src/udf_samples/udf_sample.cpp 
b/be/src/udf_samples/udf_sample.cpp
index faa7280..6c26c0c 100644
--- a/be/src/udf_samples/udf_sample.cpp
+++ b/be/src/udf_samples/udf_sample.cpp
@@ -26,4 +26,9 @@ IntVal AddUdf(FunctionContext* context, const IntVal& arg1, 
const IntVal& arg2)
     return {arg1.val + arg2.val};
 }
 
+/// --- Prepare / Close Functions ---
+/// ---------------------------------
+void AddUdfPrepare(FunctionContext* context, 
FunctionContext::FunctionStateScope scope) {}
+void AddUdfClose(FunctionContext* context, FunctionContext::FunctionStateScope 
scope) {}
+
 }
diff --git a/be/src/udf_samples/udf_sample.h b/be/src/udf_samples/udf_sample.h
index cf123fe..f0adfc5 100644
--- a/be/src/udf_samples/udf_sample.h
+++ b/be/src/udf_samples/udf_sample.h
@@ -23,4 +23,15 @@ namespace doris_udf {
 
 IntVal AddUdf(FunctionContext* context, const IntVal& arg1, const IntVal& 
arg2);
 
+/// --- Prepare / Close Functions ---
+/// ---------------------------------
+
+/// The UDF can optionally include a prepare function. The prepare function is 
called
+/// before any calls to the UDF to evaluate values.
+void AddUdfPrepare(FunctionContext* context, 
FunctionContext::FunctionStateScope scope);
+
+/// The UDF can also optionally include a close function. The close function 
is called 
+/// after all calls to the UDF have completed.
+void AddUdfClose(FunctionContext* context, FunctionContext::FunctionStateScope 
scope);
+
 }
diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data 
Definition/create-function.md 
b/docs/documentation/cn/sql-reference/sql-statements/Data 
Definition/create-function.md
index 68eb101..46a4097 100644
--- a/docs/documentation/cn/sql-reference/sql-statements/Data 
Definition/create-function.md     
+++ b/docs/documentation/cn/sql-reference/sql-statements/Data 
Definition/create-function.md     
@@ -39,6 +39,10 @@ CREATE [AGGREGATE] FUNCTION function_name
 >           "finalize_fn": 聚合函数获取最后结果的函数签名。对于聚合函数是可选项,如果没有指定,将会使用默认的获取结果函数
 >
 >           "md5": 函数动态链接库的MD5值,用于校验下载的内容是否正确。此选项是可选项
+> 
+>           "prepare_fn": 
自定义函数的prepare函数的函数签名,用于从动态库里面找到prepare函数入口。此选项对于自定义函数是可选项
+> 
+>           "close_fn": 自定义函数的close函数的函数签名,用于从动态库里面找到close函数入口。此选项对于自定义函数是可选项
 
 
 此语句创建一个自定义函数。执行此命令需要用户拥有 `ADMIN` 权限。
@@ -49,24 +53,35 @@ CREATE [AGGREGATE] FUNCTION function_name
 
 1. 创建一个自定义标量函数
 
-```
-CREATE FUNCTION my_add(INT, INT) RETURNS INT PROPERTIES (
-    "symbol" = "_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_",
-    "object_file" = "http://host:port/libmyadd.so";
-);
-```
+       ```
+       CREATE FUNCTION my_add(INT, INT) RETURNS INT PROPERTIES (
+               "symbol" =      
"_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_",
+               "object_file" = "http://host:port/libmyadd.so"
+       );
+       ```
+       
+2. 创建一个有prepare/close函数的自定义标量函数
+
+       ```
+       CREATE FUNCTION my_add(INT, INT) RETURNS INT PROPERTIES (
+               "symbol" =      
"_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_",
+               "prepare_fn" = 
"_ZN9doris_udf14AddUdf_prepareEPNS_15FunctionContextENS0_18FunctionStateScopeE",
+               "close_fn" = 
"_ZN9doris_udf12AddUdf_closeEPNS_15FunctionContextENS0_18FunctionStateScopeE",
+               "object_file" = "http://host:port/libmyadd.so"
+       );
+       ```
 
 2. 创建一个自定义聚合函数
 
-```
-CREATE AGGREGATE FUNCTION my_count (BIGINT) RETURNS BIGINT PROPERTIES (
-    "init_fn"="_ZN9doris_udf9CountInitEPNS_15FunctionContextEPNS_9BigIntValE",
-    
"update_fn"="_ZN9doris_udf11CountUpdateEPNS_15FunctionContextERKNS_6IntValEPNS_9BigIntValE",
-    
"merge_fn"="_ZN9doris_udf10CountMergeEPNS_15FunctionContextERKNS_9BigIntValEPS2_",
-    
"finalize_fn"="_ZN9doris_udf13CountFinalizeEPNS_15FunctionContextERKNS_9BigIntValE",
-    "object_file"="http://host:port/libudasample.so";
-);
-```
+       ```
+       CREATE AGGREGATE FUNCTION my_count (BIGINT) RETURNS BIGINT PROPERTIES (
+           
"init_fn"="_ZN9doris_udf9CountInitEPNS_15FunctionContextEPNS_9BigIntValE",
+           
"update_fn"="_ZN9doris_udf11CountUpdateEPNS_15FunctionContextERKNS_6IntValEPNS_9BigIntValE",
+           
"merge_fn"="_ZN9doris_udf10CountMergeEPNS_15FunctionContextERKNS_9BigIntValEPS2_",
+           
"finalize_fn"="_ZN9doris_udf13CountFinalizeEPNS_15FunctionContextERKNS_9BigIntValE",
+           "object_file"="http://host:port/libudasample.so"
+       );
+       ```
 
 ## keyword
 
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data 
Definition/create-function_EN.md 
b/docs/documentation/en/sql-reference/sql-statements/Data 
Definition/create-function_EN.md
index 8130fc1..8df27f6 100644
--- a/docs/documentation/en/sql-reference/sql-statements/Data 
Definition/create-function_EN.md  
+++ b/docs/documentation/en/sql-reference/sql-statements/Data 
Definition/create-function_EN.md  
@@ -4,25 +4,25 @@
 
 ```
 CREATE [AGGREGATE] FUNCTION function_name
-(angry type [...])
-RETURNS ret_type
-[INTERMEDIATE inter_type]
-[PROPERTIES ("key" = "value" [, ...]) ]
+       (arg_type [...])
+       RETURNS ret_type
+       [INTERMEDIATE inter_type]
+       [PROPERTIES ("key" = "value" [, ...]) ]
 ```
 
 ### Parameters
 
->` AGGREGATE `: If this is the case, it means that the created function is an 
aggregate function, otherwise it is a scalar function.
+>`AGGREGATE`: If this is the case, it means that the created function is an 
aggregate function, otherwise it is a scalar function.
 >
->` Function_name': To create the name of the function, you can include the 
name of the database. For example: `db1.my_func'.
+>`function_name`: The name of the function to create; it may include the name 
of the database. For example: `db1.my_func`.
 >
->` arg_type': The parameter type of the function is the same as the type 
defined at the time of table building. Variable-length parameters can be 
represented by `,...'. If it is a variable-length type, the type of the 
variable-length part of the parameters is the same as the last 
non-variable-length parameter type.
+>`arg_type`: The parameter type of the function is the same as the type 
defined at the time of table building. Variable-length parameters can be 
represented by `,...`. If it is a variable-length type, the type of the 
variable-length part of the parameters is the same as the last 
non-variable-length parameter type.
 >
->` ret_type': Function return type.
+>`ret_type`: Function return type.
 >
->` Inter_type': A data type used to represent the intermediate stage of an 
aggregate function.
+>`inter_type`: A data type used to represent the intermediate stage of an 
aggregate function.
 >
->` properties `: Used to set properties related to this function. Properties 
that can be set include
+>`properties`: Used to set properties related to this function. Properties 
that can be set include
 >
 > "Object_file": Custom function dynamic library URL path, currently only 
 > supports HTTP/HTTPS protocol, this path needs to remain valid throughout the 
 > life cycle of the function. This option is mandatory
 >
@@ -39,33 +39,47 @@ RETURNS ret_type
 > "finalize_fn": A function signature that aggregates functions to obtain the 
 > final result. For aggregation functions, it is optional. If not specified, 
 > the default fetch result function will be used.
 >
 > "md5": The MD5 value of the function dynamic link library, which is used to 
 > verify that the downloaded content is correct. This option is optional
+>
+> "prepare_fn": Function signature of the prepare function for finding the 
entry from the dynamic library. This option is optional for custom functions
+> 
+> "close_fn": Function signature of the close function for finding the entry 
from the dynamic library. This option is optional for custom functions
 
 
-This statement creates a custom function. Executing this command requires that 
the user have `ADMIN'privileges.
+This statement creates a custom function. Executing this command requires that 
the user have `ADMIN` privileges.
 
-If the `function_name'contains the database name, the custom function will be 
created in the corresponding database, otherwise the function will be created 
in the database where the current session is located. The name and parameters 
of the new function cannot be the same as functions already existing in the 
current namespace, otherwise the creation will fail. But only with the same 
name and different parameters can the creation be successful.
+If the `function_name` contains the database name, the custom function will be 
created in the corresponding database, otherwise the function will be created 
in the database where the current session is located. The name and parameters 
of the new function cannot be the same as functions already existing in the 
current namespace, otherwise the creation will fail. But only with the same 
name and different parameters can the creation be successful.
 
 ## example
 
 1. Create a custom scalar function
 
-```
-CREATE FUNCTION my_add(INT, INT) RETURNS INT PROPERTIES (
-"Symbol"=""\\\\\\\\ zn9doris\\\ udf6addudfepns\\ FunctionContexterkns\\ 
INTVales 4\,
-"object file" ="http://host:port /libmyadd.so"
-);
-```
+       ```
+       CREATE FUNCTION my_add(INT, INT) RETURNS INT PROPERTIES (
+               "symbol" = 
"_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_",
+               "object_file" = "http://host:port/libmyadd.so"
+       );
+       ```
+2. Create a custom scalar function with prepare/close functions
 
-2. Create a custom aggregation function
+       ```
+       CREATE FUNCTION my_add(INT, INT) RETURNS INT PROPERTIES (
+               "symbol" =      
"_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_",
+               "prepare_fn" = 
"_ZN9doris_udf14AddUdf_prepareEPNS_15FunctionContextENS0_18FunctionStateScopeE",
+               "close_fn" = 
"_ZN9doris_udf12AddUdf_closeEPNS_15FunctionContextENS0_18FunctionStateScopeE",
+               "object_file" = "http://host:port/libmyadd.so"
+       );
+       ```
 
-```
-CREATE AGGREGATE FUNCTION my_count (BIGINT) RETURNS BIGINT PROPERTIES (
-"init u fn"= "ZN9doris, udf9CountInitEPNS -u 15FunctionContextEPNS, u 
9BigIntValE",
-"Update  fn" = " zn9doris \ udf11Countupdateepns \ \ FunctionContexterkns \ 
Intvalepns  bigintvale",
-"Merge fn"="\ zn9doris\\ udf10CountMergeepns\ \ FunctionContexterkns\ 
Bigintvaleps2\\\\\\\\\\\\\
-"Finalize \ fn" = "\ zn9doris \ udf13Count Finalizepns \\ FunctionContexterkns 
\ Bigintvale",
-"object" file ="http://host:port /libudasample.so"
-);
-```
+3. Create a custom aggregation function
+       
+       ```
+       CREATE AGGREGATE FUNCTION my_count (BIGINT) RETURNS BIGINT PROPERTIES (
+               "init_fn" = 
"_ZN9doris_udf9CountInitEPNS_15FunctionContextEPNS_9BigIntValE",
+               "update_fn" = 
"_ZN9doris_udf11CountUpdateEPNS_15FunctionContextERKNS_6IntValEPNS_9BigIntValE",
+               "merge_fn" = 
"_ZN9doris_udf10CountMergeEPNS_15FunctionContextERKNS_9BigIntValEPS2_",
+               "finalize_fn" = 
"_ZN9doris_udf13CountFinalizeEPNS_15FunctionContextERKNS_9BigIntValE",
+               "object_file" = "http://host:port/libudasample.so"
+       );
+       ```
 ##keyword
 CREATE,FUNCTION
diff --git a/fe/src/main/java/org/apache/doris/analysis/CreateFunctionStmt.java 
b/fe/src/main/java/org/apache/doris/analysis/CreateFunctionStmt.java
index 392e512..05c742d 100644
--- a/fe/src/main/java/org/apache/doris/analysis/CreateFunctionStmt.java
+++ b/fe/src/main/java/org/apache/doris/analysis/CreateFunctionStmt.java
@@ -43,6 +43,8 @@ import java.util.Map;
 public class CreateFunctionStmt extends DdlStmt {
     public static final String OBJECT_FILE_KEY = "object_file";
     public static final String SYMBOL_KEY = "symbol";
+    public static final String PREPARE_SYMBOL_KEY = "prepare_fn";
+    public static final String CLOSE_SYMBOL_KEY = "close_fn";
     public static final String MD5_CHECKSUM = "md5";
     public static final String INIT_KEY = "init_fn";
     public static final String UPDATE_KEY = "update_fn";
@@ -177,10 +179,12 @@ public class CreateFunctionStmt extends DdlStmt {
         if (Strings.isNullOrEmpty(symbol)) {
             throw new AnalysisException("No 'symbol' in properties");
         }
+        String prepareFnSymbol = properties.get(PREPARE_SYMBOL_KEY);
+        String closeFnSymbol = properties.get(CLOSE_SYMBOL_KEY);
         function = ScalarFunction.createUdf(
                 functionName, argsDef.getArgTypes(),
                 returnType.getType(), argsDef.isVariadic(),
-                objectFile, symbol);
+                objectFile, symbol, prepareFnSymbol, closeFnSymbol);
         function.setChecksum(checksum);
     }
 
diff --git a/fe/src/main/java/org/apache/doris/catalog/ScalarFunction.java 
b/fe/src/main/java/org/apache/doris/catalog/ScalarFunction.java
index 300f1f6..3d4618a 100644
--- a/fe/src/main/java/org/apache/doris/catalog/ScalarFunction.java
+++ b/fe/src/main/java/org/apache/doris/catalog/ScalarFunction.java
@@ -233,11 +233,13 @@ public class ScalarFunction extends Function {
     public static ScalarFunction createUdf(
             FunctionName name, Type[] args,
             Type returnType, boolean isVariadic,
-            String objectFile, String symbol) {
+            String objectFile, String symbol, String prepareFnSymbol, String 
closeFnSymbol) {
         ScalarFunction fn = new ScalarFunction(name, args, returnType, 
isVariadic);
         fn.setBinaryType(TFunctionBinaryType.HIVE);
         fn.setUserVisible(true);
         fn.symbolName = symbol;
+        fn.prepareFnSymbol = prepareFnSymbol;
+        fn.closeFnSymbol = closeFnSymbol;
         fn.setLocation(new HdfsURI(objectFile));
         return fn;
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to