This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new da272b45f77 HIVE-25043: Support custom UDFs in Vectorized mode (Ryu Kobayashi, reviewed by Denys Kuzmenko)
da272b45f77 is described below

commit da272b45f7707c41713dbfd50d61f0ff285d099f
Author: Ryu Kobayashi <[email protected]>
AuthorDate: Wed Feb 5 05:47:15 2025 +0900

    HIVE-25043: Support custom UDFs in Vectorized mode (Ryu Kobayashi, reviewed by Denys Kuzmenko)
    
    Closes #5631
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java  |  5 +++++
 .../hive/ql/exec/vector/VectorizationContext.java   | 21 ++++++++++++++++++---
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 4581dce12cf..d89f1922d2a 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -4704,6 +4704,11 @@ public static enum ConfVars {
         "This flag should be set to true to enable vectorized mode of the PTF 
of query execution.\n" +
         "The default value is true."),
 
+    
HIVE_VECTOR_ADAPTOR_CUSTOM_UDF_WHITELIST("hive.vectorized.adaptor.custom.udf.whitelist",
 "",
+        "Custom UDF allowed when hive.vectorized.adaptor.usage.mode is 
chosen.\n" +
+        "Specify classes separated by commas:\n" +
+        "package.FooClass,package.BarClass"),
+
     
HIVE_VECTORIZATION_PTF_MAX_MEMORY_BUFFERING_BATCH_COUNT("hive.vectorized.ptf.max.memory.buffering.batch.count",
 25,
         "Maximum number of vectorized row batches to buffer in memory for 
PTF\n" +
         "The default value is 25"),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 289e3f5c480..e49cd5bf3f9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -277,6 +277,18 @@ public static HiveVectorIfStmtMode 
getHiveConfValue(HiveConf hiveConf) {
 
   private HiveVectorIfStmtMode hiveVectorIfStmtMode;
 
+  private Set<String> allowedCustomUDFs;
+
+  /**
+   * Builds the set of custom UDF class names read from
+   * {@link HiveConf.ConfVars#HIVE_VECTOR_ADAPTOR_CUSTOM_UDF_WHITELIST}.
+   *
+   * <p>The configured value is a comma-separated list of class names. Each entry is
+   * trimmed and blank entries are skipped, so a value such as
+   * {@code "package.FooClass, package.BarClass,"} yields exactly the two class names.
+   * Without trimming, an entry like {@code " package.BarClass"} could never equal the
+   * name returned by {@code Class.getName()}.
+   *
+   * @param hiveConf configuration to read the whitelist from
+   * @return a mutable set of whitelisted class names; empty (never null) when the
+   *         property is unset or contains no usable entries
+   */
+  private Set<String> getAllowedCustomUDFs(HiveConf hiveConf) {
+    String udfs = HiveConf.getVar(hiveConf,
+        HiveConf.ConfVars.HIVE_VECTOR_ADAPTOR_CUSTOM_UDF_WHITELIST);
+    Set<String> allowed = new HashSet<>();
+    if (udfs == null) {
+      return allowed;
+    }
+
+    for (String udf : udfs.split(",")) {
+      String className = udf.trim();
+      // Skip blanks produced by an empty value, doubled commas or stray whitespace.
+      if (!className.isEmpty()) {
+        allowed.add(className);
+      }
+    }
+    return allowed;
+  }
+
   //when set to true use the overflow checked vector expressions
   private boolean useCheckedVectorExpressions;
 
@@ -298,6 +310,7 @@ private void setHiveConfVars(HiveConf hiveConf) {
     adaptorSuppressEvaluateExceptions =
         HiveConf.getBoolVar(
             hiveConf, 
HiveConf.ConfVars.HIVE_VECTORIZED_ADAPTOR_SUPPRESS_EVALUATE_EXCEPTIONS);
+    this.allowedCustomUDFs = getAllowedCustomUDFs(hiveConf);
   }
 
   private void copyHiveConfVars(VectorizationContext vContextEnvironment) {
@@ -1037,7 +1050,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc 
exprDesc, VectorExpress
                 "Could not vectorize expression (mode = " + mode.name() + "): 
" + exprDesc.toString()
                   + " because hive.vectorized.adaptor.usage.mode=none");
           case CHOSEN:
-            if (isNonVectorizedPathUDF(expr, mode)) {
+            if (isNonVectorizedPathUDF(expr, mode, allowedCustomUDFs)) {
               ve = getCustomUDFExpression(expr, mode);
             } else {
               throw new HiveException(
@@ -1446,8 +1459,8 @@ public static GenericUDF getGenericUDFForCast(TypeInfo 
castType) throws HiveExce
    * Depending on performance requirements and frequency of use, these
    * may be implemented in the future with an optimized VectorExpression.
    */
-  public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr,
-      VectorExpressionDescriptor.Mode mode) {
+  private static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr,
+      VectorExpressionDescriptor.Mode mode, Set<String> allowCustomUDFs) {
     GenericUDF gudf = expr.getGenericUDF();
     if (gudf instanceof GenericUDFBridge) {
       GenericUDFBridge bridge = (GenericUDFBridge) gudf;
@@ -1486,6 +1499,8 @@ public static boolean 
isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr,
       return true;
     } else if (gudf instanceof GenericUDFConcat && (mode == 
VectorExpressionDescriptor.Mode.PROJECTION)) {
       return true;
+    } else if (allowCustomUDFs.contains(gudf.getClass().getName())) {
+      return true;
     }
     return false;
   }

Reply via email to