Repository: hive
Updated Branches:
  refs/heads/master bc8de94ae -> 1c2d7ba35


HIVE-13116 : LLAP: allow ignoring the UDF check during compile time (Sergey Shelukhin, reviewed by Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/137b2380
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/137b2380
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/137b2380

Branch: refs/heads/master
Commit: 137b238079f489429ed09c287bbecf1f46314300
Parents: bc8de94
Author: Sergey Shelukhin <ser...@apache.org>
Authored: Tue Feb 23 11:25:56 2016 -0800
Committer: Sergey Shelukhin <ser...@apache.org>
Committed: Tue Feb 23 11:25:56 2016 -0800

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  4 ++
 .../hive/ql/optimizer/physical/LlapDecider.java | 35 ++++--------
 .../test/queries/clientpositive/llapdecider.q   |  4 ++
 .../clientpositive/llap/llapdecider.q.out       | 60 ++++++++++++++++++++
 .../clientpositive/tez/llapdecider.q.out        | 60 ++++++++++++++++++++
 5 files changed, 139 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/137b2380/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 9cb626e..932746d 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2529,6 +2529,10 @@ public class HiveConf extends Configuration {
         "Check input size, before considering vertex (-1 disables check)"),
     LLAP_AUTO_MAX_OUTPUT("hive.llap.auto.max.output.size", 1*1024*1024*1024L,
         "Check output size, before considering vertex (-1 disables check)"),
+    LLAP_SKIP_COMPILE_UDF_CHECK("hive.llap.skip.compile.udf.check", false,
+        "Whether to skip the compile-time check for non-built-in UDFs when deciding whether to\n" +
+        "execute tasks in LLAP. Skipping the check allows executing UDFs from pre-localized\n" +
+        "jars in LLAP; if the jars are not pre-localized, the UDFs will simply fail to load."),
     LLAP_EXECUTION_MODE("hive.llap.execution.mode", "none",
         new StringSet("auto", "none", "all", "map"),
         "Chooses whether query fragments will run in container or in llap"),

http://git-wip-us.apache.org/repos/asf/hive/blob/137b2380/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java
index b62f6a7..f2998af 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java
@@ -38,6 +38,7 @@ import java.util.Stack;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.FunctionInfo;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
@@ -84,10 +85,7 @@ import org.apache.hadoop.hive.ql.plan.TezWork;
  */
 public class LlapDecider implements PhysicalPlanResolver {
 
-  protected static transient final Logger LOG
-    = LoggerFactory.getLogger(LlapDecider.class);
-
-  private PhysicalContext physicalContext;
+  protected static transient final Logger LOG = LoggerFactory.getLogger(LlapDecider.class);
 
   private HiveConf conf;
 
@@ -101,13 +99,12 @@ public class LlapDecider implements PhysicalPlanResolver {
   private LlapMode mode;
 
   class LlapDecisionDispatcher implements Dispatcher {
-
-    private final PhysicalContext pctx;
     private final HiveConf conf;
+    private final boolean doSkipUdfCheck;
 
     public LlapDecisionDispatcher(PhysicalContext pctx) {
-      this.pctx = pctx;
-      this.conf = pctx.getConf();
+      conf = pctx.getConf();
+      doSkipUdfCheck = HiveConf.getBoolVar(conf, ConfVars.LLAP_SKIP_COMPILE_UDF_CHECK);
     }
 
     @Override
@@ -237,21 +234,13 @@ public class LlapDecider implements PhysicalPlanResolver {
         ExprNodeDesc cur = exprs.removeFirst();
         if (cur == null) continue;
         if (cur.getChildren() != null) {
-         exprs.addAll(cur.getChildren());
-       }
-
-        if (cur instanceof ExprNodeGenericFuncDesc) {
-         // getRequiredJars is currently broken (requires init in some cases before you can call it)
-          // String[] jars = ((ExprNodeGenericFuncDesc)cur).getGenericUDF().getRequiredJars();
-          // if (jars != null && !(jars.length == 0)) {
-          //   LOG.info(String.format("%s requires %s", cur.getExprString(), Joiner.on(", ").join(jars)));
-          //   return false;
-          // }
+          exprs.addAll(cur.getChildren());
+        }
 
-          if (!FunctionRegistry.isBuiltInFuncExpr((ExprNodeGenericFuncDesc)cur)) {
-            LOG.info("Not a built-in function: " + cur.getExprString());
-            return false;
-          }
+        if (!doSkipUdfCheck && cur instanceof ExprNodeGenericFuncDesc
+            && !FunctionRegistry.isBuiltInFuncExpr((ExprNodeGenericFuncDesc)cur)) {
+          LOG.info("Not a built-in function: " + cur.getExprString());
+          return false;
         }
       }
       return true;
@@ -420,8 +409,6 @@ public class LlapDecider implements PhysicalPlanResolver {
 
   @Override
   public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
-
-    this.physicalContext = pctx;
     this.conf = pctx.getConf();
 
     this.mode = LlapMode.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.LLAP_EXECUTION_MODE));

http://git-wip-us.apache.org/repos/asf/hive/blob/137b2380/ql/src/test/queries/clientpositive/llapdecider.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/llapdecider.q 
b/ql/src/test/queries/clientpositive/llapdecider.q
index ab9f5e0..16b2521 100644
--- a/ql/src/test/queries/clientpositive/llapdecider.q
+++ b/ql/src/test/queries/clientpositive/llapdecider.q
@@ -62,4 +62,8 @@ EXPLAIN SELECT sum(cast(key as int) + 1) from src_orc where cast(key as int) > 1
 EXPLAIN SELECT sum(cast(test_udf_get_java_string(cast(key as string)) as int) + 1) from src_orc where cast(key as int) > 1;
 EXPLAIN SELECT sum(cast(key as int) + 1) from src_orc where cast(test_udf_get_java_string(cast(key as string)) as int) > 1;
 
+set hive.llap.skip.compile.udf.check=true;
+
+EXPLAIN SELECT sum(cast(test_udf_get_java_string(cast(key as string)) as int) + 1) from src_orc where cast(key as int) > 1;
+
 set hive.execution.mode=container;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/137b2380/ql/src/test/results/clientpositive/llap/llapdecider.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/llapdecider.q.out 
b/ql/src/test/results/clientpositive/llap/llapdecider.q.out
index 61a31b4..49fc70b 100644
--- a/ql/src/test/results/clientpositive/llap/llapdecider.q.out
+++ b/ql/src/test/results/clientpositive/llap/llapdecider.q.out
@@ -1207,3 +1207,63 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: EXPLAIN SELECT sum(cast(test_udf_get_java_string(cast(key as 
string)) as int) + 1) from src_orc where cast(key as int) > 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT sum(cast(test_udf_get_java_string(cast(key as 
string)) as int) + 1) from src_orc where cast(key as int) > 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src_orc
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (UDFToInteger(key) > 1) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 14442 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: 
(UDFToInteger(GenericUDFTestGetJavaString(key)) + 1) (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 166 Data size: 14442 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: sum(_col0)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: bigint)
+            Execution mode: llap
+        Reducer 2 
+            Execution mode: uber
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

http://git-wip-us.apache.org/repos/asf/hive/blob/137b2380/ql/src/test/results/clientpositive/tez/llapdecider.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/llapdecider.q.out 
b/ql/src/test/results/clientpositive/tez/llapdecider.q.out
index 61a31b4..49fc70b 100644
--- a/ql/src/test/results/clientpositive/tez/llapdecider.q.out
+++ b/ql/src/test/results/clientpositive/tez/llapdecider.q.out
@@ -1207,3 +1207,63 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: EXPLAIN SELECT sum(cast(test_udf_get_java_string(cast(key as 
string)) as int) + 1) from src_orc where cast(key as int) > 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT sum(cast(test_udf_get_java_string(cast(key as 
string)) as int) + 1) from src_orc where cast(key as int) > 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src_orc
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (UDFToInteger(key) > 1) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 14442 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: 
(UDFToInteger(GenericUDFTestGetJavaString(key)) + 1) (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 166 Data size: 14442 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: sum(_col0)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: bigint)
+            Execution mode: llap
+        Reducer 2 
+            Execution mode: uber
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

Reply via email to