Repository: hive Updated Branches: refs/heads/master bc8de94ae -> 1c2d7ba35
HIVE-13116 : LLAP: allow ignoring the UDF check during compile time (Sergey Shelukhin, reviewed by Gunther Hagleitner) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/137b2380 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/137b2380 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/137b2380 Branch: refs/heads/master Commit: 137b238079f489429ed09c287bbecf1f46314300 Parents: bc8de94 Author: Sergey Shelukhin <ser...@apache.org> Authored: Tue Feb 23 11:25:56 2016 -0800 Committer: Sergey Shelukhin <ser...@apache.org> Committed: Tue Feb 23 11:25:56 2016 -0800 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 ++ .../hive/ql/optimizer/physical/LlapDecider.java | 35 ++++-------- .../test/queries/clientpositive/llapdecider.q | 4 ++ .../clientpositive/llap/llapdecider.q.out | 60 ++++++++++++++++++++ .../clientpositive/tez/llapdecider.q.out | 60 ++++++++++++++++++++ 5 files changed, 139 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/137b2380/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 9cb626e..932746d 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2529,6 +2529,10 @@ public class HiveConf extends Configuration { "Check input size, before considering vertex (-1 disables check)"), LLAP_AUTO_MAX_OUTPUT("hive.llap.auto.max.output.size", 1*1024*1024*1024L, "Check output size, before considering vertex (-1 disables check)"), + LLAP_SKIP_COMPILE_UDF_CHECK("hive.llap.skip.compile.udf.check", false, + "Whether to skip the compile-time check for non-built-in UDFs when deciding whether to\n" + + "execute tasks in LLAP. Skipping the check allows executing UDFs from pre-localized\n" + + "jars in LLAP; if the jars are not pre-localized, the UDFs will simply fail to load."), LLAP_EXECUTION_MODE("hive.llap.execution.mode", "none", new StringSet("auto", "none", "all", "map"), "Chooses whether query fragments will run in container or in llap"), http://git-wip-us.apache.org/repos/asf/hive/blob/137b2380/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java index b62f6a7..f2998af 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LlapDecider.java @@ -38,6 +38,7 @@ import java.util.Stack; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.FunctionInfo; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -84,10 +85,7 @@ import org.apache.hadoop.hive.ql.plan.TezWork; */ public class LlapDecider implements PhysicalPlanResolver { - protected static transient final Logger LOG - = LoggerFactory.getLogger(LlapDecider.class); - - private PhysicalContext physicalContext; + protected static transient final Logger LOG = LoggerFactory.getLogger(LlapDecider.class); private HiveConf conf; @@ -101,13 +99,12 @@ public class LlapDecider implements PhysicalPlanResolver { private LlapMode mode; class LlapDecisionDispatcher implements Dispatcher { - - private final PhysicalContext pctx; private final HiveConf conf; + private final boolean doSkipUdfCheck; public LlapDecisionDispatcher(PhysicalContext pctx) { - this.pctx = pctx; - this.conf = pctx.getConf(); + conf = pctx.getConf(); + doSkipUdfCheck = HiveConf.getBoolVar(conf, ConfVars.LLAP_SKIP_COMPILE_UDF_CHECK); } @Override @@ -237,21 +234,13 @@ public class LlapDecider implements PhysicalPlanResolver { ExprNodeDesc cur = exprs.removeFirst(); if (cur == null) continue; if (cur.getChildren() != null) { - exprs.addAll(cur.getChildren()); - } - - if (cur instanceof ExprNodeGenericFuncDesc) { - // getRequiredJars is currently broken (requires init in some cases before you can call it) - // String[] jars = ((ExprNodeGenericFuncDesc)cur).getGenericUDF().getRequiredJars(); - // if (jars != null && !(jars.length == 0)) { - // LOG.info(String.format("%s requires %s", cur.getExprString(), Joiner.on(", ").join(jars))); - // return false; - // } + exprs.addAll(cur.getChildren()); + } - if (!FunctionRegistry.isBuiltInFuncExpr((ExprNodeGenericFuncDesc)cur)) { - LOG.info("Not a built-in function: " + cur.getExprString()); - return false; - } + if (!doSkipUdfCheck && cur instanceof ExprNodeGenericFuncDesc + && !FunctionRegistry.isBuiltInFuncExpr((ExprNodeGenericFuncDesc)cur)) { + LOG.info("Not a built-in function: " + cur.getExprString()); + return false; } } return true; @@ -420,8 +409,6 @@ public class LlapDecider implements PhysicalPlanResolver { @Override public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { - - this.physicalContext = pctx; this.conf = pctx.getConf(); this.mode = LlapMode.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.LLAP_EXECUTION_MODE)); http://git-wip-us.apache.org/repos/asf/hive/blob/137b2380/ql/src/test/queries/clientpositive/llapdecider.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/llapdecider.q b/ql/src/test/queries/clientpositive/llapdecider.q index ab9f5e0..16b2521 100644 --- a/ql/src/test/queries/clientpositive/llapdecider.q +++ b/ql/src/test/queries/clientpositive/llapdecider.q @@ -62,4 +62,8 @@ EXPLAIN SELECT sum(cast(key as int) + 1) from src_orc where cast(key as int) > 1 EXPLAIN SELECT sum(cast(test_udf_get_java_string(cast(key as string)) as int) + 1) from src_orc where cast(key as int) > 1; EXPLAIN SELECT sum(cast(key as int) + 1) from src_orc where cast(test_udf_get_java_string(cast(key as string)) as int) > 1; +set hive.llap.skip.compile.udf.check=true; + +EXPLAIN SELECT sum(cast(test_udf_get_java_string(cast(key as string)) as int) + 1) from src_orc where cast(key as int) > 1; + set hive.execution.mode=container; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/137b2380/ql/src/test/results/clientpositive/llap/llapdecider.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/llapdecider.q.out b/ql/src/test/results/clientpositive/llap/llapdecider.q.out index 61a31b4..49fc70b 100644 --- a/ql/src/test/results/clientpositive/llap/llapdecider.q.out +++ b/ql/src/test/results/clientpositive/llap/llapdecider.q.out @@ -1207,3 +1207,63 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: EXPLAIN SELECT sum(cast(test_udf_get_java_string(cast(key as string)) as int) + 1) from src_orc where cast(key as int) > 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(cast(test_udf_get_java_string(cast(key as string)) as int) + 1) from src_orc where cast(key as int) > 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToInteger(key) > 1) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (UDFToInteger(GenericUDFTestGetJavaString(key)) + 1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: uber + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/137b2380/ql/src/test/results/clientpositive/tez/llapdecider.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/llapdecider.q.out b/ql/src/test/results/clientpositive/tez/llapdecider.q.out index 61a31b4..49fc70b 100644 --- a/ql/src/test/results/clientpositive/tez/llapdecider.q.out +++ b/ql/src/test/results/clientpositive/tez/llapdecider.q.out @@ -1207,3 +1207,63 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: EXPLAIN SELECT sum(cast(test_udf_get_java_string(cast(key as string)) as int) + 1) from src_orc where cast(key as int) > 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(cast(test_udf_get_java_string(cast(key as string)) as int) + 1) from src_orc where cast(key as int) > 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src_orc + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToInteger(key) > 1) (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (UDFToInteger(GenericUDFTestGetJavaString(key)) + 1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: uber + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +