Repository: hive
Updated Branches:
  refs/heads/master bcbd245c8 -> 0bab072d1


HIVE-12444 - Global Limit optimization on ACID table without base directory may throw exception (Wei Zheng via Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0bab072d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0bab072d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0bab072d

Branch: refs/heads/master
Commit: 0bab072d17df5022ee6262be68d0442f206e398b
Parents: bcbd245
Author: Eugene Koifman <[email protected]>
Authored: Wed Dec 2 12:37:31 2015 -0800
Committer: Eugene Koifman <[email protected]>
Committed: Wed Dec 2 12:37:31 2015 -0800

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |  3 +-
 .../hive/ql/optimizer/GenMapRedUtils.java       | 43 ++++++++++++--------
 2 files changed, 27 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/0bab072d/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 935fd28..8db4a9f 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -68,7 +68,8 @@ disabled.query.files=ql_rewrite_gbtoidx.q,\
   rcfile_merge1.q,\
   smb_mapjoin_8.q
 
-minitez.query.files.shared=alter_merge_2_orc.q,\
+minitez.query.files.shared=acid_globallimit.q,\
+  alter_merge_2_orc.q,\
   alter_merge_orc.q,\
   alter_merge_stats_orc.q,\
   auto_join0.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/0bab072d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index ecdaa55..0cd7b62 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -489,6 +489,7 @@ public final class GenMapRedUtils {
       HiveConf conf, boolean local) throws SemanticException {
     ArrayList<Path> partDir = new ArrayList<Path>();
     ArrayList<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
+    boolean isAcidTable = false;
 
     Path tblDir = null;
     plan.setNameToSplitSample(parseCtx.getNameToSplitSample());
@@ -497,6 +498,7 @@ public final class GenMapRedUtils {
       try {
         TableScanOperator tsOp = (TableScanOperator) topOp;
         partsList = PartitionPruner.prune(tsOp, parseCtx, alias_id);
+        isAcidTable = ((TableScanOperator) topOp).getConf().isAcidTable();
       } catch (SemanticException e) {
         throw e;
       }
@@ -536,26 +538,31 @@ public final class GenMapRedUtils {
     long sizeNeeded = Integer.MAX_VALUE;
     int fileLimit = -1;
     if (parseCtx.getGlobalLimitCtx().isEnable()) {
-      long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(),
-          HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
-      sizeNeeded = parseCtx.getGlobalLimitCtx().getGlobalLimit() * sizePerRow;
-      // for the optimization that reduce number of input file, we limit number
-      // of files allowed. If more than specific number of files have to be
-      // selected, we skip this optimization. Since having too many files as
-      // inputs can cause unpredictable latency. It's not necessarily to be
-      // cheaper.
-      fileLimit =
-          HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE);
-
-      if (sizePerRow <= 0 || fileLimit <= 0) {
-        LOG.info("Skip optimization to reduce input size of 'limit'");
+      if (isAcidTable) {
+        LOG.info("Skip Global Limit optimization for ACID table");
         parseCtx.getGlobalLimitCtx().disableOpt();
-      } else if (parts.isEmpty()) {
-        LOG.info("Empty input: skip limit optimiztion");
       } else {
-        LOG.info("Try to reduce input size for 'limit' " +
-            "sizeNeeded: " + sizeNeeded +
-            " file limit : " + fileLimit);
+        long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(),
+            HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
+        sizeNeeded = parseCtx.getGlobalLimitCtx().getGlobalLimit() * sizePerRow;
+        // for the optimization that reduce number of input file, we limit number
+        // of files allowed. If more than specific number of files have to be
+        // selected, we skip this optimization. Since having too many files as
+        // inputs can cause unpredictable latency. It's not necessarily to be
+        // cheaper.
+        fileLimit =
+            HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE);
+
+        if (sizePerRow <= 0 || fileLimit <= 0) {
+          LOG.info("Skip optimization to reduce input size of 'limit'");
+          parseCtx.getGlobalLimitCtx().disableOpt();
+        } else if (parts.isEmpty()) {
+          LOG.info("Empty input: skip limit optimiztion");
+        } else {
+          LOG.info("Try to reduce input size for 'limit' " +
+              "sizeNeeded: " + sizeNeeded +
+              " file limit : " + fileLimit);
+        }
       }
     }
     boolean isFirstPart = true;
