HIVE-11531: Add mysql-style LIMIT support to Hive, or improve ROW_NUMBER performance-wise (Hui Zheng, reviewed by Sergey Shelukhin, Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6273bee3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6273bee3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6273bee3

Branch: refs/heads/branch-2.0
Commit: 6273bee32601c759f16293fae119d7a7bb7473e1
Parents: a59d4ff
Author: Hui Zheng <[email protected]>
Authored: Mon Dec 21 02:11:51 2015 +0100
Committer: Jesus Camacho Rodriguez <[email protected]>
Committed: Mon Dec 21 02:11:51 2015 +0100

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/LimitOperator.java      |    6 +-
 .../ql/exec/vector/VectorLimitOperator.java     |   26 +-
 .../hive/ql/optimizer/GenMapRedUtils.java       |    3 +-
 .../hive/ql/optimizer/GlobalLimitOptimizer.java |   22 +-
 .../ql/optimizer/LimitPushdownOptimizer.java    |    5 +-
 .../calcite/reloperators/HiveSortLimit.java     |    4 +
 .../calcite/translator/ASTBuilder.java          |    5 +-
 .../calcite/translator/ASTConverter.java        |   11 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |   15 +-
 .../hadoop/hive/ql/parse/GlobalLimitCtx.java    |   10 +-
 .../org/apache/hadoop/hive/ql/parse/HiveLexer.g |    1 +
 .../apache/hadoop/hive/ql/parse/HiveParser.g    |    5 +-
 .../hadoop/hive/ql/parse/IdentifiersParser.g    |    2 +-
 .../hadoop/hive/ql/parse/MapReduceCompiler.java |    3 +-
 .../hadoop/hive/ql/parse/QBParseInfo.java       |   20 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   27 +-
 .../apache/hadoop/hive/ql/plan/LimitDesc.java   |   19 +
 .../test/queries/clientpositive/offset_limit.q  |   25 +
 .../offset_limit_global_optimizer.q             |   45 +
 .../clientpositive/offset_limit_ppd_optimizer.q |   80 +
 .../clientpositive/vectorization_offset_limit.q |   10 +
 .../results/clientpositive/offset_limit.q.out   |  257 ++
 .../offset_limit_global_optimizer.q.out         | 3390 ++++++++++++++++++
 .../offset_limit_ppd_optimizer.q.out            | 1377 +++++++
 .../vectorization_offset_limit.q.out            |  118 +
 25 files changed, 5442 insertions(+), 44 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java
index 8fe96be..fc85bea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/LimitOperator.java
@@ -34,6 +34,7 @@ public class LimitOperator extends Operator<LimitDesc> implements Serializable {
   private static final long serialVersionUID = 1L;
 
   protected transient int limit;
+  protected transient int offset;
   protected transient int leastRow;
   protected transient int currCount;
   protected transient boolean isMap;
@@ -43,15 +44,18 @@ public class LimitOperator extends Operator<LimitDesc> implements Serializable {
     super.initializeOp(hconf);
     limit = conf.getLimit();
     leastRow = conf.getLeastRows();
+    offset = (conf.getOffset() == null) ? 0 : conf.getOffset();
     currCount = 0;
     isMap = hconf.getBoolean("mapred.task.is.map", true);
   }
 
   @Override
   public void process(Object row, int tag) throws HiveException {
-    if (currCount < limit) {
+    if (offset <= currCount && currCount < (offset + limit)) {
       forward(row, inputObjInspectors[tag]);
       currCount++;
+    } else if (offset > currCount) {
+      currCount++;
     } else {
       setDone(true);
     }
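The row-window semantics introduced above are easiest to see in isolation. Below is a minimal stand-alone sketch (a hypothetical harness, not Hive code) that applies the same three-way branch to a stream of ten rows with offset=2 and limit=3: rows 0-1 are counted but skipped, rows 2-4 are forwarded, and input is no longer consumed afterwards.

    // Hypothetical stand-alone sketch of the LimitOperator change above.
    public class OffsetLimitSketch {
      public static void main(String[] args) {
        int offset = 2, limit = 3, currCount = 0;
        boolean done = false;
        for (int row = 0; row < 10 && !done; row++) {
          if (offset <= currCount && currCount < (offset + limit)) {
            System.out.println("forward row " + row); // rows 2, 3, 4
            currCount++;
          } else if (offset > currCount) {
            currCount++;                              // skip rows 0, 1
          } else {
            done = true;                              // stop after offset + limit rows
          }
        }
      }
    }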
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
index 2f4e46b..4cb91d4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
@@ -42,12 +42,30 @@ public class VectorLimitOperator extends LimitOperator {
   public void process(Object row, int tag) throws HiveException {
     VectorizedRowBatch batch = (VectorizedRowBatch) row;
 
-    if (currCount < limit) {
-      batch.size = Math.min(batch.size, limit - currCount);
-      forward(row, inputObjInspectors[tag]);
+    if (currCount + batch.size < offset) {
       currCount += batch.size;
-    } else {
+    } else if (currCount >= offset + limit) {
       setDone(true);
+    } else {
+      int skipSize = 0;
+      if (currCount < offset) {
+        skipSize = offset - currCount;
+      }
+      //skip skipSize rows of batch
+      batch.size = Math.min(batch.size, offset + limit - currCount);
+      if (batch.selectedInUse == false) {
+        batch.selectedInUse = true;
+        batch.selected = new int[batch.size];
+        for (int i = 0; i < batch.size - skipSize; i++) {
+          batch.selected[i] = skipSize + i;
+        }
+      } else {
+        for (int i = 0; i < batch.size - skipSize; i++) {
+          batch.selected[i] = batch.selected[skipSize + i];
+        }
+      }
+      forward(row, inputObjInspectors[tag]);
+      currCount += batch.size;
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index 0cd7b62..a42df71 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -544,7 +544,8 @@ public final class GenMapRedUtils {
       } else {
         long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(),
             HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
-        sizeNeeded = parseCtx.getGlobalLimitCtx().getGlobalLimit() * sizePerRow;
+        sizeNeeded = (parseCtx.getGlobalLimitCtx().getGlobalOffset()
+            + parseCtx.getGlobalLimitCtx().getGlobalLimit()) * sizePerRow;
         // for the optimization that reduce number of input file, we limit number
         // of files allowed. If more than specific number of files have to be
         // selected, we skip this optimization. Since having too many files as
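The vectorized variant has to express the same window in whole-batch arithmetic: batches wholly before the offset are dropped, the batch that straddles the offset is trimmed through the selected array, and processing stops once offset + limit rows have been accounted for. A minimal sketch of that bookkeeping, with hypothetical values offset=1500 and limit=100 and assuming 1024-row batches (no Hive classes involved):

    // Hypothetical sketch of the per-batch bookkeeping in VectorLimitOperator above.
    public class VectorWindowSketch {
      public static void main(String[] args) {
        int offset = 1500, limit = 100, currCount = 0;
        int batchSize = 1024;
        for (int b = 0; b < 4; b++) {
          if (currCount + batchSize < offset) {
            currCount += batchSize;                  // batch 0: skipped entirely
          } else if (currCount >= offset + limit) {
            System.out.println("batch " + b + ": done");
            break;
          } else {
            int skipSize = currCount < offset ? offset - currCount : 0;
            int size = Math.min(batchSize, offset + limit - currCount);
            // rows [skipSize, size) of this batch survive via batch.selected
            System.out.println("batch " + b + ": emit " + (size - skipSize)
                + " rows after skipping " + skipSize);
            currCount += size;
          }
        }
      }
    }

With these numbers, batch 0 is skipped outright, batch 1 skips 476 rows and emits exactly the 100 requested, and batch 2 triggers setDone.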
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java
index 74d6292..2062df1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java
@@ -27,6 +27,7 @@ import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
 import org.apache.hadoop.hive.ql.exec.LimitOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorUtils;
@@ -93,16 +94,19 @@ public class GlobalLimitOptimizer extends Transform {
     //    SELECT * FROM (SELECT col1 as col2 (SELECT * FROM ...) t1 LIMIT ...) t2);
     //
     TableScanOperator ts = (TableScanOperator) topOps.values().toArray()[0];
-    Integer tempGlobalLimit = checkQbpForGlobalLimit(ts);
+    LimitOperator tempGlobalLimit = checkQbpForGlobalLimit(ts);
 
     // query qualify for the optimization
-    if (tempGlobalLimit != null && tempGlobalLimit != 0) {
+    if (tempGlobalLimit != null) {
+      LimitDesc tempGlobalLimitDesc = tempGlobalLimit.getConf();
       Table tab = ts.getConf().getTableMetadata();
       Set<FilterOperator> filterOps = OperatorUtils.findOperators(ts, FilterOperator.class);
 
       if (!tab.isPartitioned()) {
         if (filterOps.size() == 0) {
-          globalLimitCtx.enableOpt(tempGlobalLimit);
+          Integer tempOffset = tempGlobalLimitDesc.getOffset();
+          globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(),
+              (tempOffset == null) ? 0 : tempOffset);
         }
       } else {
         // check if the pruner only contains partition columns
@@ -114,11 +118,15 @@ public class GlobalLimitOptimizer extends Transform {
           // If there is any unknown partition, create a map-reduce job for
           // the filter to prune correctly
           if (!partsList.hasUnknownPartitions()) {
-            globalLimitCtx.enableOpt(tempGlobalLimit);
+            Integer tempOffset = tempGlobalLimitDesc.getOffset();
+            globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(),
+                (tempOffset == null) ? 0 : tempOffset);
           }
         }
       }
       if (globalLimitCtx.isEnable()) {
+        LOG.info("Qualify the optimize that reduces input size for 'offset' for offset "
+            + globalLimitCtx.getGlobalOffset());
         LOG.info("Qualify the optimize that reduces input size for 'limit' for limit "
             + globalLimitCtx.getGlobalLimit());
       }
@@ -143,7 +151,7 @@ public class GlobalLimitOptimizer extends Transform {
    * if there is no limit, return 0
    * otherwise, return null
    */
-  private static Integer checkQbpForGlobalLimit(TableScanOperator ts) {
+  private static LimitOperator checkQbpForGlobalLimit(TableScanOperator ts) {
     Set<Class<? extends Operator<?>>> searchedClasses =
             new ImmutableSet.Builder<Class<? extends Operator<?>>>()
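The offset recorded by enableOpt above feeds the input-size estimates (sizeNeeded in GenMapRedUtils above, and the analogous estimatedInput in MapReduceCompiler below): a query such as LIMIT 490,10 must still read past the first 490 rows before it can return anything, so sizing by the limit alone would under-read by almost two orders of magnitude. A worked example with an assumed hive.limit.row.max.size value:

    // Illustrative numbers only; mirrors the (offset + limit) * sizePerRow estimates.
    public class SizeNeededSketch {
      public static void main(String[] args) {
        long sizePerRow = 100L;       // assumed hive.limit.row.max.size
        int offset = 490, limit = 10; // LIMIT 490,10
        long sizeNeeded = (offset + limit) * sizePerRow;
        System.out.println(sizeNeeded); // 50000 bytes, vs. 1000 for the limit alone
      }
    }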
                    .add(ReduceSinkOperator.class)
@@ -185,10 +193,10 @@ public class GlobalLimitOptimizer extends Transform {
     // Otherwise, return null
     Collection<Operator<?>> limitOps = ops.get(LimitOperator.class);
     if (limitOps.size() == 1) {
-      return ((LimitOperator) limitOps.iterator().next()).getConf().getLimit();
+      return (LimitOperator) limitOps.iterator().next();
     }
     else if (limitOps.size() == 0) {
-      return 0;
+      return null;
     }
     return null;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
index 5a55061..4ca2d7d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
 
 /**
  * Make RS calculate top-K selection for limit clause.
@@ -130,7 +131,9 @@ public class LimitPushdownOptimizer extends Transform {
         return false;
       }
       LimitOperator limit = (LimitOperator) nd;
-      rs.getConf().setTopN(limit.getConf().getLimit());
+      LimitDesc limitDesc = limit.getConf();
+      Integer offset = limitDesc.getOffset();
+      rs.getConf().setTopN(limitDesc.getLimit() + ((offset == null) ? 0 : offset));
       rs.getConf().setTopNMemoryUsage(((LimitPushdownContext) procCtx).threshold);
       if (rs.getNumChild() == 1 && rs.getChildren().get(0) instanceof GroupByOperator) {
         rs.getConf().setMapGroupBy(true);

http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
index 0c8728d..6ed2914 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java
@@ -83,6 +83,10 @@ public class HiveSortLimit extends Sort implements HiveRelNode {
     return fetch;
   }
 
+  public RexNode getOffsetExpr() {
+    return offset;
+  }
+
   public void setInputRefToCallMap(ImmutableMap<Integer, RexNode> refToCall) {
     this.mapOfInputRefToRexCall = refToCall;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
index 1f5d919..425514d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java
@@ -128,9 +128,10 @@ class ASTBuilder {
     return ASTBuilder.construct(HiveParser.TOK_HAVING, "TOK_HAVING").add(cond).node();
   }
 
-  static ASTNode limit(Object value) {
+  static ASTNode limit(Object offset, Object limit) {
     return ASTBuilder.construct(HiveParser.TOK_LIMIT, "TOK_LIMIT")
-        .add(HiveParser.Number, value.toString()).node();
+        .add(HiveParser.Number, offset.toString())
+        .add(HiveParser.Number, limit.toString()).node();
   }
 
   static ASTNode selectExpr(ASTNode expr, String alias) {
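Note how the pushdown rule above sizes the ReduceSink's top-K buffer as limit + offset: the rows below the offset are still needed at the reducer and are only discarded by the LimitOperator itself. A small stand-alone sketch of the rule, using LimitDesc's null-means-zero offset convention:

    // Sketch of the top-K sizing rule above (Integer offset mirrors
    // LimitDesc.getOffset(), which returns null when the offset is 0).
    public class TopNSketch {
      static int topN(int limit, Integer offset) {
        return limit + ((offset == null) ? 0 : offset);
      }
      public static void main(String[] args) {
        System.out.println(topN(20, 10));   // 30 rows kept for LIMIT 10,20
        System.out.println(topN(20, null)); // 20 rows kept for plain LIMIT 20
      }
    }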
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index d026e58..3f2267d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -251,10 +251,13 @@ public class ASTConverter {
         hiveAST.order = orderAst;
       }
 
-      RexNode limitExpr = hiveSortLimit.getFetchExpr();
-      if (limitExpr != null) {
-        Object val = ((RexLiteral) limitExpr).getValue2();
-        hiveAST.limit = ASTBuilder.limit(val);
+      RexNode offsetExpr = hiveSortLimit.getOffsetExpr();
+      RexNode fetchExpr = hiveSortLimit.getFetchExpr();
+      if (fetchExpr != null) {
+        Object offset = (offsetExpr == null) ?
+            new Integer(0) : ((RexLiteral) offsetExpr).getValue2();
+        Object fetch = ((RexLiteral) fetchExpr).getValue2();
+        hiveAST.limit = ASTBuilder.limit(offset, fetch);
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 20945a0..0ca6949 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -26,6 +26,7 @@ import java.util.BitSet;
 import java.util.Collections;
 import java.util.EnumSet;
 import java.util.HashMap;
+import java.util.AbstractMap.SimpleEntry;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
@@ -2440,13 +2441,17 @@ public class CalcitePlanner extends SemanticAnalyzer {
     private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
       HiveRelNode sortRel = null;
       QBParseInfo qbp = getQBParseInfo(qb);
-      Integer limit = qbp.getDestToLimit().get(qbp.getClauseNames().iterator().next());
-
-      if (limit != null) {
-        RexNode fetch = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(limit));
+      SimpleEntry<Integer,Integer> entry =
+          qbp.getDestToLimit().get(qbp.getClauseNames().iterator().next());
+      Integer offset = (entry == null) ? 0 : entry.getKey();
+      Integer fetch = (entry == null) ? null : entry.getValue();
+
+      if (fetch != null) {
+        RexNode offsetRN = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(offset));
+        RexNode fetchRN = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(fetch));
         RelTraitSet traitSet = cluster.traitSetOf(HiveRelNode.CONVENTION);
         RelCollation canonizedCollation = traitSet.canonize(RelCollations.EMPTY);
-        sortRel = new HiveSortLimit(cluster, traitSet, srcRel, canonizedCollation, null, fetch);
+        sortRel = new HiveSortLimit(cluster, traitSet, srcRel, canonizedCollation, offsetRN, fetchRN);
 
         RowResolver outputRR = new RowResolver();
         if (!RowResolver.add(outputRR, relToHiveRR.get(srcRel))) {
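The net effect of the two translator changes above: on the Calcite path, TOK_LIMIT is now always emitted with two Number children, offset first, with a 0 offset synthesized whenever HiveSortLimit carries no offset expression. For example, LIMIT 10,20 comes back from Calcite as (TOK_LIMIT 10 20), the same shape the parser produces directly (see the grammar change below).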
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
index 6cd636c..c37f9ce 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
@@ -27,6 +27,7 @@ public class GlobalLimitCtx {
 
   private boolean enable;
   private int globalLimit;
+  private int globalOffset;
   private boolean hasTransformOrUDTF;
   private LimitDesc lastReduceLimitDesc;
 
@@ -38,6 +39,10 @@ public class GlobalLimitCtx {
     return globalLimit;
   }
 
+  public int getGlobalOffset() {
+    return globalOffset;
+  }
+
   public boolean ifHasTransformOrUDTF() {
     return hasTransformOrUDTF;
   }
@@ -58,20 +63,23 @@ public class GlobalLimitCtx {
     return enable;
   }
 
-  public void enableOpt(int globalLimit) {
+  public void enableOpt(int globalLimit, int globalOffset) {
     this.enable = true;
     this.globalLimit = globalLimit;
+    this.globalOffset = globalOffset;
   }
 
   public void disableOpt() {
     this.enable = false;
     this.globalLimit = -1;
+    this.globalOffset = 0;
     this.lastReduceLimitDesc = null;
   }
 
   public void reset() {
     enable = false;
     globalLimit = -1;
+    globalOffset = 0;
     hasTransformOrUDTF = false;
     lastReduceLimitDesc = null;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index 395152f..1c72b1c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -185,6 +185,7 @@ KW_DEFERRED: 'DEFERRED';
KW_SERDEPROPERTIES: 'SERDEPROPERTIES';
KW_DBPROPERTIES: 'DBPROPERTIES';
KW_LIMIT: 'LIMIT';
+KW_OFFSET: 'OFFSET';
KW_SET: 'SET';
KW_UNSET: 'UNSET';
KW_TBLPROPERTIES: 'TBLPROPERTIES';

http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index f6ea2a3..d5051ce 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -249,6 +249,7 @@ TOK_INDEXPROPERTIES;
TOK_INDEXPROPLIST;
TOK_TABTYPE;
TOK_LIMIT;
+TOK_OFFSET;
TOK_TABLEPROPERTY;
TOK_IFEXISTS;
TOK_IFNOTEXISTS;
@@ -497,6 +498,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
     xlateMap.put("KW_WITH", "WITH");
     xlateMap.put("KW_SERDEPROPERTIES", "SERDEPROPERTIES");
     xlateMap.put("KW_LIMIT", "LIMIT");
+    xlateMap.put("KW_OFFSET", "OFFSET");
     xlateMap.put("KW_SET", "SET");
     xlateMap.put("KW_PROPERTIES", "TBLPROPERTIES");
     xlateMap.put("KW_VALUE_TYPE", "\$VALUE\$");
@@ -2362,7 +2364,8 @@ limitClause
@init { pushMsg("limit clause", state); }
@after { popMsg(state); }
   :
-   KW_LIMIT num=Number -> ^(TOK_LIMIT $num)
+   KW_LIMIT ((offset=Number COMMA)? num=Number) -> ^(TOK_LIMIT ($offset)? $num)
+   | KW_LIMIT num=Number KW_OFFSET offset=Number -> ^(TOK_LIMIT ($offset)? $num)
   ;

//DELETE FROM <tableName> WHERE ...;
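Both new productions yield the same tree, so the MySQL-style form `LIMIT 10,20` and the standard-style form `LIMIT 20 OFFSET 10` are interchangeable from the parser onward; when the offset token is absent, TOK_LIMIT carries a single child and the offset defaults to 0 in SemanticAnalyzer below.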
"SERDEPROPERTIES"); xlateMap.put("KW_LIMIT", "LIMIT"); + xlateMap.put("KW_OFFSET", "OFFSET"); xlateMap.put("KW_SET", "SET"); xlateMap.put("KW_PROPERTIES", "TBLPROPERTIES"); xlateMap.put("KW_VALUE_TYPE", "\$VALUE\$"); @@ -2362,7 +2364,8 @@ limitClause @init { pushMsg("limit clause", state); } @after { popMsg(state); } : - KW_LIMIT num=Number -> ^(TOK_LIMIT $num) + KW_LIMIT ((offset=Number COMMA)? num=Number) -> ^(TOK_LIMIT ($offset)? $num) + | KW_LIMIT num=Number KW_OFFSET offset=Number -> ^(TOK_LIMIT ($offset)? $num) ; //DELETE FROM <tableName> WHERE ...; http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index bac0d22..15ca754 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -653,7 +653,7 @@ nonReserved | KW_ENABLE | KW_ESCAPED | KW_EXCLUSIVE | KW_EXPLAIN | KW_EXPORT | KW_FIELDS | KW_FILE | KW_FILEFORMAT | KW_FIRST | KW_FORMAT | KW_FORMATTED | KW_FUNCTIONS | KW_HOLD_DDLTIME | KW_HOUR | KW_IDXPROPERTIES | KW_IGNORE | KW_INDEX | KW_INDEXES | KW_INPATH | KW_INPUTDRIVER | KW_INPUTFORMAT | KW_ITEMS | KW_JAR - | KW_KEYS | KW_KEY_TYPE | KW_LIMIT | KW_LINES | KW_LOAD | KW_LOCATION | KW_LOCK | KW_LOCKS | KW_LOGICAL | KW_LONG + | KW_KEYS | KW_KEY_TYPE | KW_LIMIT | KW_OFFSET | KW_LINES | KW_LOAD | KW_LOCATION | KW_LOCK | KW_LOCKS | KW_LOGICAL | KW_LONG | KW_MAPJOIN | KW_MATERIALIZED | KW_METADATA | KW_MINUS | KW_MINUTE | KW_MONTH | KW_MSCK | KW_NOSCAN | KW_NO_DROP | KW_OFFLINE | KW_OPTION | KW_OUTPUTDRIVER | KW_OUTPUTFORMAT | KW_OVERWRITE | KW_OWNER | KW_PARTITIONED | KW_PARTITIONS | KW_PLUS | KW_PRETTY | KW_PRINCIPALS | KW_PROTECTION | KW_PURGE | KW_READ | KW_READONLY | KW_REBUILD | KW_RECORDREADER | KW_RECORDWRITER http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java index d41253f..5b08ed2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java @@ -215,7 +215,8 @@ public class MapReduceCompiler extends TaskCompiler { // long sizePerRow = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVELIMITMAXROWSIZE); - estimatedInput = globalLimitCtx.getGlobalLimit() * sizePerRow; + estimatedInput = (globalLimitCtx.getGlobalOffset() + + globalLimitCtx.getGlobalLimit()) * sizePerRow; long minSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE); long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1; http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java index 186c2a8..3a226e7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java @@ -26,6 +26,7 @@ import 
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 5803a9c..b888860 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -1328,7 +1328,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       break;
 
     case HiveParser.TOK_LIMIT:
-      qbp.setDestLimit(ctx_1.dest, new Integer(ast.getChild(0).getText()));
+      if (ast.getChildCount() == 2) {
+        qbp.setDestLimit(ctx_1.dest,
+            new Integer(ast.getChild(0).getText()),
+            new Integer(ast.getChild(1).getText()));
+      } else {
+        qbp.setDestLimit(ctx_1.dest, new Integer(0),
+            new Integer(ast.getChild(0).getText()));
+      }
       break;
 
     case HiveParser.TOK_ANALYZE:
@@ -6801,7 +6808,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   }
 
   @SuppressWarnings("nls")
-  private Operator genLimitPlan(String dest, QB qb, Operator input, int limit)
+  private Operator genLimitPlan(String dest, QB qb, Operator input, int offset, int limit)
       throws SemanticException {
     // A map-only job can be optimized - instead of converting it to a
     // map-reduce job, we can have another map
@@ -6812,7 +6819,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
     RowResolver inputRR = opParseCtx.get(input).getRowResolver();
 
-    LimitDesc limitDesc = new LimitDesc(limit);
+    LimitDesc limitDesc = new LimitDesc(offset, limit);
     globalLimitCtx.setLastReduceLimitDesc(limitDesc);
 
     Operator limitMap = putOpInsertMap(OperatorFactory.getAndMakeChild(
@@ -6922,14 +6929,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
   @SuppressWarnings("nls")
   private Operator genLimitMapRedPlan(String dest, QB qb, Operator input,
-      int limit, boolean extraMRStep) throws SemanticException {
+      int offset, int limit, boolean extraMRStep) throws SemanticException {
     // A map-only job can be optimized - instead of converting it to a
     // map-reduce job, we can have another map
     // job to do the same to avoid the cost of sorting in the map-reduce phase.
     // A better approach would be to
     // write into a local file and then have a map-only job.
     // Add the limit operator to get the value fields
-    Operator curr = genLimitPlan(dest, qb, input, limit);
+    Operator curr = genLimitPlan(dest, qb, input, offset, limit);
 
     // the client requested that an extra map-reduce step be performed
     if (!extraMRStep) {
@@ -6938,7 +6945,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
     // Create a reduceSink operator followed by another limit
     curr = genReduceSinkPlan(dest, qb, curr, 1, false);
-    return genLimitPlan(dest, qb, curr, limit);
+    return genLimitPlan(dest, qb, curr, offset, limit);
   }
 
   private ArrayList<ExprNodeDesc> getPartitionColsFromBucketCols(String dest, QB qb, Table tab,
@@ -8871,6 +8878,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       curr = genSelectPlan(dest, qb, curr, gbySource);
 
       Integer limit = qbp.getDestLimit(dest);
+      Integer offset = (qbp.getDestLimitOffset(dest) == null) ? 0 : qbp.getDestLimitOffset(dest);
 
       // Expressions are not supported currently without a alias.
 
@@ -8915,7 +8923,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         if (limit != null) {
           // In case of order by, only 1 reducer is used, so no need of
           // another shuffle
-          curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), !hasOrderBy);
+          curr = genLimitMapRedPlan(dest, qb, curr, offset.intValue(),
+              limit.intValue(), !hasOrderBy);
         }
       } else {
         // exact limit can be taken care of by the fetch operator
@@ -8928,8 +8937,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
           extraMRStep = false;
         }
 
-        curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(),
-            extraMRStep);
+        curr = genLimitMapRedPlan(dest, qb, curr, offset.intValue(),
+            limit.intValue(), extraMRStep);
         qb.getParseInfo().setOuterQueryLimit(limit.intValue());
       }
       if (!SessionState.get().getHiveOperation().equals(HiveOperation.CREATEVIEW)) {
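Per the grammar, TOK_LIMIT carries either one child (rowcount) or two (offset, then rowcount), which is exactly the dispatch in the TOK_LIMIT case above. A stand-alone sketch of the same child-count logic over plain strings (a hypothetical stand-in for the ASTNode API):

    // Children of TOK_LIMIT as text: [limit] or [offset, limit].
    public class TokLimitSketch {
      static int[] offsetAndLimit(String... children) {
        int offset = (children.length == 2) ? Integer.parseInt(children[0]) : 0;
        int limit = Integer.parseInt(children[children.length - 1]);
        return new int[] { offset, limit };
      }
      public static void main(String[] args) {
        System.out.println(java.util.Arrays.toString(offsetAndLimit("20")));       // [0, 20]
        System.out.println(java.util.Arrays.toString(offsetAndLimit("10", "20"))); // [10, 20]
      }
    }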
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java
index f88bf63..8448a41 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/LimitDesc.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.plan.Explain.Level;
 @Explain(displayName = "Limit", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
 public class LimitDesc extends AbstractOperatorDesc {
   private static final long serialVersionUID = 1L;
+  private int offset = 0;
   private int limit;
   private int leastRows = -1;
 
@@ -37,6 +38,24 @@ public class LimitDesc extends AbstractOperatorDesc {
     this.limit = limit;
   }
 
+  public LimitDesc(final int offset, final int limit) {
+    this.offset = offset;
+    this.limit = limit;
+  }
+
+  /**
+   * To avoid printing the offset when it is 0, return null instead;
+   * hence Integer rather than int.
+   */
+  @Explain(displayName = "Offset of rows", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
+  public Integer getOffset() {
+    return (offset == 0) ? null : new Integer(offset);
+  }
+
+  public void setOffset(Integer offset) {
+    this.offset = offset;
+  }
+
   @Explain(displayName = "Number of rows", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
   public int getLimit() {
     return limit;
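The null-when-zero convention in getOffset() above is what keeps "Offset of rows" out of EXPLAIN output for queries with no offset (and it is also why callers such as LimitPushdownOptimizer null-check the offset). A small sketch of the behaviour:

    // Sketch of the EXPLAIN display rule encoded in LimitDesc.getOffset() above.
    public class OffsetDisplaySketch {
      static Integer displayOffset(int offset) {
        return (offset == 0) ? null : Integer.valueOf(offset);
      }
      public static void main(String[] args) {
        System.out.println(displayOffset(0));  // null -> line omitted from the plan
        System.out.println(displayOffset(10)); // 10   -> "Offset of rows: 10"
      }
    }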
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/test/queries/clientpositive/offset_limit.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/offset_limit.q b/ql/src/test/queries/clientpositive/offset_limit.q
new file mode 100644
index 0000000..80d559a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/offset_limit.q
@@ -0,0 +1,25 @@
+EXPLAIN
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300;
+
+set hive.cbo.enable=false;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100;
+
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/test/queries/clientpositive/offset_limit_global_optimizer.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/offset_limit_global_optimizer.q b/ql/src/test/queries/clientpositive/offset_limit_global_optimizer.q
new file mode 100644
index 0000000..5ddb9a6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/offset_limit_global_optimizer.q
@@ -0,0 +1,45 @@
+set hive.limit.optimize.enable=true;
+set hive.limit.row.max.size=12;
+set hive.mapred.mode=nonstrict;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,600;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,600;
+
+set hive.cbo.enable=false;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 400,10;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,10;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,20;
+
+EXPLAIN EXTENDED
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,600;
+
+SELECT srcpart.key, substr(srcpart.value,5), ds, hr FROM srcpart LIMIT 490,600;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/test/queries/clientpositive/offset_limit_ppd_optimizer.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/offset_limit_ppd_optimizer.q b/ql/src/test/queries/clientpositive/offset_limit_ppd_optimizer.q
new file mode 100644
index 0000000..2895203
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/offset_limit_ppd_optimizer.q
@@ -0,0 +1,80 @@
+set hive.explain.user=false;
+set hive.limit.pushdown.memory.usage=0.3f;
+set hive.optimize.reducededuplication.min.reducer=1;
+
+explain
+select key,value from src order by key limit 10,20;
+select key,value from src order by key limit 10,20;
+
+explain
+select key,value from src order by key desc limit 10,20;
+select key,value from src order by key desc limit 10,20;
+
+explain
+select value, sum(key + 1) as sum from src group by value order by value limit 10,20;
+select value, sum(key + 1) as sum from src group by value order by value limit 10,20;
+
+-- deduped RS
+explain
+select value,avg(key + 1) from src group by value order by value limit 10,20;
+select value,avg(key + 1) from src group by value order by value limit 10,20;
+
+-- distincts
+explain
+select distinct(cdouble) as dis from alltypesorc order by dis limit 10,20;
+select distinct(cdouble) as dis from alltypesorc order by dis limit 10,20;
+
+explain
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 10,20;
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 10,20;
+
+explain
+select ctinyint, count(cdouble) from (select ctinyint, cdouble from alltypesorc group by ctinyint, cdouble) t1 group by ctinyint order by ctinyint limit 10,20;
+select ctinyint, count(cdouble) from (select ctinyint, cdouble from alltypesorc group by ctinyint, cdouble) t1 group by ctinyint order by ctinyint limit 10,20;
+
+-- multi distinct
+explain
+select ctinyint, count(distinct(cstring1)), count(distinct(cstring2)) from alltypesorc group by ctinyint order by ctinyint limit 10,20;
+select ctinyint, count(distinct(cstring1)), count(distinct(cstring2)) from alltypesorc group by ctinyint order by ctinyint limit 10,20;
+
+-- limit zero
+explain
+select key,value from src order by key limit 0,0;
+select key,value from src order by key limit 0,0;
+
+-- 2MR (applied to last RS)
+explain
+select value, sum(key) as sum from src group by value order by sum limit 10,20;
+select value, sum(key) as sum from src group by value order by sum limit 10,20;
+
+set hive.map.aggr=false;
+-- map aggregation disabled
+explain
+select value, sum(key) as sum from src group by value order by value limit 10,20;
+select value, sum(key) as sum from src group by value order by value limit 10,20;
+
+set hive.limit.pushdown.memory.usage=0.00002f;
+
+-- flush for order-by
+explain
+select key,value,value,value,value,value,value,value,value from src order by key limit 30,70;
+select key,value,value,value,value,value,value,value,value from src order by key limit 30,70;
+
+-- flush for group-by
+explain
+select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) order by sum limit 30,70;
+select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) order by sum limit 30,70;
+
+-- subqueries
+explain
+select * from
+(select key, count(1) from src group by key order by key limit 10,20) subq
+join
+(select key, count(1) from src group by key limit 20,20) subq2
+on subq.key=subq2.key limit 3,5;
+select * from
+(select key, count(1) from src group by key order by key limit 10,20) subq
+join
+(select key, count(1) from src group by key order by key limit 20,20) subq2
+on subq.key=subq2.key limit 3,5;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/test/queries/clientpositive/vectorization_offset_limit.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorization_offset_limit.q b/ql/src/test/queries/clientpositive/vectorization_offset_limit.q
new file mode 100644
index 0000000..3d01154
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vectorization_offset_limit.q
@@ -0,0 +1,10 @@
+set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=true;
+set hive.mapred.mode=nonstrict;
+
+explain SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2;
+SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 3,2;
+
+explain
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3;
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 10,3;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/6273bee3/ql/src/test/results/clientpositive/offset_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/offset_limit.q.out b/ql/src/test/results/clientpositive/offset_limit.q.out
new file mode 100644
index 0000000..2092c1d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/offset_limit.q.out
@@ -0,0 +1,257 @@
+PREHOOK: query: EXPLAIN
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(substr(value, 5))
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.1
+              value expressions: _col1 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 10
+            Offset of rows: 10
+            Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+116	116.0
+118	236.0
+119	357.0
+12	24.0
+120	240.0
+125	250.0
+126	126.0
+128	384.0
+129	258.0
+131	131.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	0.0
+10	10.0
+100	200.0
+103	206.0
+104	208.0
+105	105.0
+11	11.0
+111	111.0
+113	226.0
+114	114.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+10	10.0
+100	200.0
+103	206.0
+104	208.0
+105	105.0
+11	11.0
+111	111.0
+113	226.0
+114	114.0
+116	116.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86	86.0
+87	87.0
+9	9.0
+90	270.0
+92	92.0
+95	190.0
+96	96.0
+97	194.0
+98	196.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86	86.0
+87	87.0
+9	9.0
+90	270.0
+92	92.0
+95	190.0
+96	96.0
+97	194.0
+98	196.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 10,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+116	116.0
+118	236.0
+119	357.0
+12	24.0
+120	240.0
+125	250.0
+126	126.0
+128	384.0
+129	258.0
+131	131.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 0,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	0.0
+10	10.0
+100	200.0
+103	206.0
+104	208.0
+105	105.0
+11	11.0
+111	111.0
+113	226.0
+114	114.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 1,10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+10	10.0
+100	200.0
+103	206.0
+104	208.0
+105	105.0
+11	11.0
+111	111.0
+113	226.0
+114	114.0
+116	116.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 300,100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86	86.0
+87	87.0
+9	9.0
+90	270.0
+92	92.0
+95	190.0
+96	96.0
+97	194.0
+98	196.0
+PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key ORDER BY src.key LIMIT 100 OFFSET 300
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86	86.0
+87	87.0
+9	9.0
+90	270.0
+92	92.0
+95	190.0
+96	96.0
+97	194.0
+98	196.0
