Author: jpullokk Date: Wed Feb 4 18:12:43 2015 New Revision: 1657358 URL: http://svn.apache.org/r1657358 Log: HIVE-9292: Inline GroupBy, Properties (Jesus Camacho Rodriguez via Laljo John Pullokkaran)
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java?rev=1657358&r1=1657357&r2=1657358&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java Wed Feb 4 18:12:43 2015 @@ -128,6 +128,60 @@ public class OperatorUtils { } } + /** + * Starting at the input operator, finds the last operator in the stream that + * is an instance of the input class. + * + * @param op the starting operator + * @param clazz the class that the operator that we are looking for instantiates + * @return null if no such operator exists or multiple branches are found in + * the stream, the last operator otherwise + */ + @SuppressWarnings("unchecked") + public static <T> T findLastOperator(Operator<?> op, Class<T> clazz) { + Operator<?> currentOp = op; + T lastOp = null; + while (currentOp != null) { + if (clazz.isInstance(currentOp)) { + lastOp = (T) currentOp; + } + if (currentOp.getChildOperators().size() == 1) { + currentOp = currentOp.getChildOperators().get(0); + } + else { + currentOp = null; + } + } + return lastOp; + } + + /** + * Starting at the input operator, finds the last operator upstream that is + * an instance of the input class. 
+ * + * @param op the starting operator + * @param clazz the class that the operator that we are looking for instantiates + * @return null if no such operator exists or multiple branches are found in + * the stream, the last operator otherwise + */ + @SuppressWarnings("unchecked") + public static <T> T findLastOperatorUpstream(Operator<?> op, Class<T> clazz) { + Operator<?> currentOp = op; + T lastOp = null; + while (currentOp != null) { + if (clazz.isInstance(currentOp)) { + lastOp = (T) currentOp; + } + if (currentOp.getParentOperators().size() == 1) { + currentOp = currentOp.getParentOperators().get(0); + } + else { + currentOp = null; + } + } + return lastOp; + } + public static void iterateParents(Operator<?> operator, Function<Operator<?>> function) { iterateParents(operator, function, new HashSet<Operator<?>>()); } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java?rev=1657358&r1=1657357&r2=1657358&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java Wed Feb 4 18:12:43 2015 @@ -24,6 +24,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ParseContext; @@ -34,6 +35,7 @@ import org.apache.hadoop.hive.ql.parse.Q import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; /** @@ -45,33 +47,27 @@ import org.apache.hadoop.hive.ql.parse.S * */ public final class RewriteParseContextGenerator { - private static final Log LOG = LogFactory.getLog(RewriteParseContextGenerator.class.getName()); - private RewriteParseContextGenerator(){ - } + private static final Log LOG = LogFactory.getLog(RewriteParseContextGenerator.class.getName()); /** - * Parse the input {@link String} command and generate a ASTNode tree. + * Parse the input {@link String} command and generate an operator tree. * @param conf * @param command - * @return the parse context * @throws SemanticException */ - public static ParseContext generateOperatorTree(HiveConf conf, - String command) throws SemanticException{ - Context ctx; - ParseContext subPCtx = null; + public static Operator<? extends OperatorDesc> generateOperatorTree(HiveConf conf, + String command) throws SemanticException { + Operator<? 
extends OperatorDesc> operatorTree; try { - ctx = new Context(conf); + Context ctx = new Context(conf); ParseDriver pd = new ParseDriver(); ASTNode tree = pd.parse(command, ctx); tree = ParseUtils.findRootNonNullToken(tree); BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree); assert(sem instanceof SemanticAnalyzer); - doSemanticAnalysis((SemanticAnalyzer) sem, tree, ctx); - - subPCtx = ((SemanticAnalyzer) sem).getParseContext(); + operatorTree = doSemanticAnalysis((SemanticAnalyzer) sem, tree, ctx); LOG.info("Sub-query Semantic Analysis Completed"); } catch (IOException e) { LOG.error("IOException in generating the operator " + @@ -89,13 +85,12 @@ public final class RewriteParseContextGe LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); throw new SemanticException(e.getMessage(), e); } - return subPCtx; - + return operatorTree; } /** * For the input ASTNode tree, perform a semantic analysis and check metadata - * Generate a operator tree and return the {@link ParseContext} instance for the operator tree. + * Generate an operator tree and return it. 
* * @param ctx * @param sem @@ -103,7 +98,7 @@ public final class RewriteParseContextGe * @return * @throws SemanticException */ - private static void doSemanticAnalysis(SemanticAnalyzer sem, + private static Operator<?> doSemanticAnalysis(SemanticAnalyzer sem, ASTNode ast, Context ctx) throws SemanticException { QB qb = new QB(null, null, false); ASTNode child = ast; @@ -119,9 +114,10 @@ public final class RewriteParseContextGe LOG.info("Completed getting MetaData in Sub-query Semantic Analysis"); LOG.info("Sub-query Abstract syntax tree: " + ast.toStringTree()); - sem.genPlan(qb); + Operator<?> operator = sem.genPlan(qb); LOG.info("Sub-query Completed plan generation"); + return operator; } } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java?rev=1657358&r1=1657357&r2=1657358&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java Wed Feb 4 18:12:43 2015 @@ -23,7 +23,6 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -32,6 +31,7 @@ import org.apache.hadoop.hive.ql.exec.Co import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; @@ -267,15 
+267,17 @@ public final class RewriteQueryUsingAggr String selReplacementCommand = "select sum(`" + rewriteQueryCtx.getAggregateFunction() + "`)" + " from " + rewriteQueryCtx.getIndexName() + " group by " + rewriteQueryCtx.getIndexKey() + " "; - // create a new ParseContext for the query to retrieve its operator tree, - // and the required GroupByOperator from it - ParseContext newDAGContext = RewriteParseContextGenerator.generateOperatorTree( - rewriteQueryCtx.getParseContext().getConf(), selReplacementCommand); + // retrieve the operator tree for the query, and the required GroupByOperator from it + Operator<?> newOperatorTree = RewriteParseContextGenerator.generateOperatorTree( + rewriteQueryCtx.getParseContext().getConf(), + selReplacementCommand); // we get our new GroupByOperator here - Map<GroupByOperator, Set<String>> newGbyOpMap = newDAGContext.getGroupOpToInputTables(); - GroupByOperator newGbyOperator = newGbyOpMap.keySet().iterator().next(); - GroupByDesc oldConf = operator.getConf(); + GroupByOperator newGbyOperator = OperatorUtils.findLastOperatorUpstream( + newOperatorTree, GroupByOperator.class); + if (newGbyOperator == null) { + throw new SemanticException("Error replacing GroupBy operator."); + } // we need this information to set the correct colList, outputColumnNames // in SelectOperator @@ -297,6 +299,7 @@ public final class RewriteQueryUsingAggr // Now the GroupByOperator has the new AggregationList; // sum(`_count_of_indexed_key`) // instead of count(indexed_key) + GroupByDesc oldConf = operator.getConf(); oldConf.setAggregators((ArrayList<AggregationDesc>) newAggrList); operator.setConf(oldConf); Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java?rev=1657358&r1=1657357&r2=1657358&view=diff ============================================================================== --- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java Wed Feb 4 18:12:43 2015 @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.Context import org.apache.hadoop.hive.ql.QueryProperties; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.FetchTask; -import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.ListSinkOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; @@ -83,7 +82,6 @@ public class ParseContext { private List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer; // list of map join // operators with no // reducer - private Map<GroupByOperator, Set<String>> groupOpToInputTables; private Map<String, PrunedPartitionList> prunedPartitions; private Map<String, ReadEntity> viewAliasToInput; @@ -153,7 +151,6 @@ public class ParseContext { List<LoadTableDesc> loadTableWork, List<LoadFileDesc> loadFileWork, Context ctx, HashMap<String, String> idToTableNameMap, int destTableId, UnionProcContext uCtx, List<AbstractMapJoinOperator<? 
extends MapJoinDesc>> listMapJoinOpsNoReducer, - Map<GroupByOperator, Set<String>> groupOpToInputTables, Map<String, PrunedPartitionList> prunedPartitions, HashMap<TableScanOperator, sampleDesc> opToSamplePruner, GlobalLimitCtx globalLimitCtx, @@ -177,7 +174,6 @@ public class ParseContext { this.destTableId = destTableId; this.uCtx = uCtx; this.listMapJoinOpsNoReducer = listMapJoinOpsNoReducer; - this.groupOpToInputTables = groupOpToInputTables; this.prunedPartitions = prunedPartitions; this.opToSamplePruner = opToSamplePruner; this.nameToSplitSample = nameToSplitSample; @@ -391,21 +387,6 @@ public class ParseContext { } /** - * @return the groupOpToInputTables - */ - public Map<GroupByOperator, Set<String>> getGroupOpToInputTables() { - return groupOpToInputTables; - } - - /** - * @param groupOpToInputTables - */ - public void setGroupOpToInputTables( - Map<GroupByOperator, Set<String>> groupOpToInputTables) { - this.groupOpToInputTables = groupOpToInputTables; - } - - /** * @return pruned partition map */ public Map<String, PrunedPartitionList> getPrunedPartitions() { Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1657358&r1=1657357&r2=1657358&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Wed Feb 4 18:12:43 2015 @@ -389,7 +389,6 @@ public class SemanticAnalyzer extends Ba uCtx = pctx.getUCtx(); listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer(); qb = pctx.getQB(); - groupOpToInputTables = pctx.getGroupOpToInputTables(); prunedPartitions = pctx.getPrunedPartitions(); fetchTask = pctx.getFetchTask(); setLineageInfo(pctx.getLineageInfo()); @@ -400,7 +399,7 @@ public class SemanticAnalyzer 
extends Ba new HashSet<JoinOperator>(joinContext.keySet()), new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()), loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, - listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, + listMapJoinOpsNoReducer, prunedPartitions, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, queryProperties); @@ -10101,7 +10100,7 @@ public class SemanticAnalyzer extends Ba new HashSet<JoinOperator>(joinContext.keySet()), new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()), loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, - listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, opToSamplePruner, + listMapJoinOpsNoReducer, prunedPartitions, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, queryProperties); Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java?rev=1657358&r1=1657357&r2=1657358&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java Wed Feb 4 18:12:43 2015 @@ -390,7 +390,7 @@ public abstract class TaskCompiler { pCtx.getJoinOps(), pCtx.getSmbMapJoinOps(), pCtx.getLoadTableWork(), pCtx.getLoadFileWork(), pCtx.getContext(), pCtx.getIdToTableNameMap(), pCtx.getDestTableId(), pCtx.getUCtx(), - pCtx.getListMapJoinOpsNoReducer(), pCtx.getGroupOpToInputTables(), + pCtx.getListMapJoinOpsNoReducer(), pCtx.getPrunedPartitions(), pCtx.getOpToSamplePruner(), pCtx.getGlobalLimitCtx(), 
pCtx.getNameToSplitSample(), pCtx.getSemanticInputs(), rootTasks, pCtx.getOpToPartToSkewedPruner(), pCtx.getViewAliasToInput(),