Author: jpullokk Date: Wed Feb 4 18:12:43 2015 New Revision: 1657358 URL: http://svn.apache.org/r1657358 Log: HIVE-9292: Inline GroupBy, Properties (Jesus Camacho Rodriguez via Laljo John Pullokkaran)
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java?rev=1657358&r1=1657357&r2=1657358&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java Wed Feb 4 18:12:43 2015 @@ -128,6 +128,60 @@ public class OperatorUtils { } } + /** + * Starting at the input operator, finds the last operator in the stream that + * is an instance of the input class. + * + * @param op the starting operator + * @param clazz the class that the operator that we are looking for instantiates + * @return null if no such operator exists or multiple branches are found in + * the stream, the last operator otherwise + */ + @SuppressWarnings("unchecked") + public static <T> T findLastOperator(Operator<?> op, Class<T> clazz) { + Operator<?> currentOp = op; + T lastOp = null; + while (currentOp != null) { + if (clazz.isInstance(currentOp)) { + lastOp = (T) currentOp; + } + if (currentOp.getChildOperators().size() == 1) { + currentOp = currentOp.getChildOperators().get(0); + } + else { + currentOp = null; + } + } + return lastOp; + } + + /** + * Starting at the input operator, finds the last operator upstream that is + * an instance of the input class. 
+ * + * @param op the starting operator + * @param clazz the class that the operator that we are looking for instantiates + * @return null if no such operator exists or multiple branches are found in + * the stream, the last operator otherwise + */ + @SuppressWarnings("unchecked") + public static <T> T findLastOperatorUpstream(Operator<?> op, Class<T> clazz) { + Operator<?> currentOp = op; + T lastOp = null; + while (currentOp != null) { + if (clazz.isInstance(currentOp)) { + lastOp = (T) currentOp; + } + if (currentOp.getParentOperators().size() == 1) { + currentOp = currentOp.getParentOperators().get(0); + } + else { + currentOp = null; + } + } + return lastOp; + } + public static void iterateParents(Operator<?> operator, Function<Operator<?>> function) { iterateParents(operator, function, new HashSet<Operator<?>>()); } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java?rev=1657358&r1=1657357&r2=1657358&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteParseContextGenerator.java Wed Feb 4 18:12:43 2015 @@ -24,6 +24,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ParseContext; @@ -34,6 +35,7 @@ import org.apache.hadoop.hive.ql.parse.Q import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; /** @@ -45,33 +47,27 @@ import org.apache.hadoop.hive.ql.parse.S * */ public final class RewriteParseContextGenerator { - private static final Log LOG = LogFactory.getLog(RewriteParseContextGenerator.class.getName()); - private RewriteParseContextGenerator(){ - } + private static final Log LOG = LogFactory.getLog(RewriteParseContextGenerator.class.getName()); /** - * Parse the input {@link String} command and generate a ASTNode tree. + * Parse the input {@link String} command and generate an operator tree. * @param conf * @param command - * @return the parse context * @throws SemanticException */ - public static ParseContext generateOperatorTree(HiveConf conf, - String command) throws SemanticException{ - Context ctx; - ParseContext subPCtx = null; + public static Operator<? extends OperatorDesc> generateOperatorTree(HiveConf conf, + String command) throws SemanticException { + Operator<? 
extends OperatorDesc> operatorTree; try { - ctx = new Context(conf); + Context ctx = new Context(conf); ParseDriver pd = new ParseDriver(); ASTNode tree = pd.parse(command, ctx); tree = ParseUtils.findRootNonNullToken(tree); BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree); assert(sem instanceof SemanticAnalyzer); - doSemanticAnalysis((SemanticAnalyzer) sem, tree, ctx); - - subPCtx = ((SemanticAnalyzer) sem).getParseContext(); + operatorTree = doSemanticAnalysis((SemanticAnalyzer) sem, tree, ctx); LOG.info("Sub-query Semantic Analysis Completed"); } catch (IOException e) { LOG.error("IOException in generating the operator " + @@ -89,13 +85,12 @@ public final class RewriteParseContextGe LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); throw new SemanticException(e.getMessage(), e); } - return subPCtx; - + return operatorTree; } /** * For the input ASTNode tree, perform a semantic analysis and check metadata - * Generate a operator tree and return the {@link ParseContext} instance for the operator tree. + * Generate an operator tree and return it. 
* * @param ctx * @param sem @@ -103,7 +98,7 @@ public final class RewriteParseContextGe * @return * @throws SemanticException */ - private static void doSemanticAnalysis(SemanticAnalyzer sem, + private static Operator<?> doSemanticAnalysis(SemanticAnalyzer sem, ASTNode ast, Context ctx) throws SemanticException { QB qb = new QB(null, null, false); ASTNode child = ast; @@ -119,9 +114,10 @@ public final class RewriteParseContextGe LOG.info("Completed getting MetaData in Sub-query Semantic Analysis"); LOG.info("Sub-query Abstract syntax tree: " + ast.toStringTree()); - sem.genPlan(qb); + Operator<?> operator = sem.genPlan(qb); LOG.info("Sub-query Completed plan generation"); + return operator; } } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java?rev=1657358&r1=1657357&r2=1657358&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteQueryUsingAggregateIndexCtx.java Wed Feb 4 18:12:43 2015 @@ -23,7 +23,6 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -32,6 +31,7 @@ import org.apache.hadoop.hive.ql.exec.Co import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; @@ -267,15 
+267,17 @@ public final class RewriteQueryUsingAggr String selReplacementCommand = "select sum(`" + rewriteQueryCtx.getAggregateFunction() + "`)" + " from " + rewriteQueryCtx.getIndexName() + " group by " + rewriteQueryCtx.getIndexKey() + " "; - // create a new ParseContext for the query to retrieve its operator tree, - // and the required GroupByOperator from it - ParseContext newDAGContext = RewriteParseContextGenerator.generateOperatorTree( - rewriteQueryCtx.getParseContext().getConf(), selReplacementCommand); + // retrieve the operator tree for the query, and the required GroupByOperator from it + Operator<?> newOperatorTree = RewriteParseContextGenerator.generateOperatorTree( + rewriteQueryCtx.getParseContext().getConf(), + selReplacementCommand); // we get our new GroupByOperator here - Map<GroupByOperator, Set<String>> newGbyOpMap = newDAGContext.getGroupOpToInputTables(); - GroupByOperator newGbyOperator = newGbyOpMap.keySet().iterator().next(); - GroupByDesc oldConf = operator.getConf(); + GroupByOperator newGbyOperator = OperatorUtils.findLastOperatorUpstream( + newOperatorTree, GroupByOperator.class); + if (newGbyOperator == null) { + throw new SemanticException("Error replacing GroupBy operator."); + } // we need this information to set the correct colList, outputColumnNames // in SelectOperator @@ -297,6 +299,7 @@ public final class RewriteQueryUsingAggr // Now the GroupByOperator has the new AggregationList; // sum(`_count_of_indexed_key`) // instead of count(indexed_key) + GroupByDesc oldConf = operator.getConf(); oldConf.setAggregators((ArrayList<AggregationDesc>) newAggrList); operator.setConf(oldConf); Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java?rev=1657358&r1=1657357&r2=1657358&view=diff ============================================================================== --- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java Wed Feb 4 18:12:43 2015 @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.Context import org.apache.hadoop.hive.ql.QueryProperties; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.FetchTask; -import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.ListSinkOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; @@ -83,7 +82,6 @@ public class ParseContext { private List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer; // list of map join // operators with no // reducer - private Map<GroupByOperator, Set<String>> groupOpToInputTables; private Map<String, PrunedPartitionList> prunedPartitions; private Map<String, ReadEntity> viewAliasToInput; @@ -153,7 +151,6 @@ public class ParseContext { List<LoadTableDesc> loadTableWork, List<LoadFileDesc> loadFileWork, Context ctx, HashMap<String, String> idToTableNameMap, int destTableId, UnionProcContext uCtx, List<AbstractMapJoinOperator<? 
extends MapJoinDesc>> listMapJoinOpsNoReducer, - Map<GroupByOperator, Set<String>> groupOpToInputTables, Map<String, PrunedPartitionList> prunedPartitions, HashMap<TableScanOperator, sampleDesc> opToSamplePruner, GlobalLimitCtx globalLimitCtx, @@ -177,7 +174,6 @@ public class ParseContext { this.destTableId = destTableId; this.uCtx = uCtx; this.listMapJoinOpsNoReducer = listMapJoinOpsNoReducer; - this.groupOpToInputTables = groupOpToInputTables; this.prunedPartitions = prunedPartitions; this.opToSamplePruner = opToSamplePruner; this.nameToSplitSample = nameToSplitSample; @@ -391,21 +387,6 @@ public class ParseContext { } /** - * @return the groupOpToInputTables - */ - public Map<GroupByOperator, Set<String>> getGroupOpToInputTables() { - return groupOpToInputTables; - } - - /** - * @param groupOpToInputTables - */ - public void setGroupOpToInputTables( - Map<GroupByOperator, Set<String>> groupOpToInputTables) { - this.groupOpToInputTables = groupOpToInputTables; - } - - /** * @return pruned partition map */ public Map<String, PrunedPartitionList> getPrunedPartitions() { Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1657358&r1=1657357&r2=1657358&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Wed Feb 4 18:12:43 2015 @@ -389,7 +389,6 @@ public class SemanticAnalyzer extends Ba uCtx = pctx.getUCtx(); listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer(); qb = pctx.getQB(); - groupOpToInputTables = pctx.getGroupOpToInputTables(); prunedPartitions = pctx.getPrunedPartitions(); fetchTask = pctx.getFetchTask(); setLineageInfo(pctx.getLineageInfo()); @@ -400,7 +399,7 @@ public class SemanticAnalyzer 
extends Ba new HashSet<JoinOperator>(joinContext.keySet()), new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()), loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, - listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, + listMapJoinOpsNoReducer, prunedPartitions, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, queryProperties); @@ -10101,7 +10100,7 @@ public class SemanticAnalyzer extends Ba new HashSet<JoinOperator>(joinContext.keySet()), new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()), loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx, - listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, opToSamplePruner, + listMapJoinOpsNoReducer, prunedPartitions, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, queryProperties); Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java?rev=1657358&r1=1657357&r2=1657358&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java Wed Feb 4 18:12:43 2015 @@ -390,7 +390,7 @@ public abstract class TaskCompiler { pCtx.getJoinOps(), pCtx.getSmbMapJoinOps(), pCtx.getLoadTableWork(), pCtx.getLoadFileWork(), pCtx.getContext(), pCtx.getIdToTableNameMap(), pCtx.getDestTableId(), pCtx.getUCtx(), - pCtx.getListMapJoinOpsNoReducer(), pCtx.getGroupOpToInputTables(), + pCtx.getListMapJoinOpsNoReducer(), pCtx.getPrunedPartitions(), pCtx.getOpToSamplePruner(), pCtx.getGlobalLimitCtx(), 
pCtx.getNameToSplitSample(), pCtx.getSemanticInputs(), rootTasks, pCtx.getOpToPartToSkewedPruner(), pCtx.getViewAliasToInput(),