Author: hashutosh
Date: Mon Feb 2 06:41:02 2015
New Revision: 1656393
URL: http://svn.apache.org/r1656393
Log:
HIVE-9416 : Get rid of Extract Operator (Ashutosh Chauhan via Navis)
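Summary of the change: the reduce-side plan previously ran RS -> EX -> ..., with the Extract operator forwarding the whole VALUE struct; it now runs RS -> SEL -> ..., with an ordinary Select projecting the individual VALUE.<col> columns. Extract thereby loses its operator class, its vectorized variant, its plan descriptor, and every optimizer rule that special-cased it, as the per-file diffs below show.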
Removed:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExtractOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExtractDesc.java
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
hive/trunk/ql/src/test/results/clientpositive/bucket1.q.out
hive/trunk/ql/src/test/results/clientpositive/bucket2.q.out
hive/trunk/ql/src/test/results/clientpositive/bucket3.q.out
hive/trunk/ql/src/test/results/clientpositive/bucket4.q.out
hive/trunk/ql/src/test/results/clientpositive/bucket5.q.out
hive/trunk/ql/src/test/results/clientpositive/bucket6.q.out
hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out
hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out
hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out
hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out
hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out
hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out
hive/trunk/ql/src/test/results/clientpositive/disable_merge_for_bucketing.q.out
hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out
hive/trunk/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out
hive/trunk/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out
hive/trunk/ql/src/test/results/clientpositive/load_dyn_part2.q.out
hive/trunk/ql/src/test/results/clientpositive/ptf.q.out
hive/trunk/ql/src/test/results/clientpositive/ptf_streaming.q.out
hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out
hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/bucket2.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/bucket3.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/bucket4.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/ptf.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/ptf_streaming.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/stats10.q.out
hive/trunk/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
hive/trunk/ql/src/test/results/clientpositive/stats10.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/bucket2.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/bucket3.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/bucket4.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/load_dyn_part2.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/ptf.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/ptf_streaming.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/vector_bucket.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
hive/trunk/ql/src/test/results/clientpositive/vector_bucket.q.out
hive/trunk/ql/src/test/results/clientpositive/vectorized_ptf.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java Mon Feb 2 06:41:02 2015
@@ -19,9 +19,6 @@
package org.apache.hadoop.hive.ql.exec;
import org.apache.hadoop.hive.ql.exec.vector.VectorAppMasterEventOperator;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator;
-import org.apache.hadoop.hive.ql.exec.vector.VectorAppMasterEventOperator;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
@@ -39,7 +36,6 @@ import org.apache.hadoop.hive.ql.plan.De
import org.apache.hadoop.hive.ql.plan.DummyStoreDesc;
import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExtractDesc;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.ForwardDesc;
@@ -89,7 +85,6 @@ public final class OperatorFactory {
opvec.add(new OpTuple<ScriptDesc>(ScriptDesc.class, ScriptOperator.class));
opvec.add(new OpTuple<PTFDesc>(PTFDesc.class, PTFOperator.class));
opvec.add(new OpTuple<ReduceSinkDesc>(ReduceSinkDesc.class, ReduceSinkOperator.class));
- opvec.add(new OpTuple<ExtractDesc>(ExtractDesc.class, ExtractOperator.class));
opvec.add(new OpTuple<GroupByDesc>(GroupByDesc.class, GroupByOperator.class));
opvec.add(new OpTuple<JoinDesc>(JoinDesc.class, JoinOperator.class));
opvec.add(new OpTuple<MapJoinDesc>(MapJoinDesc.class, MapJoinOperator.class));
@@ -143,7 +138,6 @@ public final class OperatorFactory {
vectorOpvec.add(new OpTuple<FileSinkDesc>(FileSinkDesc.class, VectorFileSinkOperator.class));
vectorOpvec.add(new OpTuple<FilterDesc>(FilterDesc.class, VectorFilterOperator.class));
vectorOpvec.add(new OpTuple<LimitDesc>(LimitDesc.class, VectorLimitOperator.class));
- vectorOpvec.add(new OpTuple<ExtractDesc>(ExtractDesc.class, VectorExtractOperator.class));
}
private static final class OpTuple<T extends OperatorDesc> {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java Mon Feb 2 06:41:02 2015
@@ -68,7 +68,6 @@ public class PTFOperator extends Operato
@Override
protected void initializeOp(Configuration jobConf) throws HiveException {
hiveConf = jobConf;
- // if the parent is ExtractOperator, this invocation is from reduce-side
isMapOperator = conf.isMapSide();
reconstructQueryDef(hiveConf);
@@ -157,7 +156,7 @@ public class PTFOperator extends Operato
/*
* Why cannot we just use the ExprNodeEvaluator on the column?
* - because on the reduce-side it is initialized based on the rowOI of the HiveTable
- * and not the OI of the ExtractOp ( the parent of this Operator on the reduce-side)
+ * and not the OI of the parent of this Operator on the reduce-side
*/
keyFields[i] = ExprNodeEvaluatorFactory.get(exprDef.getExprNode());
keyOIs[i] = keyFields[i].initialize(inputOI);
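With Extract gone, PTFOperator no longer infers reduce-side execution from an ExtractOperator parent; the map/reduce distinction is carried on the descriptor itself, a one-line restatement of the hunk above:

    // map-side vs. reduce-side is now a property of PTFDesc, not of the parent operator
    isMapOperator = conf.isMapSide();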
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java Mon Feb 2 06:41:02 2015
@@ -29,7 +29,6 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Order;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
@@ -84,7 +83,7 @@ public class BucketingSortingReduceSinkO
// process reduce sink added by hive.enforce.bucketing or hive.enforce.sorting
opRules.put(new RuleRegExp("R1",
ReduceSinkOperator.getOperatorName() + "%" +
- ExtractOperator.getOperatorName() + "%" +
+ SelectOperator.getOperatorName() + "%" +
FileSinkOperator.getOperatorName() + "%"),
getBucketSortReduceSinkProc(pctx));
@@ -362,8 +361,7 @@ public class BucketingSortingReduceSinkO
// If the reduce sink has not been introduced due to bucketing/sorting, ignore it
FileSinkOperator fsOp = (FileSinkOperator) nd;
- ExtractOperator exOp = (ExtractOperator) fsOp.getParentOperators().get(0);
- ReduceSinkOperator rsOp = (ReduceSinkOperator) exOp.getParentOperators().get(0);
+ ReduceSinkOperator rsOp = (ReduceSinkOperator) fsOp.getParentOperators().get(0).getParentOperators().get(0);
List<ReduceSinkOperator> rsOps = pGraphContext
.getReduceSinkOperatorsAddedByEnforceBucketingSorting();
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java Mon Feb 2 06:41:02 2015
@@ -372,7 +372,7 @@ public final class ConstantPropagateProc
// if false return false
return childExpr;
}
- } else // Try to fold (key = 86) and (key is not null) to (key = 86)
+ } else // Try to fold (key = 86) and (key is not null) to (key = 86)
if (childExpr instanceof ExprNodeGenericFuncDesc &&
((ExprNodeGenericFuncDesc)childExpr).getGenericUDF() instanceof GenericUDFOPNotNull &&
childExpr.getChildren().get(0) instanceof ExprNodeColumnDesc &&
other instanceof ExprNodeGenericFuncDesc
@@ -429,7 +429,7 @@ public final class ConstantPropagateProc
}
if (ci == null) {
LOG.error("Can't resolve " + desc.getTabAlias() + "." +
desc.getColumn());
- throw new RuntimeException("Can't resolve " + desc.getTabAlias() + "." +
desc.getColumn());
+ return null;
}
ExprNodeDesc constant = null;
// Additional work for union operator, see union27.q
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java Mon Feb 2 06:41:02 2015
@@ -34,14 +34,15 @@ import org.apache.hadoop.hive.conf.HiveC
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -59,12 +60,12 @@ import org.apache.hadoop.hive.ql.plan.Dy
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExtractDesc;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -224,19 +225,28 @@ public class SortedDynPartitionOptimizer
rsConf, new RowSchema(outRS.getSignature()), fsParent);
rsOp.setColumnExprMap(colExprMap);
- // Create ExtractDesc
- RowSchema exRR = new RowSchema(outRS);
- ExtractDesc exConf = new ExtractDesc(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
- Utilities.ReduceField.VALUE.toString(), "", false));
-
- // Create Extract Operator
- ExtractOperator exOp = (ExtractOperator) OperatorFactory.getAndMakeChild(
- exConf, exRR, rsOp);
+ List<ExprNodeDesc> valCols = rsConf.getValueCols();
+ List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>(valCols.size());
+ List<String> colNames = new ArrayList<String>();
+ String colName;
+ for (ExprNodeDesc valCol : valCols) {
+ colName = PlanUtils.stripQuotes(valCol.getExprString());
+ colNames.add(colName);
+ descs.add(new ExprNodeColumnDesc(valCol.getTypeInfo(), ReduceField.VALUE.toString()+"."+colName, null, false));
+ }
+
+ // Create SelectDesc
+ SelectDesc selConf = new SelectDesc(descs, colNames);
+ RowSchema selRS = new RowSchema(outRS);
+
+ // Create Select Operator
+ SelectOperator selOp = (SelectOperator) OperatorFactory.getAndMakeChild(
+ selConf, selRS, rsOp);
- // link EX to FS
+ // link SEL to FS
fsOp.getParentOperators().clear();
- fsOp.getParentOperators().add(exOp);
- exOp.getChildOperators().add(fsOp);
+ fsOp.getParentOperators().add(selOp);
+ selOp.getChildOperators().add(fsOp);
// Set if partition sorted or partition bucket sorted
fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_SORTED);
@@ -249,13 +259,13 @@ public class SortedDynPartitionOptimizer
.getSchema().getSignature());
fsOp.getConf().setPartitionCols(partitionColumns);
- LOG.info("Inserted " + rsOp.getOperatorId() + " and " +
exOp.getOperatorId()
+ LOG.info("Inserted " + rsOp.getOperatorId() + " and " +
selOp.getOperatorId()
+ " as parent of " + fsOp.getOperatorId() + " and child of " +
fsParent.getOperatorId());
return null;
}
- // Remove RS and EX introduced by enforce bucketing/sorting config
- // Convert PARENT -> RS -> EX -> FS to PARENT -> FS
+ // Remove RS and SEL introduced by enforce bucketing/sorting config
+ // Convert PARENT -> RS -> SEL -> FS to PARENT -> FS
private boolean removeRSInsertedByEnforceBucketing(FileSinkOperator fsOp) {
HiveConf hconf = parseCtx.getConf();
boolean enforceBucketing = HiveConf.getBoolVar(hconf, ConfVars.HIVEENFORCEBUCKETING);
@@ -290,7 +300,7 @@ public class SortedDynPartitionOptimizer
Operator<? extends OperatorDesc> rsChild = rsToRemove.getChildOperators().get(0);
Operator<? extends OperatorDesc> rsGrandChild = rsChild.getChildOperators().get(0);
- if (rsChild instanceof ExtractOperator) {
+ if (rsChild instanceof SelectOperator) {
// if schema size cannot be matched, then it could be because of constant folding
// converting partition column expression to constant expression. The constant
// expression will then get pruned by column pruner since it will not reference to
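The hunk above is the heart of the change: instead of a hand-built ExtractDesc, the optimizer derives a SelectDesc from the ReduceSink's value columns. A minimal self-contained sketch of that pattern, assuming hive-exec of this era on the classpath (the class and method names below are illustrative; in the commit this logic lives inline in the optimizer):

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField;
    import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    import org.apache.hadoop.hive.ql.plan.PlanUtils;
    import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
    import org.apache.hadoop.hive.ql.plan.SelectDesc;

    public final class ValueProjectionSketch {
      // Build a SelectDesc that forwards every value column of the given
      // ReduceSinkDesc under its reduce-side name, e.g. VALUE._col0.
      static SelectDesc projectValueColumns(ReduceSinkDesc rsConf) {
        List<ExprNodeDesc> valCols = rsConf.getValueCols();
        List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>(valCols.size());
        List<String> colNames = new ArrayList<String>(valCols.size());
        for (ExprNodeDesc valCol : valCols) {
          String colName = PlanUtils.stripQuotes(valCol.getExprString());
          colNames.add(colName);
          descs.add(new ExprNodeColumnDesc(valCol.getTypeInfo(),
              ReduceField.VALUE + "." + colName, null, false));
        }
        return new SelectDesc(descs, colNames);
      }
    }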
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java Mon Feb 2 06:41:02 2015
@@ -30,7 +30,6 @@ import java.util.Map.Entry;
import java.util.Set;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.ForwardOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
@@ -43,10 +42,12 @@ import org.apache.hadoop.hive.ql.exec.Sc
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
@@ -237,7 +238,6 @@ public final class CorrelationUtilities
}
if (!(cursor instanceof SelectOperator
|| cursor instanceof FilterOperator
- || cursor instanceof ExtractOperator
|| cursor instanceof ForwardOperator
|| cursor instanceof ScriptOperator
|| cursor instanceof ReduceSinkOperator)) {
@@ -332,46 +332,64 @@ public final class CorrelationUtilities
return child.getParentOperators();
}
+ // replace the cRS to SEL operator
protected static SelectOperator replaceReduceSinkWithSelectOperator(ReduceSinkOperator childRS,
ParseContext context, AbstractCorrelationProcCtx procCtx) throws SemanticException {
- SelectOperator select = replaceOperatorWithSelect(childRS, context, procCtx);
- select.getConf().setOutputColumnNames(childRS.getConf().getOutputValueColumnNames());
- select.getConf().setColList(childRS.getConf().getValueCols());
- return select;
- }
-
- // replace the cRS to SEL operator
- // If child if cRS is EXT, EXT also should be removed
- protected static SelectOperator replaceOperatorWithSelect(Operator<?> operator,
- ParseContext context, AbstractCorrelationProcCtx procCtx)
- throws SemanticException {
- RowSchema inputRS = operator.getSchema();
- SelectDesc select = new SelectDesc(null, null);
-
- Operator<?> parent = getSingleParent(operator);
- Operator<?> child = getSingleChild(operator);
+ RowSchema inputRS = childRS.getSchema();
+ SelectDesc select = new SelectDesc(childRS.getConf().getValueCols(), childRS.getConf().getOutputValueColumnNames());
+ Operator<?> parent = getSingleParent(childRS);
parent.getChildOperators().clear();
SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(
select, new RowSchema(inputRS.getSignature()), parent);
- sel.setColumnExprMap(operator.getColumnExprMap());
+ sel.setColumnExprMap(childRS.getColumnExprMap());
- sel.setChildOperators(operator.getChildOperators());
- for (Operator<? extends Serializable> ch : operator.getChildOperators()) {
- ch.replaceParent(operator, sel);
- }
- if (child instanceof ExtractOperator) {
- removeOperator(child, getSingleChild(child), sel, context);
- procCtx.addRemovedOperator(child);
- }
- operator.setChildOperators(null);
- operator.setParentOperators(null);
- procCtx.addRemovedOperator(operator);
+ sel.setChildOperators(childRS.getChildOperators());
+ for (Operator<? extends Serializable> ch : childRS.getChildOperators()) {
+ ch.replaceParent(childRS, sel);
+ }
+
+ removeChildSelIfApplicable(getSingleChild(childRS), sel, context, procCtx);
+ childRS.setChildOperators(null);
+ childRS.setParentOperators(null);
+ procCtx.addRemovedOperator(childRS);
return sel;
}
+ //TODO: ideally this method should be removed in future, as in we need not to rely on removing
+ // this select operator which likely is introduced by SortedDynPartitionOptimizer.
+ // NonblockingdedupOptimizer should be able to merge this select Operator with its
+ // parent. But, that is not working at the moment. See: dynpart_sort_optimization2.q
+
+ private static void removeChildSelIfApplicable(Operator<?> child, SelectOperator sel,
+ ParseContext context, AbstractCorrelationProcCtx procCtx) throws SemanticException {
+
+ if (!(child instanceof SelectOperator)) {
+ return;
+ }
+ if (child.getColumnExprMap() != null) {
+ return;
+ }
+
+ SelectOperator selOp = (SelectOperator) child;
+
+ for (ExprNodeDesc desc : selOp.getConf().getColList()) {
+ if (!(desc instanceof ExprNodeColumnDesc)) {
+ return;
+ }
+ ExprNodeColumnDesc col = (ExprNodeColumnDesc) desc;
+ if(!col.getColumn().startsWith(ReduceField.VALUE.toString()+".") ||
+ col.getTabAlias() != null || col.getIsPartitionColOrVirtualCol()){
+ return;
+ }
+ }
+
+ removeOperator(child, getSingleChild(child), sel, context);
+ procCtx.addRemovedOperator(child);
+ }
+
protected static void removeReduceSinkForGroupBy(ReduceSinkOperator cRS, GroupByOperator cGBYr,
ParseContext context, AbstractCorrelationProcCtx procCtx) throws SemanticException {
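The guard in removeChildSelIfApplicable above only fires for a Select that is a pure forwarding projection of reduce values. A hypothetical standalone predicate capturing the same test (the class and method names are illustrative; the commit keeps this logic inline):

    import org.apache.hadoop.hive.ql.exec.SelectOperator;
    import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField;
    import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

    final class ForwardingSelectCheck {
      // True only if every projected expression is a bare VALUE.<col> column
      // reference with no table alias and no partition/virtual column.
      static boolean forwardsReduceValuesOnly(SelectOperator selOp) {
        for (ExprNodeDesc desc : selOp.getConf().getColList()) {
          if (!(desc instanceof ExprNodeColumnDesc)) {
            return false;
          }
          ExprNodeColumnDesc col = (ExprNodeColumnDesc) desc;
          if (!col.getColumn().startsWith(ReduceField.VALUE.toString() + ".")
              || col.getTabAlias() != null || col.getIsPartitionColOrVirtualCol()) {
            return false;
          }
        }
        return true;
      }
    }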
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java Mon Feb 2 06:41:02 2015
@@ -29,7 +29,6 @@ import java.util.Stack;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
@@ -146,6 +145,7 @@ public class ReduceSinkDeDuplication imp
public abstract static class AbsctractReducerReducerProc implements NodeProcessor {
+ @Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
ReduceSinkDeduplicateProcCtx dedupCtx = (ReduceSinkDeduplicateProcCtx) procCtx;
@@ -164,7 +164,7 @@ public class ReduceSinkDeDuplication imp
}
return false;
}
- if (child instanceof ExtractOperator || child instanceof SelectOperator) {
+ if (child instanceof SelectOperator) {
return process(cRS, dedupCtx);
}
return false;
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java Mon Feb 2 06:41:02 2015
@@ -32,7 +32,6 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
import org.apache.hadoop.hive.ql.exec.ForwardOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
@@ -459,12 +458,6 @@ public class OpProcFactory {
for(ExprNodeDesc expr : rop.getConf().getValueCols()) {
lCtx.getIndex().putDependency(rop, col_infos.get(cnt++),
ExprProcFactory.getExprDependency(lCtx, inpOp, expr));
- }
- } else if (op instanceof ExtractOperator) {
- ArrayList<ColumnInfo> col_infos = rop.getSchema().getSignature();
- for(ExprNodeDesc expr : rop.getConf().getValueCols()) {
- lCtx.getIndex().putDependency(rop, col_infos.get(cnt++),
- ExprProcFactory.getExprDependency(lCtx, inpOp, expr));
}
} else {
RowSchema schema = rop.getSchema();
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java Mon Feb 2 06:41:02 2015
@@ -23,7 +23,6 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.ForwardOperator;
@@ -112,9 +111,6 @@ public class BucketingSortingInferenceOp
BucketingSortingOpProcFactory.getJoinProc());
opRules.put(new RuleRegExp("R5", FileSinkOperator.getOperatorName() +
"%"),
BucketingSortingOpProcFactory.getFileSinkProc());
- // Matches only ExtractOperators which are reducers
- opRules.put(new RuleExactMatch("R6", ExtractOperator.getOperatorName() +
"%"),
- BucketingSortingOpProcFactory.getExtractProc());
opRules.put(new RuleRegExp("R7", FilterOperator.getOperatorName() + "%"),
BucketingSortingOpProcFactory.getFilterProc());
opRules.put(new RuleRegExp("R8", LimitOperator.getOperatorName() + "%"),
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java Mon Feb 2 06:41:02 2015
@@ -26,7 +26,6 @@ import java.util.Map;
import java.util.Stack;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.ForwardOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
@@ -487,49 +486,13 @@ public class BucketingSortingOpProcFacto
}
- /**
- * Processor for Extract operator.
- *
- * Only handles the case where the tree looks like
- *
- * ReduceSinkOperator --- ExtractOperator
- *
- * This is the case for distribute by, sort by, order by, cluster by operators.
- */
- public static class ExtractInferrer extends DefaultInferrer implements NodeProcessor {
- @Override
- public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
- Object... nodeOutputs) throws SemanticException {
-
- BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
- ExtractOperator exop = (ExtractOperator)nd;
-
- // As of writing this, there is no case where this could be false, this is just protection
- // from possible future changes
- if (exop.getParentOperators().size() != 1) {
- return null;
- }
-
- Operator<? extends OperatorDesc> parent = exop.getParentOperators().get(0);
-
- // The caller of this method should guarantee this
- if (parent instanceof ReduceSinkOperator) {
- extractTraits(bctx, (ReduceSinkOperator)parent, exop);
- }
-
- return null;
- }
- }
-
- static void extractTraits(BucketingSortingCtx bctx, ReduceSinkOperator rop, Operator<?> exop)
+ static void extractTraits(BucketingSortingCtx bctx, ReduceSinkOperator rop, Operator<?> childop)
throws SemanticException {
List<ExprNodeDesc> outputValues = Collections.emptyList();
- if (exop instanceof ExtractOperator) {
- outputValues = rop.getConf().getValueCols();
- } else if (exop instanceof SelectOperator) {
- SelectDesc select = ((SelectOperator)exop).getConf();
- outputValues = ExprNodeDescUtils.backtrack(select.getColList(), exop, rop);
+ if (childop instanceof SelectOperator) {
+ SelectDesc select = ((SelectOperator)childop).getConf();
+ outputValues = ExprNodeDescUtils.backtrack(select.getColList(), childop, rop);
}
if (outputValues.isEmpty()) {
return;
@@ -543,16 +506,16 @@ public class BucketingSortingOpProcFacto
// These represent the sorted columns
List<SortCol> sortCols = extractSortCols(rop, outputValues);
- List<ColumnInfo> colInfos = exop.getSchema().getSignature();
+ List<ColumnInfo> colInfos = childop.getSchema().getSignature();
if (!bucketCols.isEmpty()) {
List<BucketCol> newBucketCols = getNewBucketCols(bucketCols, colInfos);
- bctx.setBucketedCols(exop, newBucketCols);
+ bctx.setBucketedCols(childop, newBucketCols);
}
if (!sortCols.isEmpty()) {
List<SortCol> newSortCols = getNewSortCols(sortCols, colInfos);
- bctx.setSortedCols(exop, newSortCols);
+ bctx.setSortedCols(childop, newSortCols);
}
}
@@ -778,10 +741,6 @@ public class BucketingSortingOpProcFacto
return new FileSinkInferrer();
}
- public static NodeProcessor getExtractProc() {
- return new ExtractInferrer();
- }
-
public static NodeProcessor getFilterProc() {
return new ForwardingInferrer();
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Mon Feb 2 06:41:02 2015
@@ -41,7 +41,6 @@ import org.apache.hadoop.hive.ql.exec.*;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
@@ -76,7 +75,6 @@ import org.apache.hadoop.hive.ql.plan.Pa
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
import org.apache.hadoop.hive.ql.plan.SparkWork;
-import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.plan.TezWork;
import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
@@ -283,7 +281,7 @@ public class Vectorizer implements Physi
class VectorizationDispatcher implements Dispatcher {
- private PhysicalContext pctx;
+ private final PhysicalContext pctx;
private List<String> reduceColumnNames;
private List<TypeInfo> reduceTypeInfos;
@@ -449,9 +447,8 @@ public class Vectorizer implements Physi
}
private void addReduceWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np) {
- opRules.put(new RuleRegExp("R1", ExtractOperator.getOperatorName() + ".*"), np);
- opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + ".*"), np);
- opRules.put(new RuleRegExp("R3", SelectOperator.getOperatorName() + ".*"), np);
+ opRules.put(new RuleRegExp("R1", GroupByOperator.getOperatorName() + ".*"), np);
+ opRules.put(new RuleRegExp("R2", SelectOperator.getOperatorName() + ".*"), np);
}
private boolean validateReduceWork(ReduceWork reduceWork) throws SemanticException {
@@ -485,7 +482,7 @@ public class Vectorizer implements Physi
private void vectorizeReduceWork(ReduceWork reduceWork) throws SemanticException {
LOG.info("Vectorizing ReduceWork...");
reduceWork.setVectorMode(true);
-
+
// For some reason, the DefaultGraphWalker does not descend down from the reducer Operator as
// expected. We need to descend down, otherwise it breaks our algorithm that determines
// VectorizationContext... Do we use PreOrderWalker instead of DefaultGraphWalker.
@@ -506,11 +503,6 @@ public class Vectorizer implements Physi
// Necessary since we are vectorizing the root operator in reduce.
reduceWork.setReducer(vnp.getRootVectorOp());
- Operator<? extends OperatorDesc> reducer = reduceWork.getReducer();
- if (reducer.getType().equals(OperatorType.EXTRACT)) {
- ((VectorExtractOperator)reducer).setReduceTypeInfos(reduceTypeInfos);
- }
-
Map<String, Map<Integer, String>> allScratchColumnVectorTypeMaps = vnp.getAllScratchColumnVectorTypeMaps();
reduceWork.setAllScratchColumnVectorTypeMaps(allScratchColumnVectorTypeMaps);
Map<String, Map<String, Integer>> allColumnVectorMaps = vnp.getAllColumnVectorMaps();
@@ -525,8 +517,8 @@ public class Vectorizer implements Physi
class MapWorkValidationNodeProcessor implements NodeProcessor {
- private MapWork mapWork;
- private boolean isTez;
+ private final MapWork mapWork;
+ private final boolean isTez;
public MapWorkValidationNodeProcessor(MapWork mapWork, boolean isTez) {
this.mapWork = mapWork;
@@ -658,7 +650,7 @@ public class Vectorizer implements Physi
throw new SemanticException("Must be overridden");
}
}
-
+
class MapWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
private final MapWork mWork;
@@ -723,8 +715,6 @@ public class Vectorizer implements Physi
if (LOG.isDebugEnabled()) {
LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + "
vectorization context " + vContext.toString());
if (vectorOp instanceof VectorizationContextRegion) {
- VectorizationContextRegion vcRegion = (VectorizationContextRegion)
vectorOp;
- VectorizationContext vOutContext =
vcRegion.getOuputVectorizationContext();
LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + "
added vectorization context " + vContext.toString());
}
}
@@ -735,8 +725,8 @@ public class Vectorizer implements Physi
class ReduceWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
- private List<String> reduceColumnNames;
-
+ private final List<String> reduceColumnNames;
+
private VectorizationContext reduceShuffleVectorizationContext;
private Operator<? extends OperatorDesc> rootVectorOp;
@@ -801,8 +791,6 @@ public class Vectorizer implements Physi
if (LOG.isDebugEnabled()) {
LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + "
vectorization context " + vContext.toString());
if (vectorOp instanceof VectorizationContextRegion) {
- VectorizationContextRegion vcRegion = (VectorizationContextRegion)
vectorOp;
- VectorizationContext vOutContext =
vcRegion.getOuputVectorizationContext();
LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + "
added vectorization context " + vContext.toString());
}
}
@@ -897,9 +885,6 @@ public class Vectorizer implements Physi
boolean validateReduceWorkOperator(Operator<? extends OperatorDesc> op) {
boolean ret = false;
switch (op.getType()) {
- case EXTRACT:
- ret = validateExtractOperator((ExtractOperator) op);
- break;
case MAPJOIN:
// Does MAPJOIN actually get planned in Reduce?
if (op instanceof MapJoinOperator) {
@@ -1034,7 +1019,7 @@ public class Vectorizer implements Physi
MapJoinDesc desc = op.getConf();
return validateMapJoinDesc(desc);
}
-
+
private boolean validateMapJoinDesc(MapJoinDesc desc) {
byte posBigTable = (byte) desc.getPosBigTable();
List<ExprNodeDesc> filterExprs = desc.getFilters().get(posBigTable);
@@ -1123,15 +1108,6 @@ public class Vectorizer implements Physi
return true;
}
- private boolean validateExtractOperator(ExtractOperator op) {
- ExprNodeDesc expr = op.getConf().getCol();
- boolean ret = validateExprNodeDesc(expr);
- if (!ret) {
- return false;
- }
- return true;
- }
-
private boolean validateFileSinkOperator(FileSinkOperator op) {
return true;
}
@@ -1300,7 +1276,7 @@ public class Vectorizer implements Physi
return vContext;
}
- private void fixupParentChildOperators(Operator<? extends OperatorDesc> op,
+ private void fixupParentChildOperators(Operator<? extends OperatorDesc> op,
Operator<? extends OperatorDesc> vectorOp) {
if (op.getParentOperators() != null) {
vectorOp.setParentOperators(op.getParentOperators());
@@ -1354,7 +1330,7 @@ public class Vectorizer implements Physi
return false;
}
- public void debugDisplayAllMaps(Map<String, Map<String, Integer>> allColumnVectorMaps,
+ public void debugDisplayAllMaps(Map<String, Map<String, Integer>> allColumnVectorMaps,
Map<String, Map<Integer, String>> allScratchColumnVectorTypeMaps) {
// Context keys grow in length since they are a path...
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Mon Feb 2 06:41:02 2015
@@ -94,6 +94,7 @@ import org.apache.hadoop.hive.ql.hooks.R
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.AcidUtils.Operation;
import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
@@ -149,7 +150,6 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.ExtractDesc;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
@@ -229,12 +229,12 @@ public class SemanticAnalyzer extends Ba
private HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner;
private HashMap<TableScanOperator, PrunedPartitionList> opToPartList;
private HashMap<String, Operator<? extends OperatorDesc>> topOps;
- private HashMap<String, Operator<? extends OperatorDesc>> topSelOps;
+ private final HashMap<String, Operator<? extends OperatorDesc>> topSelOps;
private LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx;
private List<LoadTableDesc> loadTableWork;
private List<LoadFileDesc> loadFileWork;
- private Map<JoinOperator, QBJoinTree> joinContext;
- private Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext;
+ private final Map<JoinOperator, QBJoinTree> joinContext;
+ private final Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext;
private final HashMap<TableScanOperator, Table> topToTable;
private final Map<FileSinkOperator, Table> fsopToTable;
private final List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting;
@@ -5975,8 +5975,13 @@ public class SemanticAnalyzer extends Ba
maxReducers = numBuckets;
}
- input = genReduceSinkPlanForSortingBucketing(dest_tab, input,
- sortCols, sortOrders, partnCols, maxReducers);
+ StringBuilder order = new StringBuilder();
+ for (int sortOrder : sortOrders) {
+ order.append(sortOrder == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC ? '+' : '-');
+ }
+ input = genReduceSinkPlan(input, partnCols, sortCols, order.toString(), maxReducers,
+ (isAcidTable(dest_tab) ? getAcidType() : AcidUtils.Operation.NOT_ACID));
+ reduceSinkOperatorsAddedByEnforceBucketingSorting.add((ReduceSinkOperator)input.getParentOperators().get(0));
ctx.setMultiFileSpray(multiFileSpray);
ctx.setNumFiles(numFiles);
ctx.setPartnCols(partnColsNoConvert);
@@ -6448,7 +6453,7 @@ public class SemanticAnalyzer extends Ba
fileSinkDesc.setWriteType(wt);
acidFileSinks.add(fileSinkDesc);
}
-
+
fileSinkDesc.setTemporary(destTableIsTemporary);
/* Set List Bucketing context. */
@@ -6930,7 +6935,6 @@ public class SemanticAnalyzer extends Ba
private ArrayList<ExprNodeDesc> getSortCols(String dest, QB qb, Table tab, TableDesc table_desc,
Operator input, boolean convert) throws SemanticException {
- RowResolver inputRR = opParseCtx.get(input).getRowResolver();
List<Order> tabSortCols = tab.getSortCols();
List<FieldSchema> tabCols = tab.getCols();
@@ -6940,7 +6944,6 @@ public class SemanticAnalyzer extends Ba
int pos = 0;
for (FieldSchema tabCol : tabCols) {
if (sortCol.getCol().equals(tabCol.getName())) {
- ColumnInfo colInfo = inputRR.getColumnInfos().get(pos);
posns.add(pos);
break;
}
@@ -6953,7 +6956,6 @@ public class SemanticAnalyzer extends Ba
private ArrayList<Integer> getSortOrders(String dest, QB qb, Table tab, Operator input)
throws SemanticException {
- RowResolver inputRR = opParseCtx.get(input).getRowResolver();
List<Order> tabSortCols = tab.getSortCols();
List<FieldSchema> tabCols = tab.getCols();
@@ -6969,74 +6971,11 @@ public class SemanticAnalyzer extends Ba
return orders;
}
- @SuppressWarnings("nls")
- private Operator genReduceSinkPlanForSortingBucketing(Table tab, Operator input,
- ArrayList<ExprNodeDesc> sortCols,
- List<Integer> sortOrders,
- ArrayList<ExprNodeDesc> partitionCols,
- int numReducers)
- throws SemanticException {
- RowResolver inputRR = opParseCtx.get(input).getRowResolver();
-
- // For the generation of the values expression just get the inputs
- // signature and generate field expressions for those
- Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
- ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
- ArrayList<String> outputColumns = new ArrayList<String>();
- int i = 0;
- for (ColumnInfo colInfo : inputRR.getColumnInfos()) {
- String internalName = getColumnInternalName(i++);
- outputColumns.add(internalName);
- valueCols.add(new ExprNodeColumnDesc(colInfo));
- colExprMap.put(internalName, valueCols
- .get(valueCols.size() - 1));
- }
-
- StringBuilder order = new StringBuilder();
- for (int sortOrder : sortOrders) {
- order.append(sortOrder == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC ? '+' : '-');
- }
-
- AcidUtils.Operation acidOp = (isAcidTable(tab) ? getAcidType() : AcidUtils.Operation.NOT_ACID);
-
- Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils
- .getReduceSinkDesc(sortCols, valueCols, outputColumns, false, -1,
- partitionCols, order.toString(), numReducers, acidOp),
- new RowSchema(inputRR.getColumnInfos()), input), inputRR);
- interim.setColumnExprMap(colExprMap);
- reduceSinkOperatorsAddedByEnforceBucketingSorting.add((ReduceSinkOperator) interim);
-
- // Add the extract operator to get the value fields
- RowResolver out_rwsch = new RowResolver();
- RowResolver interim_rwsch = inputRR;
- Integer pos = Integer.valueOf(0);
- for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
- String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
- out_rwsch.put(info[0], info[1], new ColumnInfo(
- getColumnInternalName(pos), colInfo.getType(), info[0],
- colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()));
- pos = Integer.valueOf(pos.intValue() + 1);
- }
-
- Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
- new ExtractDesc(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
- Utilities.ReduceField.VALUE.toString(), "", false)), new RowSchema(
- out_rwsch.getColumnInfos()), interim), out_rwsch);
-
- if (LOG.isDebugEnabled()) {
- LOG.debug("Created ReduceSink Plan for table: " + tab.getTableName() +
- " row schema: " + out_rwsch.toString());
- }
-
- return output;
-
- }
-
private Operator genReduceSinkPlan(String dest, QB qb, Operator<?> input,
int numReducers) throws SemanticException {
-
+
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
-
+
// First generate the expression for the partition and sort keys
// The cluster by clause / distribute by clause has the aliases for
// partition function
@@ -7094,16 +7033,16 @@ public class SemanticAnalyzer extends Ba
sortCols.add(exprNode);
}
}
- return genReduceSinkPlan(input, partCols, sortCols, order.toString(), numReducers);
+ return genReduceSinkPlan(input, partCols, sortCols, order.toString(), numReducers, Operation.NOT_ACID);
}
-
+
@SuppressWarnings("nls")
private Operator genReduceSinkPlan(Operator<?> input,
- ArrayList<ExprNodeDesc> partitionCols, ArrayList<ExprNodeDesc> sortCols,
- String sortOrder, int numReducers) throws SemanticException {
+ ArrayList<ExprNodeDesc> partitionCols, ArrayList<ExprNodeDesc> sortCols,
+ String sortOrder, int numReducers, AcidUtils.Operation acidOp) throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
-
+
Operator dummy = Operator.createDummy();
dummy.setParentOperators(Arrays.asList(input));
@@ -7166,9 +7105,8 @@ public class SemanticAnalyzer extends Ba
dummy.setParentOperators(null);
- // TODO Not 100% sure NOT_ACID is always right here.
ReduceSinkDesc rsdesc = PlanUtils.getReduceSinkDesc(sortCols, valueCols, outputColumns,
- false, -1, partitionCols, sortOrder, numReducers, AcidUtils.Operation.NOT_ACID);
+ false, -1, partitionCols, sortOrder, numReducers, acidOp);
Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(rsdesc,
new RowSchema(rsRR.getColumnInfos()), input), rsRR);
@@ -11894,13 +11832,8 @@ public class SemanticAnalyzer extends Ba
void buildPTFReduceSinkDetails(PartitionedTableFunctionDef tabDef,
RowResolver inputRR,
ArrayList<ExprNodeDesc> partCols,
- ArrayList<ExprNodeDesc> valueCols,
ArrayList<ExprNodeDesc> orderCols,
- Map<String, ExprNodeDesc> colExprMap,
- List<String> outputColumnNames,
- StringBuilder orderString,
- RowResolver rsOpRR,
- RowResolver extractRR) throws SemanticException {
+ StringBuilder orderString) throws SemanticException {
List<PTFExpressionDef> partColList = tabDef.getPartition().getExpressions();
@@ -11928,68 +11861,6 @@ public class SemanticAnalyzer extends Ba
}
orderCols.add(colDef.getExprNode());
}
-
- ArrayList<ColumnInfo> colInfoList = inputRR.getColumnInfos();
- /*
- * construct the ReduceSinkRR
- */
- int pos = 0;
- for (ColumnInfo colInfo : colInfoList) {
- ExprNodeDesc valueColExpr = new ExprNodeColumnDesc(colInfo);
- valueCols.add(valueColExpr);
- String internalName = SemanticAnalyzer.getColumnInternalName(pos++);
- outputColumnNames.add(internalName);
- colExprMap.put(internalName, valueColExpr);
-
- String[] alias = inputRR.reverseLookup(colInfo.getInternalName());
- ColumnInfo newColInfo = new ColumnInfo(
- internalName, colInfo.getType(), alias[0],
- colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
- rsOpRR.put(alias[0], alias[1], newColInfo);
- }
-
- /*
- * construct the ExtractRR
- */
- LinkedHashMap<String[], ColumnInfo> colsAddedByHaving =
- new LinkedHashMap<String[], ColumnInfo>();
- pos = 0;
- for (ColumnInfo colInfo : colInfoList) {
- String[] alias = inputRR.reverseLookup(colInfo.getInternalName());
- /*
- * if we have already encountered this colInfo internalName.
- * We encounter it again because it must be put for the Having clause.
- * We will add these entries in the end; in a loop on colsAddedByHaving. See below.
- */
- if ( colsAddedByHaving.containsKey(alias)) {
- continue;
- }
- ASTNode astNode = PTFTranslator.getASTNode(colInfo, inputRR);
- ColumnInfo eColInfo = new ColumnInfo(
- SemanticAnalyzer.getColumnInternalName(pos++), colInfo.getType(), alias[0],
- colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
-
- if ( astNode == null ) {
- extractRR.put(alias[0], alias[1], eColInfo);
- }
- else {
- /*
- * in case having clause refers to this column may have been added twice;
- * once with the ASTNode.toStringTree as the alias
- * and then with the real alias.
- */
- extractRR.putExpression(astNode, eColInfo);
- if ( !astNode.toStringTree().toLowerCase().equals(alias[1]) ) {
- colsAddedByHaving.put(alias, eColInfo);
- }
- }
- }
-
- for(Map.Entry<String[], ColumnInfo> columnAddedByHaving : colsAddedByHaving.entrySet() ) {
- String[] alias = columnAddedByHaving.getKey();
- ColumnInfo eColInfo = columnAddedByHaving.getValue();
- extractRR.put(alias[0], alias[1], eColInfo);
- }
}
private Operator genPTFPlanForComponentQuery(PTFInvocationSpec ptfQSpec, Operator input)
@@ -12000,27 +11871,6 @@ public class SemanticAnalyzer extends Ba
RowResolver rr = opParseCtx.get(input).getRowResolver();
PTFDesc ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
- RowResolver rsOpRR = new RowResolver();
- /*
- * Build an RR for the Extract Op from the ReduceSink Op's RR.
- * Why?
- * We need to remove the Virtual Columns present in the RS's RR. The OI
- * that gets passed to Extract at runtime doesn't contain the Virtual Columns.
- * So internal names get changed. Consider testCase testJoinWithLeadLag,
- * which is a self join on part and also has a Windowing expression.
- * The RR of the RS op at translation time looks something like this:
- * (_co1,_col2,..,_col7, _col8(vc=true),_col9(vc=true),
- * _col10,_col11,.._col15(vc=true),_col16(vc=true),..)
- * At runtime the Virtual columns are removed and all the columns after _col7
- * are shifted 1 or 2 positions.
- * So in child Operators ColumnExprNodeDesc's are no longer referring to the right columns.
- *
- * So we build a new RR for the Extract Op, with the Virtual Columns removed.
- * We hand this to the PTFTranslator as the
- * starting RR to use to translate a PTF Chain.
- */
- RowResolver extractOpRR = new RowResolver();
-
/*
* 2. build Map-side Op Graph. Graph template is either:
* Input -> PTF_map -> ReduceSink
@@ -12051,10 +11901,7 @@ public class SemanticAnalyzer extends Ba
*/
ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
- ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
ArrayList<ExprNodeDesc> orderCols = new ArrayList<ExprNodeDesc>();
- Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
- List<String> outputColumnNames = new ArrayList<String>();
StringBuilder orderString = new StringBuilder();
/*
@@ -12063,45 +11910,20 @@ public class SemanticAnalyzer extends Ba
* If the parent of ReduceSinkOperator is PTFOperator, use it's
* output RR.
*/
- buildPTFReduceSinkDetails(tabDef,
- rr,
- partCols,
- valueCols,
- orderCols,
- colExprMap,
- outputColumnNames,
- orderString,
- rsOpRR,
- extractOpRR);
-
- input = putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils
- .getReduceSinkDesc(orderCols,
- valueCols, outputColumnNames, false,
- -1, partCols, orderString.toString(), -1, AcidUtils.Operation.NOT_ACID),
- new RowSchema(rsOpRR.getColumnInfos()), input), rsOpRR);
- input.setColumnExprMap(colExprMap);
+ buildPTFReduceSinkDetails(tabDef, rr, partCols, orderCols, orderString);
+ input = genReduceSinkPlan(input, partCols, orderCols, orderString.toString(), -1, Operation.NOT_ACID);
}
/*
* 3. build Reduce-side Op Graph
*/
{
- /*
- * b. Construct Extract Operator.
- */
- input = putOpInsertMap(OperatorFactory.getAndMakeChild(
- new ExtractDesc(
- new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
- Utilities.ReduceField.VALUE
- .toString(), "", false)),
- new RowSchema(extractOpRR.getColumnInfos()),
- input), extractOpRR);
/*
* c. Rebuilt the QueryDef.
* Why?
* - so that the ExprNodeDescriptors in the QueryDef are based on the
- * Extract Operator's RowResolver
+ * Select Operator's RowResolver
*/
rr = opParseCtx.get(input).getRowResolver();
ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
@@ -12115,9 +11937,7 @@ public class SemanticAnalyzer extends Ba
input), ptfOpRR);
}
-
return input;
-
}
//--------------------------- Windowing handling: PTFInvocationSpec to PTFDesc --------------------
@@ -12145,7 +11965,7 @@ public class SemanticAnalyzer extends Ba
private Operator genReduceSinkPlanForWindowing(WindowingSpec spec,
RowResolver inputRR, Operator input) throws SemanticException{
-
+
ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
ArrayList<ExprNodeDesc> orderCols = new ArrayList<ExprNodeDesc>();
StringBuilder order = new StringBuilder();
@@ -12169,7 +11989,7 @@ public class SemanticAnalyzer extends Ba
}
}
- return genReduceSinkPlan(input, partCols, orderCols, order.toString(), -1);
+ return genReduceSinkPlan(input, partCols, orderCols, order.toString(), -1, Operation.NOT_ACID);
}
public static ArrayList<WindowExpressionSpec> parseSelect(String selectExprStr)
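Net effect in SemanticAnalyzer: the hand-rolled ReduceSink-plus-Extract builders (genReduceSinkPlanForSortingBucketing and the PTF rsOpRR/extractOpRR construction) collapse into the single genReduceSinkPlan overload, which now also threads the ACID operation through to PlanUtils.getReduceSinkDesc. A sketch of the resulting call shapes, using the names from the hunks above:

    // enforce-bucketing/sorting path: real ACID type for ACID tables
    input = genReduceSinkPlan(input, partnCols, sortCols, order.toString(), maxReducers,
        isAcidTable(dest_tab) ? getAcidType() : AcidUtils.Operation.NOT_ACID);

    // PTF and windowing paths: always NOT_ACID
    input = genReduceSinkPlan(input, partCols, orderCols, orderString.toString(), -1,
        Operation.NOT_ACID);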
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java Mon Feb 2 06:41:02 2015
@@ -56,14 +56,6 @@ public class SelectDesc extends Abstract
this.outputColumnNames = outputColumnNames;
}
- public SelectDesc(
- final List<org.apache.hadoop.hive.ql.plan.ExprNodeDesc> colList,
- final boolean selectStar, final boolean selStarNoCompute) {
- this.colList = colList;
- this.selectStar = selectStar;
- this.selStarNoCompute = selStarNoCompute;
- }
-
@Override
public Object clone() {
SelectDesc ret = new SelectDesc();
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java Mon Feb 2 06:41:02 2015
@@ -38,7 +38,7 @@ import org.apache.hadoop.hive.ql.Windows
import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
+import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -47,7 +47,6 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.ExtractDesc;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.MapredWork;
@@ -94,7 +93,7 @@ public class TestExecDriver extends Test
tmppath = new Path(tmpdir);
fs = FileSystem.get(conf);
- if (fs.exists(tmppath) && !fs.getFileStatus(tmppath).isDir()) {
+ if (fs.exists(tmppath) && !fs.getFileStatus(tmppath).isDirectory()) {
throw new RuntimeException(tmpdir + " exists but is not a directory");
}
@@ -137,7 +136,7 @@ public class TestExecDriver extends Test
for (String src : srctables) {
db.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, src, true, true);
db.createTable(src, cols, null, TextInputFormat.class,
- IgnoreKeyTextOutputFormat.class);
+ HiveIgnoreKeyTextOutputFormat.class);
db.loadTable(hadoopDataFile[i], src, false, false, true, false, false);
i++;
}
@@ -161,20 +160,19 @@ public class TestExecDriver extends Test
private static void fileDiff(String datafile, String testdir) throws Exception {
String testFileDir = conf.get("test.data.files");
- FileInputStream fi_gold = new FileInputStream(new File(testFileDir,
- datafile));
// inbuilt assumption that the testdir has only one output file.
Path di_test = new Path(tmppath, testdir);
if (!fs.exists(di_test)) {
throw new RuntimeException(tmpdir + File.separator + testdir + " does not exist");
}
- if (!fs.getFileStatus(di_test).isDir()) {
+ if (!fs.getFileStatus(di_test).isDirectory()) {
throw new RuntimeException(tmpdir + File.separator + testdir + " is not a directory");
}
FSDataInputStream fi_test = fs.open((fs.listStatus(di_test))[0].getPath());
boolean ignoreWhitespace = Shell.WINDOWS;
+ FileInputStream fi_gold = new FileInputStream(new File(testFileDir,datafile));
if (!Utilities.contentsEqual(fi_gold, fi_test, ignoreWhitespace)) {
LOG.error(di_test.toString() + " does not match " + datafile);
assertEquals(false, true);
@@ -260,8 +258,11 @@ public class TestExecDriver extends Test
Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(new Path(tmpdir + File.separator
+ "mapredplan1.out"), Utilities.defaultTd, false));
- Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
- getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);
+ List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
+ cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1)));
+ List<String> colNames = new ArrayList<String>();
+ colNames.add(HiveConf.getColumnInternalName(2));
+ Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, colNames), op3);
rWork.setReducer(op2);
}
@@ -292,8 +293,10 @@ public class TestExecDriver extends Test
Operator<FilterDesc> op3 = OperatorFactory.get(getTestFilterDesc("0"), op4);
- Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
- getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);
+ List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
+ cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
+ cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1)));
+ Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
rWork.setReducer(op2);
}
@@ -376,10 +379,10 @@ public class TestExecDriver extends Test
// reduce side work
Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(new Path(tmpdir + File.separator
+ "mapredplan4.out"), Utilities.defaultTd, false));
-
- Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
- getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);
-
+ List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
+ cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
+ cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1)));
+ Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
rWork.setReducer(op2);
}
@@ -416,9 +419,10 @@ public class TestExecDriver extends Test
Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(new Path(tmpdir + File.separator
+ "mapredplan5.out"), Utilities.defaultTd, false));
- Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
- getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);
-
+ List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
+ cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
+ cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1)));
+ Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
rWork.setReducer(op2);
}
@@ -459,8 +463,10 @@ public class TestExecDriver extends Test
Operator<FilterDesc> op2 = OperatorFactory.get(getTestFilterDesc("0"), op3);
- Operator<ExtractDesc> op5 = OperatorFactory.get(new ExtractDesc(
- getStringColumn(Utilities.ReduceField.VALUE.toString())), op2);
+ List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
+ cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
+ cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1)));
+ Operator<SelectDesc> op5 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op2);
rWork.setReducer(op5);
}
Modified: hive/trunk/ql/src/test/results/clientpositive/bucket1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucket1.q.out?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucket1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/bucket1.q.out Mon Feb 2 06:41:02 2015
@@ -110,39 +110,37 @@ STAGE PLANS:
/src [src]
Needs Tagging: false
Reduce Operator Tree:
- Extract
+ Select Operator
+ expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count 100
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count 100
+ bucket_field_name key
+ columns key,value
+ columns.comments
+ columns.types int:string
#### A masked pattern was here ####
- name default.bucket1_1
- serialization.ddl struct bucket1_1 { i32 key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name default.bucket1_1
+ serialization.ddl struct bucket1_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket1_1
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket1_1
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
Modified: hive/trunk/ql/src/test/results/clientpositive/bucket2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucket2.q.out?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucket2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/bucket2.q.out Mon Feb 2 06:41:02 2015
@@ -110,39 +110,37 @@ STAGE PLANS:
/src [src]
Needs Tagging: false
Reduce Operator Tree:
- Extract
+ Select Operator
+ expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
#### A masked pattern was here ####
- NumFilesPerFileSink: 2
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ NumFilesPerFileSink: 2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count 2
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ columns key,value
+ columns.comments
+ columns.types int:string
#### A masked pattern was here ####
- name default.bucket2_1
- serialization.ddl struct bucket2_1 { i32 key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name default.bucket2_1
+ serialization.ddl struct bucket2_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket2_1
- TotalFiles: 2
- GatherStats: true
- MultiFileSpray: true
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket2_1
+ TotalFiles: 2
+ GatherStats: true
+ MultiFileSpray: true
Stage: Stage-0
Move Operator
Modified: hive/trunk/ql/src/test/results/clientpositive/bucket3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucket3.q.out?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucket3.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/bucket3.q.out Mon Feb 2 06:41:02 2015
@@ -114,42 +114,40 @@ STAGE PLANS:
/src [src]
Needs Tagging: false
Reduce Operator Tree:
- Extract
+ Select Operator
+ expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
#### A masked pattern was here ####
- NumFilesPerFileSink: 2
- Static Partition Specification: ds=1/
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ NumFilesPerFileSink: 2
+ Static Partition Specification: ds=1/
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count 2
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count 2
+ bucket_field_name key
+ columns key,value
+ columns.comments
+ columns.types int:string
#### A masked pattern was here ####
- name default.bucket3_1
- partition_columns ds
- partition_columns.types string
- serialization.ddl struct bucket3_1 { i32 key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name default.bucket3_1
+ partition_columns ds
+ partition_columns.types string
+ serialization.ddl struct bucket3_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket3_1
- TotalFiles: 2
- GatherStats: true
- MultiFileSpray: true
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket3_1
+ TotalFiles: 2
+ GatherStats: true
+ MultiFileSpray: true
Stage: Stage-0
Move Operator
Modified: hive/trunk/ql/src/test/results/clientpositive/bucket4.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucket4.q.out?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucket4.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/bucket4.q.out Mon Feb 2 06:41:02 2015
@@ -107,40 +107,38 @@ STAGE PLANS:
/src [src]
Needs Tagging: false
Reduce Operator Tree:
- Extract
+ Select Operator
+ expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
#### A masked pattern was here ####
- NumFilesPerFileSink: 2
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ NumFilesPerFileSink: 2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,value
+ columns.comments
+ columns.types int:string
#### A masked pattern was here ####
- name default.bucket4_1
- serialization.ddl struct bucket4_1 { i32 key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name default.bucket4_1
+ serialization.ddl struct bucket4_1 { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucket4_1
- TotalFiles: 2
- GatherStats: true
- MultiFileSpray: true
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucket4_1
+ TotalFiles: 2
+ GatherStats: true
+ MultiFileSpray: true
Stage: Stage-0
Move Operator
Modified: hive/trunk/ql/src/test/results/clientpositive/bucket5.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucket5.q.out?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucket5.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/bucket5.q.out Mon Feb 2 06:41:02 2015
@@ -171,40 +171,38 @@ STAGE PLANS:
/src [src]
Needs Tagging: false
Reduce Operator Tree:
- Extract
+ Select Operator
+ expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- GlobalTableId: 1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- SORTBUCKETCOLSPREFIX TRUE
- bucket_count 2
- bucket_field_name key
- columns key,value
- columns.comments
- columns.types int:string
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ SORTBUCKETCOLSPREFIX TRUE
+ bucket_count 2
+ bucket_field_name key
+ columns key,value
+ columns.comments
+ columns.types int:string
#### A masked pattern was here ####
- name default.bucketed_table
- serialization.ddl struct bucketed_table { i32 key, string value}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name default.bucketed_table
+ serialization.ddl struct bucketed_table { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.bucketed_table
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.bucketed_table
+ TotalFiles: 1
+ GatherStats: true
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
Modified: hive/trunk/ql/src/test/results/clientpositive/bucket6.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucket6.q.out?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucket6.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/bucket6.q.out Mon Feb 2 06:41:02 2015
@@ -33,9 +33,11 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: string)
+ value expressions: _col1 (type: string)
Reduce Operator Tree:
- Extract
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
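The bucket6 hunk above also shows the planner-level payoff of this commit: because the new Select reads the sort key directly from the shuffle key (KEY.reducesinkkey0) instead of from a copy carried in VALUE, the ReduceSink no longer ships the key twice, and its value expressions shrink from _col0, _col1 to just _col1. A minimal sketch of the two reduce-side column references, assuming the plan's internal _col naming:

    // Sketch: after the change, the key is addressed in the KEY struct and
    // only non-key columns remain in the VALUE struct.
    ExprNodeDesc key = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
        Utilities.ReduceField.KEY + ".reducesinkkey0", "", false);
    ExprNodeDesc value = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
        Utilities.ReduceField.VALUE + "._col0", "", false);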
Modified: hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out Mon Feb 2 06:41:02 2015
@@ -343,9 +343,11 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: string)
+ value expressions: _col1 (type: string)
Reduce Operator Tree:
- Extract
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false