Repository: hive Updated Branches: refs/heads/master 507442319 -> ff9822eb3
HIVE-12004 : SDPO doesn't set colExprMap correctly on new RS (Ashutosh Chauhan via Prasanth J) Signed-off-by: Ashutosh Chauhan <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ff9822eb Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ff9822eb Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ff9822eb Branch: refs/heads/master Commit: ff9822eb33610d483dd075af82b9b4f97b314e30 Parents: 5074423 Author: Ashutosh Chauhan <[email protected]> Authored: Tue Sep 29 08:14:40 2015 -0700 Committer: Ashutosh Chauhan <[email protected]> Committed: Fri Oct 2 08:43:35 2015 -0700 ---------------------------------------------------------------------- .../optimizer/SortedDynPartitionOptimizer.java | 7 +++-- .../correlation/CorrelationUtilities.java | 33 -------------------- .../dynpart_sort_opt_vectorization.q | 2 -- .../clientpositive/dynpart_sort_optimization.q | 2 -- .../clientpositive/dynpart_sort_optimization2.q | 2 -- 5 files changed, 5 insertions(+), 41 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/ff9822eb/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index 7bcb797..d58c24d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -205,10 +205,8 @@ public class SortedDynPartitionOptimizer implements Transform { RowSchema outRS = new RowSchema(fsParent.getSchema()); ArrayList<ColumnInfo> valColInfo = Lists.newArrayList(fsParent.getSchema().getSignature()); ArrayList<ExprNodeDesc> newValueCols = 
Lists.newArrayList(); - Map<String, ExprNodeDesc> colExprMap = Maps.newHashMap(); for (ColumnInfo ci : valColInfo) { newValueCols.add(new ExprNodeColumnDesc(ci)); - colExprMap.put(ci.getInternalName(), newValueCols.get(newValueCols.size() - 1)); } ReduceSinkDesc rsConf = getReduceSinkDesc(partitionPositions, sortPositions, sortOrder, newValueCols, bucketColumns, numBuckets, fsParent, fsOp.getConf().getWriteType()); @@ -223,6 +221,11 @@ public class SortedDynPartitionOptimizer implements Transform { // Create ReduceSink operator ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild( rsConf, new RowSchema(outRS.getSignature()), fsParent); + List<String> valueColNames = rsConf.getOutputValueColumnNames(); + Map<String, ExprNodeDesc> colExprMap = Maps.newHashMap(); + for (int i = 0 ; i < valueColNames.size(); i++) { + colExprMap.put(Utilities.ReduceField.VALUE + "." + valueColNames.get(i), newValueCols.get(i)); + } rsOp.setColumnExprMap(colExprMap); List<ExprNodeDesc> valCols = rsConf.getValueCols(); http://git-wip-us.apache.org/repos/asf/hive/blob/ff9822eb/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java index 7bb49be..388399c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java @@ -354,45 +354,12 @@ public final class CorrelationUtilities { ch.replaceParent(childRS, sel); } - removeChildSelIfApplicable(getSingleChild(childRS), sel, context, procCtx); childRS.setChildOperators(null); childRS.setParentOperators(null); procCtx.addRemovedOperator(childRS); return sel; } - //TODO: ideally this method should be removed in 
future, as in we need not to rely on removing - // this select operator which likely is introduced by SortedDynPartitionOptimizer. - // NonblockingdedupOptimizer should be able to merge this select Operator with its - // parent. But, that is not working at the moment. See: dynpart_sort_optimization2.q - - private static void removeChildSelIfApplicable(Operator<?> child, SelectOperator sel, - ParseContext context, AbstractCorrelationProcCtx procCtx) throws SemanticException { - - if (!(child instanceof SelectOperator)) { - return; - } - if (child.getColumnExprMap() != null) { - return; - } - - SelectOperator selOp = (SelectOperator) child; - - for (ExprNodeDesc desc : selOp.getConf().getColList()) { - if (!(desc instanceof ExprNodeColumnDesc)) { - return; - } - ExprNodeColumnDesc col = (ExprNodeColumnDesc) desc; - if(!col.getColumn().startsWith(ReduceField.VALUE.toString()+".") || - col.getTabAlias() != null || col.getIsPartitionColOrVirtualCol()){ - return; - } - } - - removeOperator(child, getSingleChild(child), sel, context); - procCtx.addRemovedOperator(child); - } - protected static void removeReduceSinkForGroupBy(ReduceSinkOperator cRS, GroupByOperator cGBYr, ParseContext context, AbstractCorrelationProcCtx procCtx) throws SemanticException { http://git-wip-us.apache.org/repos/asf/hive/blob/ff9822eb/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q b/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q index 7e94f23..3d0cdcd 100644 --- a/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q +++ b/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q @@ -7,8 +7,6 @@ set hive.exec.dynamic.partition.mode=nonstrict; set hive.vectorized.execution.enabled=true; set hive.enforce.bucketing=false; set hive.enforce.sorting=false; -set 
hive.exec.submitviachild=true; -set hive.exec.submit.local.task.via.child=true; create table over1k( t tinyint, http://git-wip-us.apache.org/repos/asf/hive/blob/ff9822eb/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q b/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q index ea670e9..a1a87d8 100644 --- a/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q +++ b/ql/src/test/queries/clientpositive/dynpart_sort_optimization.q @@ -6,8 +6,6 @@ set hive.exec.max.dynamic.partitions.pernode=1000; set hive.exec.dynamic.partition.mode=nonstrict; set hive.enforce.bucketing=false; set hive.enforce.sorting=false; -set hive.exec.submitviachild=true; -set hive.exec.submit.local.task.via.child=true; create table over1k( t tinyint, http://git-wip-us.apache.org/repos/asf/hive/blob/ff9822eb/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q b/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q index 5a504ec..c18f1cc 100644 --- a/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q +++ b/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q @@ -6,8 +6,6 @@ set hive.exec.max.dynamic.partitions.pernode=1000; set hive.exec.dynamic.partition.mode=nonstrict; set hive.enforce.bucketing=false; set hive.enforce.sorting=false; -set hive.exec.submitviachild=true; -set hive.exec.submit.local.task.via.child=true; -- SORT_QUERY_RESULTS
