This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 964f08a HIVE-22538: RS deduplication does not always enforce hive.optimize.reducededuplication.min.reducer (Krisztian Kasa, reviewed by Jesus Camacho Rodriguez)
964f08a is described below
commit 964f08ae733b037c6e58dfb4ed149ccad2d3ddc0
Author: Krisztian Kasa <[email protected]>
AuthorDate: Tue Jan 28 12:59:01 2020 -0800
HIVE-22538: RS deduplication does not always enforce hive.optimize.reducededuplication.min.reducer (Krisztian Kasa, reviewed by Jesus Camacho Rodriguez)
Close apache/hive#877
Close apache/hive#855
---
.../org/apache/hadoop/hive/ql/TestAcidOnTez.java | 3 +-
.../hadoop/hive/ql/io/orc/OrcRecordUpdater.java | 6 +-
.../apache/hadoop/hive/ql/optimizer/Optimizer.java | 2 +-
.../correlation/AbstractCorrelationProcCtx.java | 21 +--
.../apache/hadoop/hive/ql/parse/ParseContext.java | 17 +--
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 14 +-
.../apache/hadoop/hive/ql/parse/TaskCompiler.java | 8 +-
.../apache/hadoop/hive/ql/parse/TezCompiler.java | 3 +-
.../org/apache/hadoop/hive/ql/plan/PlanUtils.java | 20 ++-
ql/src/test/queries/clientpositive/clusterctas.q | 12 ++
.../test/results/clientpositive/clusterctas.q.out | 142 ++++++++++++++++++++
.../clientpositive/llap/check_constraint.q.out | 47 ++++---
.../results/clientpositive/llap/clusterctas.q.out | 145 +++++++++++++++++++++
.../llap/enforce_constraint_notnull.q.out | 51 +++++---
.../llap/materialized_view_create_rewrite_4.q.out | 137 +++++++++++--------
.../llap/materialized_view_rewrite_window.q.out | 82 +++++++-----
.../clientpositive/llap/semijoin_reddedup.q.out | 129 ++++++++++--------
17 files changed, 604 insertions(+), 235 deletions(-)
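For orientation before the diffs: reduce-sink deduplication merges a child ReduceSink operator into its parent, and hive.optimize.reducededuplication.min.reducer is documented as a floor that vetoes the merge when the surviving stage would keep too few reducers. A minimal sketch of that veto, with a hypothetical helper name (the real check lives in the ReduceSinkDeDuplication machinery):

    // Sketch only; minReducer is read from hive.optimize.reducededuplication.min.reducer.
    static boolean mayDeduplicate(int childNumReducers, int minReducer) {
      // A child RS with a small fixed parallelism would pin the merged stage,
      // so the merge is refused when that parallelism is below the floor.
      return childNumReducers < 0 || childNumReducers >= minReducer;
    }

Before this patch the floor was silently overridden to 1 whenever the query wrote to a transactional table, so the setting was not always enforced. The changes below remove that override and instead attach the single-reducer requirement for ACID UPDATE/DELETE to the ReduceSink itself.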
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestAcidOnTez.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestAcidOnTez.java
index 2868427..056cd27 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestAcidOnTez.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestAcidOnTez.java
@@ -699,7 +699,8 @@ ekoifman:apache-hive-3.0.0-SNAPSHOT-bin ekoifman$ tree ~/dev/hiverwgit/itests/h
setupTez(confForTez);
int[][] values = {{1,2},{2,4},{5,6},{6,8},{9,10}};
runStatementOnDriver("delete from " + Table.ACIDTBL, confForTez);
- runStatementOnDriver("insert into " + Table.ACIDTBL + TestTxnCommands2.makeValuesClause(values));//make sure both buckets are not empty
+ //make sure both buckets are not empty
+ runStatementOnDriver("insert into " + Table.ACIDTBL + TestTxnCommands2.makeValuesClause(values), confForTez);
runStatementOnDriver("drop table if exists T", confForTez);
/*
With bucketed target table Union All is not removed
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
index 3fa61d3..398698e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
@@ -576,11 +576,7 @@ public class OrcRecordUpdater implements RecordUpdater {
if (options.isWritingBase()) {
// With insert overwrite we need the empty file to delete the previous content of the table
LOG.debug("Empty file has been created for overwrite: {}", path);
-
- OrcFile.WriterOptions wo = OrcFile.writerOptions(this.options.getConfiguration())
- .inspector(rowInspector)
- .callback(new OrcRecordUpdater.KeyIndexBuilder("testEmpty"));
- OrcFile.createWriter(path, wo).close();
+ OrcFile.createWriter(path, writerOptions).close();
} else {
LOG.debug("No insert events in path: {}.. Deleting..", path);
fs.delete(path, false);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index 25e9cd0..da277d0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -191,7 +191,7 @@ public class Optimizer {
transformations.add(new FixedBucketPruningOptimizer(compatMode));
}
- if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION) || pctx.hasAcidWrite()) {
+ if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)) {
transformations.add(new ReduceSinkDeDuplication());
}
transformations.add(new NonBlockingOpDeDupProc());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java
index 4e72c4c..4208abe 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/AbstractCorrelationProcCtx.java
@@ -28,12 +28,8 @@ import java.util.Set;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
abstract class AbstractCorrelationProcCtx implements NodeProcessorCtx {
- private static final Logger LOG = LoggerFactory.getLogger(AbstractCorrelationProcCtx.class);
private ParseContext pctx;
// For queries using script, the optimization cannot be applied without user's confirmation
// If script preserves alias and value for columns related to keys, user can set this true
@@ -49,22 +45,7 @@ abstract class AbstractCorrelationProcCtx implements NodeProcessorCtx {
public AbstractCorrelationProcCtx(ParseContext pctx) {
removedOps = new HashSet<Operator<?>>();
trustScript = pctx.getConf().getBoolVar(HIVESCRIPTOPERATORTRUST);
- if(pctx.hasAcidWrite()) {
- StringBuilder tblNames = new StringBuilder();
- for(FileSinkDesc fsd : pctx.getAcidSinks()) {
- if(fsd.getTable() != null) {
- tblNames.append(fsd.getTable().getDbName()).append('.').append(fsd.getTable().getTableName()).append(',');
- }
- }
- if(tblNames.length() > 0) {
- tblNames.setLength(tblNames.length() - 1);//traling ','
- }
- LOG.info("Overriding " + HIVEOPTREDUCEDEDUPLICATIONMINREDUCER + " to 1 due to a write to transactional table(s) " + tblNames);
- minReducer = 1;
- }
- else {
- minReducer = pctx.getConf().getIntVar(HIVEOPTREDUCEDEDUPLICATIONMINREDUCER);
- }
+ minReducer = pctx.getConf().getIntVar(HIVEOPTREDUCEDEDUPLICATIONMINREDUCER);
isMapAggr = pctx.getConf().getBoolVar(HIVEMAPSIDEAGGREGATE);
this.pctx = pctx;
}
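Assembled from the hunks above, the post-patch constructor no longer special-cases ACID writes; the dedup floor now always comes from configuration:

    public AbstractCorrelationProcCtx(ParseContext pctx) {
      removedOps = new HashSet<Operator<?>>();
      trustScript = pctx.getConf().getBoolVar(HIVESCRIPTOPERATORTRUST);
      // Previously forced to 1 for writes to transactional tables; the configured
      // hive.optimize.reducededuplication.min.reducer now applies everywhere.
      minReducer = pctx.getConf().getIntVar(HIVEOPTREDUCEDEDUPLICATIONMINREDUCER);
      isMapAggr = pctx.getConf().getBoolVar(HIVEMAPSIDEAGGREGATE);
      this.pctx = pctx;
    }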
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
index 91bdbfd..bef0217 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
@@ -46,17 +46,14 @@ import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc;
import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
-import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
@@ -128,7 +125,6 @@ public class ParseContext {
private Map<SelectOperator, Table> viewProjectToViewSchema;
private ColumnAccessInfo columnAccessInfo;
private boolean needViewColumnAuthorization;
- private Set<FileSinkDesc> acidFileSinks = Collections.emptySet();
private Map<ReduceSinkOperator, RuntimeValuesInfo> rsToRuntimeValuesInfo =
new LinkedHashMap<ReduceSinkOperator, RuntimeValuesInfo>();
@@ -199,7 +195,7 @@ public class ParseContext {
AnalyzeRewriteContext analyzeRewrite, CreateTableDesc createTableDesc,
CreateViewDesc createViewDesc, MaterializedViewUpdateDesc materializedViewUpdateDesc,
QueryProperties queryProperties,
- Map<SelectOperator, Table> viewProjectToTableSchema, Set<FileSinkDesc> acidFileSinks) {
+ Map<SelectOperator, Table> viewProjectToTableSchema) {
this.queryState = queryState;
this.conf = queryState.getConf();
this.opToPartPruner = opToPartPruner;
@@ -239,17 +235,8 @@ public class ParseContext {
// authorization info.
this.columnAccessInfo = new ColumnAccessInfo();
}
- if(acidFileSinks != null && !acidFileSinks.isEmpty()) {
- this.acidFileSinks = new HashSet<>();
- this.acidFileSinks.addAll(acidFileSinks);
- }
- }
- public Set<FileSinkDesc> getAcidSinks() {
- return acidFileSinks;
- }
- public boolean hasAcidWrite() {
- return !acidFileSinks.isEmpty();
}
+
/**
* @return the context
*/
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 4afd454..5fcc367 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -559,7 +559,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting,
analyzeRewrite, tableDesc, createVwDesc, materializedViewUpdateDesc,
- queryProperties, viewProjectToTableSchema, acidFileSinks);
+ queryProperties, viewProjectToTableSchema);
}
public CompilationOpContext getOpContext() {
@@ -6853,10 +6853,15 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
if (enforceBucketing) {
+ Operation acidOp = AcidUtils.isFullAcidTable(dest_tab) ? getAcidType(table_desc.getOutputFileFormatClass(),
+ dest, AcidUtils.isInsertOnlyTable(dest_tab)) : Operation.NOT_ACID;
int maxReducers = conf.getIntVar(HiveConf.ConfVars.MAXREDUCERS);
if (conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS) > 0) {
maxReducers = conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS);
}
+ if (acidOp == Operation.UPDATE || acidOp == Operation.DELETE) {
+ maxReducers = 1;
+ }
int numBuckets = dest_tab.getNumBuckets();
if (numBuckets > maxReducers) {
LOG.debug("numBuckets is {} and maxReducers is {}", numBuckets,
maxReducers);
@@ -6871,7 +6876,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
numFiles = totalFiles / maxReducers;
}
}
- else {
+ else if (acidOp == Operation.NOT_ACID || acidOp == Operation.INSERT) {
maxReducers = numBuckets;
}
@@ -6883,8 +6888,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
input = genReduceSinkPlan(input, partnCols, sortCols, order.toString(), nullOrder.toString(),
maxReducers,
- (AcidUtils.isFullAcidTable(dest_tab) ? getAcidType(table_desc.getOutputFileFormatClass(),
- dest, AcidUtils.isInsertOnlyTable(dest_tab)) : AcidUtils.Operation.NOT_ACID));
+ acidOp);
reduceSinkOperatorsAddedByEnforceBucketingSorting.add((ReduceSinkOperator)input.getParentOperators().get(0));
ctx.setMultiFileSpray(multiFileSpray);
ctx.setNumFiles(numFiles);
@@ -12540,7 +12544,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner,
viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting,
analyzeRewrite, tableDesc, createVwDesc, materializedViewUpdateDesc,
- queryProperties, viewProjectToTableSchema, acidFileSinks);
+ queryProperties, viewProjectToTableSchema);
// Set the semijoin hints in parse context
pCtx.setSemiJoinHints(parseSemiJoinHint(getQB().getParseInfo().getHintList()));
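Pieced together from the SemanticAnalyzer hunks above (the numBuckets-versus-maxReducers reconciliation is elided), the bucketing path now computes the ACID operation once and pins the reducer count only for row-mutating writes:

    // Condensed sketch of the post-patch logic; variable names as in the diff.
    Operation acidOp = AcidUtils.isFullAcidTable(dest_tab)
        ? getAcidType(table_desc.getOutputFileFormatClass(), dest, AcidUtils.isInsertOnlyTable(dest_tab))
        : Operation.NOT_ACID;
    int maxReducers = conf.getIntVar(HiveConf.ConfVars.MAXREDUCERS);
    if (conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS) > 0) {
      maxReducers = conf.getIntVar(HiveConf.ConfVars.HADOOPNUMREDUCERS);
    }
    if (acidOp == Operation.UPDATE || acidOp == Operation.DELETE) {
      maxReducers = 1;  // mutating ACID writes must run on a single reducer
    }
    // ... bucket/reducer reconciliation as in the full method ...
    input = genReduceSinkPlan(input, partnCols, sortCols, order.toString(), nullOrder.toString(),
        maxReducers, acidOp);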
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
index 1e1d65b..2f3fc6c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
@@ -702,15 +702,12 @@ public abstract class TaskCompiler {
!HiveConf.getBoolVar(hConf, HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) {
new SortedDynPartitionOptimizer().transform(parseContext);
- if(HiveConf.getBoolVar(hConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)
- || parseContext.hasAcidWrite()) {
-
+ if(HiveConf.getBoolVar(hConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)) {
// Dynamic sort partition adds an extra RS therefore need to de-dup
new ReduceSinkDeDuplication().transform(parseContext);
// there is an issue with dedup logic wherein SELECT is created with wrong columns
// NonBlockingOpDeDupProc fixes that
new NonBlockingOpDeDupProc().transform(parseContext);
-
}
}
}
@@ -732,8 +729,7 @@ public abstract class TaskCompiler {
pCtx.getReduceSinkOperatorsAddedByEnforceBucketingSorting(),
pCtx.getAnalyzeRewrite(), pCtx.getCreateTable(),
pCtx.getCreateViewDesc(), pCtx.getMaterializedViewUpdateDesc(),
- pCtx.getQueryProperties(), pCtx.getViewProjectToTableSchema(),
- pCtx.getAcidSinks());
+ pCtx.getQueryProperties(), pCtx.getViewProjectToTableSchema());
clone.setFetchTask(pCtx.getFetchTask());
clone.setLineageInfo(pCtx.getLineageInfo());
clone.setMapJoinOps(pCtx.getMapJoinOps());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index ff81543..5a78ed5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -197,8 +197,7 @@ public class TezCompiler extends TaskCompiler {
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Sorted dynamic partition optimization");
}
- if(HiveConf.getBoolVar(procCtx.conf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)
- || procCtx.parseContext.hasAcidWrite()) {
+ if(HiveConf.getBoolVar(procCtx.conf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)) {
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
// Dynamic sort partition adds an extra RS therefore need to de-dup
new ReduceSinkDeDuplication().transform(procCtx.parseContext);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
index fb0a422..980f39b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
@@ -25,6 +25,7 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
+import java.util.EnumSet;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
@@ -710,13 +711,18 @@ public final class PlanUtils {
List<String> outputColumnNames, boolean includeKeyCols, int tag,
List<ExprNodeDesc> partitionCols, String order, String nullOrder, NullOrdering defaultNullOrder,
int numReducers, AcidUtils.Operation writeType) {
- return getReduceSinkDesc(keyCols, keyCols.size(), valueCols,
- new ArrayList<List<Integer>>(),
- includeKeyCols ? outputColumnNames.subList(0, keyCols.size()) :
- new ArrayList<String>(),
- includeKeyCols ? outputColumnNames.subList(keyCols.size(),
- outputColumnNames.size()) : outputColumnNames,
- includeKeyCols, tag, partitionCols, order, nullOrder, defaultNullOrder, numReducers, writeType);
+ ReduceSinkDesc reduceSinkDesc = getReduceSinkDesc(keyCols, keyCols.size(), valueCols,
+ new ArrayList<List<Integer>>(),
+ includeKeyCols ? outputColumnNames.subList(0, keyCols.size()) :
+ new ArrayList<String>(),
+ includeKeyCols ? outputColumnNames.subList(keyCols.size(),
+ outputColumnNames.size()) : outputColumnNames,
+ includeKeyCols, tag, partitionCols, order, nullOrder, defaultNullOrder, numReducers, writeType);
+ if (writeType == AcidUtils.Operation.UPDATE || writeType == AcidUtils.Operation.DELETE) {
+ reduceSinkDesc.setReducerTraits(EnumSet.of(ReduceSinkDesc.ReducerTraits.FIXED));
+ reduceSinkDesc.setNumReducers(1);
+ }
+ return reduceSinkDesc;
}
/**
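This PlanUtils hunk is what makes removing the dedup-time override safe: for UPDATE and DELETE the single-reducer requirement now travels with the ReduceSinkDesc itself, as a FIXED reducer trait plus numReducers = 1, instead of being smuggled in by forcing min.reducer to 1. A sketch of how a consumer of that trait behaves (hypothetical method; the actual readers are the task compilers):

    // Sketch: a planner honoring ReducerTraits.FIXED must not renegotiate parallelism.
    static int resolveParallelism(ReduceSinkDesc desc, int autoParallelism) {
      if (desc.getReducerTraits().contains(ReduceSinkDesc.ReducerTraits.FIXED)) {
        return desc.getNumReducers();  // pinned, e.g. 1 for ACID UPDATE/DELETE sinks
      }
      return autoParallelism;
    }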
diff --git a/ql/src/test/queries/clientpositive/clusterctas.q b/ql/src/test/queries/clientpositive/clusterctas.q
new file mode 100644
index 0000000..d4e45e0
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/clusterctas.q
@@ -0,0 +1,12 @@
+--! qt:dataset:src
+
+set hive.cbo.enable=false;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+EXPLAIN
+CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS
+SELECT * FROM SRC x CLUSTER BY x.key;
+CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS
+SELECT * FROM SRC x CLUSTER BY x.key;
+DROP TABLE x;
diff --git a/ql/src/test/results/clientpositive/clusterctas.q.out b/ql/src/test/results/clientpositive/clusterctas.q.out
new file mode 100644
index 0000000..9d76bc5
--- /dev/null
+++ b/ql/src/test/results/clientpositive/clusterctas.q.out
@@ -0,0 +1,142 @@
+PREHOOK: query: EXPLAIN
+CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS
+SELECT * FROM SRC x CLUSTER BY x.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x
+POSTHOOK: query: EXPLAIN
+CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS
+SELECT * FROM SRC x CLUSTER BY x.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-4 depends on stages: Stage-0, Stage-3
+ Stage-2 depends on stages: Stage-4
+ Stage-3 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.x
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: col1, col2
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll')
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+ Write Type: INSERT
+
+ Stage: Stage-4
+ Create Table
+ columns: key string, value string
+ name: default.x
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde name: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ table properties:
+ transactional true
+
+ Stage: Stage-2
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.x
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+PREHOOK: query: CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS
+SELECT * FROM SRC x CLUSTER BY x.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x
+POSTHOOK: query: CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS
+SELECT * FROM SRC x CLUSTER BY x.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x
+POSTHOOK: Lineage: x.key SIMPLE [(src)x.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: x.value SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DROP TABLE x
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@x
+PREHOOK: Output: default@x
+POSTHOOK: query: DROP TABLE x
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@x
+POSTHOOK: Output: default@x
diff --git a/ql/src/test/results/clientpositive/llap/check_constraint.q.out b/ql/src/test/results/clientpositive/llap/check_constraint.q.out
index 9f82a10..955a071 100644
--- a/ql/src/test/results/clientpositive/llap/check_constraint.q.out
+++ b/ql/src/test/results/clientpositive/llap/check_constraint.q.out
@@ -1751,6 +1751,7 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1795,26 +1796,40 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col2 (type: int), _col3 (type: decimal(5,2)), _col1 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 51750 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 10
- Statistics: Num rows: 10 Data size: 2070 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: enforce_constraint((_col1 is not null and (_col1 >= CAST( _col0 AS decimal(5,2))) is not false)) (type: boolean)
- Statistics: Num rows: 5 Data size: 1035 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 5 Data size: 1035 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: decimal(5,2)), _col2 (type: string)
+ expressions: _col2 (type: int), _col3 (type: decimal(5,2)), _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col3 (type: string), _col2 (type: string)
+ null sort order: zz
+ sort order: ++
+ Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: int), _col1 (type: decimal(5,2))
Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: decimal(5,2)), KEY.reducesinkkey1 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 2940 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: enforce_constraint((_col1 is not null and (_col1 >= CAST( _col0 AS decimal(5,2))) is not false)) (type: boolean)
+ Statistics: Num rows: 5 Data size: 1470 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 5 Data size: 1470 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: decimal(5,2)), _col2 (type: string)
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(5,2)), CAST( VALUE._col1 AS varchar(128)) (type: varchar(128))
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 5 Data size: 1640 Basic stats: COMPLETE Column stats: COMPLETE
diff --git a/ql/src/test/results/clientpositive/llap/clusterctas.q.out b/ql/src/test/results/clientpositive/llap/clusterctas.q.out
new file mode 100644
index 0000000..40ceee2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/clusterctas.q.out
@@ -0,0 +1,145 @@
+PREHOOK: query: EXPLAIN
+CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS
+SELECT * FROM SRC x CLUSTER BY x.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x
+POSTHOOK: query: EXPLAIN
+CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS
+SELECT * FROM SRC x CLUSTER BY x.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-4 depends on stages: Stage-0, Stage-2
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.x
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: col1, col2
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll')
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-4
+ Create Table
+ columns: key string, value string
+ name: default.x
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde name: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ table properties:
+ transactional true
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.x
+
+ Stage: Stage-0
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+ Write Type: INSERT
+
+PREHOOK: query: CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS
+SELECT * FROM SRC x CLUSTER BY x.key
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x
+POSTHOOK: query: CREATE TABLE x STORED AS ORC TBLPROPERTIES('transactional'='true') AS
+SELECT * FROM SRC x CLUSTER BY x.key
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x
+POSTHOOK: Lineage: x.key SIMPLE [(src)x.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: x.value SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: DROP TABLE x
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@x
+PREHOOK: Output: default@x
+POSTHOOK: query: DROP TABLE x
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@x
+POSTHOOK: Output: default@x
diff --git a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
index 8ccec3a..3f99d0c 100644
--- a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
+++ b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
@@ -3075,7 +3075,8 @@ STAGE PLANS:
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -3117,23 +3118,37 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: UDFToInteger(_col0) (type: int), CAST( _col0 AS decimal(5,2)) (type: decimal(5,2)), _col1 (type: string)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 51750 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 2
- Statistics: Num rows: 2 Data size: 414 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: enforce_constraint((_col1 is not null and _col2 is not null)) (type: boolean)
- Statistics: Num rows: 1 Data size: 207 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 207 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: decimal(5,2)), _col2 (type: string)
+ expressions: UDFToInteger(_col0) (type: int), CAST( _col0 AS decimal(5,2)) (type: decimal(5,2)), _col1 (type: string), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ null sort order: z
+ sort order: +
+ Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: int), _col1 (type: decimal(5,2)), _col2 (type: string)
Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: decimal(5,2)), VALUE._col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 2
+ Statistics: Num rows: 2 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: enforce_constraint((_col1 is not null and _col2 is not null)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 294 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 294 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: decimal(5,2)), _col2 (type: string)
+ Reducer 4
Execution mode: llap
Reduce Operator Tree:
Select Operator
@@ -3164,7 +3179,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 1496 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:decimal(5,2),max:decimal(5,2),countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
- Reducer 4
+ Reducer 5
Execution mode: llap
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out
index 8196f11..25ce6d6 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_4.q.out
@@ -87,8 +87,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -112,7 +113,7 @@ STAGE PLANS:
Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: may be used (ACID table)
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: cmv_basetable_2_n2
@@ -183,20 +184,30 @@ STAGE PLANS:
Statistics: Num rows: 2 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll')
- mode: complete
+ minReductionHashAggr: 0.5
+ mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col1 (type: struct<columntype:string,min:decimal(10,2),max:decimal(10,2),countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
- outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ null sort order:
+ sort order:
Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:decimal(10,2),max:decimal(10,2),countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -275,10 +286,10 @@ Table Type: MATERIALIZED_VIEW
Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"_c2\":\"true\",\"a\":\"true\",\"c\":\"true\"}}
bucketing_version 2
- numFiles 1
+ numFiles 2
numRows 2
rawDataSize 0
- totalSize 819
+ totalSize 1539
transactional true
transactional_properties default
#### A masked pattern was here ####
@@ -514,10 +525,10 @@ Table Type: MATERIALIZED_VIEW
Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"_c2\":\"true\",\"a\":\"true\",\"c\":\"true\"}}
bucketing_version 2
- numFiles 1
+ numFiles 2
numRows 2
rawDataSize 0
- totalSize 819
+ totalSize 1539
transactional true
transactional_properties default
#### A masked pattern was here ####
@@ -997,10 +1008,10 @@ Table Type: MATERIALIZED_VIEW
Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
bucketing_version 2
- numFiles 2
+ numFiles 3
numRows 3
rawDataSize 0
- totalSize 1576
+ totalSize 2296
transactional true
transactional_properties default
#### A masked pattern was here ####
@@ -1081,8 +1092,8 @@ POSTHOOK: Input: default@cmv_basetable_2_n2
POSTHOOK: Input: default@cmv_basetable_n5
POSTHOOK: Input: default@cmv_mat_view_n5
#### A masked pattern was here ####
-1 2
3 6
+1 2
3 2
PREHOOK: query: UPDATE cmv_basetable_2_n2 SET a=2 WHERE a=1
PREHOOK: type: QUERY
@@ -1116,8 +1127,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1141,7 +1153,7 @@ STAGE PLANS:
Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: may be used (ACID table)
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: cmv_basetable_2_n2
@@ -1212,20 +1224,30 @@ STAGE PLANS:
Statistics: Num rows: 2 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll'), compute_stats(_c2, 'hll')
- mode: complete
+ minReductionHashAggr: 0.5
+ mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col1 (type: struct<columntype:string,min:decimal(10,2),max:decimal(10,2),countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
- outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ null sort order:
+ sort order:
Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:decimal(10,2),max:decimal(10,2),countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -1287,10 +1309,10 @@ Table Type: MATERIALIZED_VIEW
Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"_c2\":\"true\",\"a\":\"true\",\"c\":\"true\"}}
bucketing_version 2
- numFiles 1
+ numFiles 2
numRows 3
rawDataSize 0
- totalSize 822
+ totalSize 1041
transactional true
transactional_properties default
#### A masked pattern was here ####
@@ -1406,8 +1428,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1431,7 +1454,7 @@ STAGE PLANS:
Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: may be used (ACID table)
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: cmv_basetable_2_n2
@@ -1502,20 +1525,30 @@ STAGE PLANS:
Statistics: Num rows: 2 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(a, 'hll'), compute_stats(c, 'hll'), compute_stats(_c2, 'hll')
- mode: complete
+ minReductionHashAggr: 0.5
+ mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col1 (type: struct<columntype:string,min:decimal(10,2),max:decimal(10,2),countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
- outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ null sort order:
+ sort order:
Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:decimal(10,2),max:decimal(10,2),countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -1577,10 +1610,10 @@ Table Type: MATERIALIZED_VIEW
Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"_c2\":\"true\",\"a\":\"true\",\"c\":\"true\"}}
bucketing_version 2
- numFiles 1
+ numFiles 2
numRows 2
rawDataSize 0
- totalSize 820
+ totalSize 1039
transactional true
transactional_properties default
#### A masked pattern was here ####
@@ -1987,10 +2020,10 @@ Table Type: MATERIALIZED_VIEW
Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
bucketing_version 2
- numFiles 2
+ numFiles 3
numRows 3
rawDataSize 0
- totalSize 1576
+ totalSize 1795
transactional true
transactional_properties default
#### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out
index fd330b0..4e35bba 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_window.q.out
@@ -239,9 +239,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 5 <- Map 4 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -265,7 +266,7 @@ STAGE PLANS:
Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: tv_view_data
@@ -328,21 +329,31 @@ STAGE PLANS:
Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(quartile, 'hll'), compute_stats(total, 'hll')
- mode: complete
+ minReductionHashAggr: 0.5
+ mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: struct<columntype:string,min:decimal(12,1),max:decimal(12,1),countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ null sort order:
+ sort order:
Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 5
+ value expressions: _col0 (type: struct<columntype:string,min:decimal(12,1),max:decimal(12,1),countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -535,9 +546,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 5 <- Map 4 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -561,7 +573,7 @@ STAGE PLANS:
Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: tv_view_data
@@ -624,21 +636,31 @@ STAGE PLANS:
Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(quartile, 'hll'), compute_stats(total, 'hll')
- mode: complete
+ minReductionHashAggr: 0.5
+ mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: struct<columntype:string,min:decimal(12,1),max:decimal(12,1),countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
- outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ null sort order:
+ sort order:
Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 5
+ value expressions: _col0 (type: struct<columntype:string,min:decimal(12,1),max:decimal(12,1),countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1088 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
diff --git a/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out b/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out
index ae95609..fcae6ca 100644
--- a/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out
+++ b/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out
@@ -258,12 +258,13 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
- Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+ Reducer 10 <- Map 9 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
- Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
- Reducer 9 <- Map 8 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -288,7 +289,7 @@ STAGE PLANS:
value expressions: _col0 (type: bigint), _col2 (type: double), _col3 (type: string)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 10
+ Map 11
Map Operator Tree:
TableScan
alias: l
@@ -310,7 +311,7 @@ STAGE PLANS:
value expressions: _col1 (type: double)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 7
+ Map 8
Map Operator Tree:
TableScan
alias: customer
@@ -332,7 +333,7 @@ STAGE PLANS:
value expressions: _col1 (type: string)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 8
+ Map 9
Map Operator Tree:
TableScan
alias: lineitem
@@ -359,6 +360,28 @@ STAGE PLANS:
value expressions: _col1 (type: double)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
+ Reducer 10
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2849995116 Data size: 43319925835 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col1 > 300.0D) (type: boolean)
+ Statistics: Num rows: 949998372 Data size: 14439975278 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 949998372 Data size: 14439975278 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 949998372 Data size: 14439975278 Basic stats: COMPLETE Column stats: NONE
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -428,7 +451,7 @@ STAGE PLANS:
TopN Hash Memory Usage: 0.1
value expressions: _col5 (type: double)
Reducer 5
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
@@ -440,34 +463,48 @@ STAGE PLANS:
expressions: _col4 (type: string), _col3 (type: bigint), _col2 (type: bigint), _col1 (type: string), _col0 (type: double), _col5 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 3134994695 Data size: 47651919443 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 100
+ Reduce Output Operator
+ key expressions: _col4 (type: double), _col3 (type: string)
+ null sort order: zz
+ sort order: -+
+ Statistics: Num rows: 3134994695 Data size: 47651919443 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col5 (type: double)
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: double), VALUE._col3 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 3134994695 Data size: 47651919443 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
- serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
- name: tpch_test.q18_large_volume_customer_cached
- Write Type: INSERT
- Select Operator
- expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: double), _col5 (type: double)
- outputColumnNames: col1, col2, col3, col4, col5, col6
- Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll'), compute_stats(col5, 'hll'), compute_stats(col6, 'hll')
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: tpch_test.q18_large_volume_customer_cached
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: double), _col5 (type: double)
+ outputColumnNames: col1, col2, col3, col4, col5, col6
+ Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll'), compute_stats(col5, 'hll'), compute_stats(col6, 'hll')
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col4 [...]
- Reducer 6
+ value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col4 ( [...]
+ Reducer 7
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -482,28 +519,6 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 9
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- keys: KEY._col0 (type: bigint)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 2849995116 Data size: 43319925835 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col1 > 300.0D) (type: boolean)
- Statistics: Num rows: 949998372 Data size: 14439975278 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: bigint)
- outputColumnNames: _col0
- Statistics: Num rows: 949998372 Data size: 14439975278 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: bigint)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: bigint)
- Statistics: Num rows: 949998372 Data size: 14439975278 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-2
Dependency Collection