HIVE-19793 : disable LLAP IO batch-to-row wrapper for ACID deletes/updates (Sergey Shelukhin, reviewed by Eugene Koifman and Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f5f485e3 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f5f485e3 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f5f485e3 Branch: refs/heads/branch-3 Commit: f5f485e3aef1a0cd85167b865d1ae54cc82dcf6d Parents: 02f4931 Author: sergey <[email protected]> Authored: Thu Jun 7 13:43:14 2018 -0700 Committer: sergey <[email protected]> Committed: Thu Jun 7 13:43:51 2018 -0700 ---------------------------------------------------------------------- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 1 + .../apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java | 7 +++++-- .../apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java | 4 ++-- .../hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java | 4 +++- .../hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java | 3 +++ 5 files changed, 14 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/f5f485e3/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index ee543ab..b24bef5 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3768,6 +3768,7 @@ public class HiveConf extends Configuration { false, "Use Tez cartesian product edge to speed up cross product"), // The default is different on the client and server, so it's null here. LLAP_IO_ENABLED("hive.llap.io.enabled", null, "Whether the LLAP IO layer is enabled."), + LLAP_IO_ROW_WRAPPER_ENABLED("hive.llap.io.row.wrapper.enabled", true, "Whether the LLAP IO row wrapper is enabled for non-vectorized queries."), LLAP_IO_ACID_ENABLED("hive.llap.io.acid", true, "Whether the LLAP IO layer is enabled for ACID."), LLAP_IO_TRACE_SIZE("hive.llap.io.trace.size", "2Mb", new SizeValidator(0L, true, (long)Integer.MAX_VALUE, false), http://git-wip-us.apache.org/repos/asf/hive/blob/f5f485e3/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java index 6d29163..40f7c83 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java @@ -20,7 +20,7 @@ package org.apache.hadoop.hive.llap.io.api.impl; import org.apache.hadoop.hive.ql.io.BatchToRowInputFormat; - +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import java.io.IOException; @@ -114,7 +114,10 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB // For non-vectorized operator case, wrap the reader if possible. RecordReader<NullWritable, VectorizedRowBatch> result = rr; if (!Utilities.getIsVectorized(job)) { - result = wrapLlapReader(tableIncludedCols, rr, split); + result = null; + if (HiveConf.getBoolVar(job, ConfVars.LLAP_IO_ROW_WRAPPER_ENABLED)) { + result = wrapLlapReader(tableIncludedCols, rr, split); + } if (result == null) { // Cannot wrap a reader for non-vectorized pipeline. return sourceInputFormat.getRecordReader(split, job, reporter); http://git-wip-us.apache.org/repos/asf/hive/blob/f5f485e3/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java index 6897336..cb57a11 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java @@ -178,8 +178,8 @@ class LlapRecordReader this.isVectorized = HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED); if (isAcidScan) { - this.acidReader = new VectorizedOrcAcidRowBatchReader((OrcSplit) split, jobConf, Reporter.NULL, null, rbCtx, - true); + this.acidReader = new VectorizedOrcAcidRowBatchReader( + (OrcSplit) split, jobConf, Reporter.NULL, null, rbCtx, true); } // Create the consumer of encoded data; it will coordinate decoding to CVBs. http://git-wip-us.apache.org/repos/asf/hive/blob/f5f485e3/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java index d2e1a68..a4568de 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java @@ -324,8 +324,9 @@ public class VectorizedOrcAcidRowBatchReader private static boolean needSyntheticRowIds(boolean isOriginal, boolean hasDeletes, boolean rowIdProjected) { return isOriginal && (hasDeletes || rowIdProjected); } + private static boolean areRowIdsProjected(VectorizedRowBatchCtx rbCtx) { - if(rbCtx.getVirtualColumnCount() == 0) { + if (rbCtx.getVirtualColumnCount() == 0) { return false; } for(VirtualColumn vc : rbCtx.getNeededVirtualColumns()) { @@ -474,6 +475,7 @@ public class VectorizedOrcAcidRowBatchReader } } if (rowIdProjected) { + // TODO: could we just do: int ix = rbCtx.findVirtualColumnNum(VirtualColumn.ROWID); value.cols[ix] = recordIdColumnVector; rbCtx.setRecordIdColumnVector(recordIdColumnVector); } progress = baseReader.getProgress(); http://git-wip-us.apache.org/repos/asf/hive/blob/f5f485e3/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java index 8e5118b..7925151 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java @@ -34,6 +34,7 @@ import org.antlr.runtime.TokenRewriteStream; import org.antlr.runtime.tree.Tree; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.FieldSchema; @@ -521,6 +522,8 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer { // references. // todo: this may be a perf issue as it prevents the optimizer.. or not HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict"); + // Disable LLAP IO wrapper; doesn't propagate extra ACID columns correctly. + HiveConf.setBoolVar(conf, ConfVars.LLAP_IO_ROW_WRAPPER_ENABLED, false); // Parse the rewritten query string Context rewrittenCtx; try {
