This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new f712def  HIVE-23214 Get rid of skipCorrupt as part of ORC read pipeline (Panos Garefalakis via Ashutosh Chauhan)
f712def is described below

commit f712def65b716ba6646828ed8f8be4464abbedc8
Author: Panos Garefalakis <[email protected]>
AuthorDate: Mon May 18 16:51:15 2020 +0100

    HIVE-23214 Get rid of skipCorrupt as part of ORC read pipeline (Panos Garefalakis via Ashutosh Chauhan)
    
    Change-Id: Ic1efd6dcffc71adfa1ac3059ceacbd3f30e6ef7e
    Signed-off-by: Ashutosh Chauhan <[email protected]>
---
 .../hive/llap/io/decode/GenericColumnVectorProducer.java       |  3 +--
 .../hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java    |  5 +----
 .../hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java     | 10 +++-------
 3 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java
index 1617692..1c7e537 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java
@@ -85,8 +85,7 @@ public class GenericColumnVectorProducer implements ColumnVectorProducer {
       SchemaEvolutionFactory sef, InputFormat<?, ?> sourceInputFormat, Deserializer sourceSerDe,
       Reporter reporter, JobConf job, Map<Path, PartitionDesc> parts) throws IOException {
     cacheMetrics.incrCacheReadRequests();
-    OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(
-        consumer, includes, false, counters, ioMetrics);
+    OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(consumer, includes, counters, ioMetrics);
     SerDeFileMetadata fm;
     try {
       fm = new SerDeFileMetadata(sourceSerDe);
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
index 17c4821..50abdfd 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcColumnVectorProducer.java
@@ -56,7 +56,6 @@ public class OrcColumnVectorProducer implements ColumnVectorProducer {
   private final LowLevelCache lowLevelCache;
   private final BufferUsageManager bufferManager;
   private final Configuration conf;
-  private boolean _skipCorrupt; // TODO: get rid of this
   private LlapDaemonCacheMetrics cacheMetrics;
   private LlapDaemonIOMetrics ioMetrics;
   // TODO: if using in multiple places, e.g. SerDe cache, pass this in.
@@ -73,7 +72,6 @@ public class OrcColumnVectorProducer implements ColumnVectorProducer {
     this.lowLevelCache = lowLevelCache;
     this.bufferManager = bufferManager;
     this.conf = conf;
-    this._skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
     this.cacheMetrics = cacheMetrics;
     this.ioMetrics = ioMetrics;
     this.tracePool = tracePool;
@@ -90,8 +88,7 @@ public class OrcColumnVectorProducer implements ColumnVectorProducer {
      InputFormat<?, ?> unused0, Deserializer unused1, Reporter reporter, JobConf job,
       Map<Path, PartitionDesc> parts) throws IOException {
     cacheMetrics.incrCacheReadRequests();
-    OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(
-        consumer, includes, _skipCorrupt, counters, ioMetrics);
+    OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(consumer, includes, counters, ioMetrics);
     OrcEncodedDataReader reader = new OrcEncodedDataReader(lowLevelCache, bufferManager,
         metadataCache, conf, job, split, includes, sarg, edc, counters, sef, tracePool, parts);
     edc.init(reader, reader, reader.getTrace());
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
index b697a0d..79dba42 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
@@ -70,20 +70,16 @@ public class OrcEncodedDataConsumer
   private ConsumerFileMetadata fileMetadata; // We assume one request is only for one file.
   private CompressionCodec codec;
   private List<ConsumerStripeMetadata> stripes;
-  private final boolean skipCorrupt; // TODO: get rid of this
   private SchemaEvolution evolution;
   private IoTrace trace;
   private final Includes includes;
   private TypeDescription[] batchSchemas;
   private boolean useDecimal64ColumnVectors;
 
-  public OrcEncodedDataConsumer(
-    Consumer<ColumnVectorBatch> consumer, Includes includes, boolean skipCorrupt,
-    QueryFragmentCounters counters, LlapDaemonIOMetrics ioMetrics) {
+  public OrcEncodedDataConsumer(Consumer<ColumnVectorBatch> consumer, Includes includes,
+                                QueryFragmentCounters counters, LlapDaemonIOMetrics ioMetrics) {
     super(consumer, includes.getPhysicalColumnIds().size(), ioMetrics, counters);
     this.includes = includes;
-    // TODO: get rid of this
-    this.skipCorrupt = skipCorrupt;
     if (includes.isProbeDecodeEnabled()) {
       LlapIoImpl.LOG.info("OrcEncodedDataConsumer probeDecode is enabled with cacheKey {} colIndex {} and colName {}",
               this.includes.getProbeCacheKey(), this.includes.getProbeColIdx(), this.includes.getProbeColName());
@@ -225,7 +221,7 @@ public class OrcEncodedDataConsumer
   private void createColumnReaders(OrcEncodedColumnBatch batch,
       ConsumerStripeMetadata stripeMetadata, TypeDescription fileSchema) throws IOException {
     TreeReaderFactory.Context context = new TreeReaderFactory.ReaderContext()
-            .setSchemaEvolution(evolution).skipCorrupt(skipCorrupt)
+            .setSchemaEvolution(evolution)
             .writerTimeZone(stripeMetadata.getWriterTimezone())
             .fileFormat(fileMetadata == null ? null : fileMetadata.getFileVersion())
             .useUTCTimestamp(true)
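
For readers following along: the lookup deleted above used the real ORC option OrcConf.SKIP_CORRUPT_DATA (orc.skip.corrupt.data, with hive.exec.orc.skip.corrupt.data as its older Hive key). Below is a minimal standalone sketch of the old wiring, under the assumption that only the config read matters for illustration; the class name SkipCorruptLookupSketch and its main method are hypothetical and not part of this patch.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.orc.OrcConf;

    // Illustrative sketch only, not part of HIVE-23214.
    public class SkipCorruptLookupSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // This is the lookup removed from OrcColumnVectorProducer: before this
        // patch the flag was read once per producer and passed into every
        // OrcEncodedDataConsumer, which then seeded the TreeReaderFactory
        // context via skipCorrupt(...).
        boolean skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
        System.out.println("skipCorrupt = " + skipCorrupt);
      }
    }

After this change the consumer is constructed as new OrcEncodedDataConsumer(consumer, includes, counters, ioMetrics), and the reader context is built without a skipCorrupt(...) call, as the hunks above show.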
