hive git commit: HIVE-15359: skip.footer.line.count doesnt work properly for certain situations (Yongzhi Chen, reviewed by Aihua Xu)

ychena Thu, 08 Dec 2016 07:09:59 -0800

Repository: hive
Updated Branches:
  refs/heads/master b920fa735 -> b4980afd3



HIVE-15359: skip.footer.line.count doesnt work properly for certain situations 
(Yongzhi Chen, reviewed by Aihua Xu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b4980afd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b4980afd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b4980afd

Branch: refs/heads/master
Commit: b4980afd3500b7367a82dc1fc5cd01fe942c53d1
Parents: b920fa7
Author: Yongzhi Chen <ych...@apache.org>
Authored: Mon Dec 5 17:45:41 2016 -0500
Committer: Yongzhi Chen <ych...@apache.org>
Committed: Thu Dec 8 09:50:45 2016 -0500

----------------------------------------------------------------------
 .../hadoop/hive/ql/io/CombineHiveRecordReader.java      | 12 +++++++++++-
 .../hadoop/hive/ql/io/HiveContextAwareRecordReader.java | 10 ++++++++++
 .../org/apache/hadoop/hive/shims/HadoopShimsSecure.java |  6 ++++--
 3 files changed, 25 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b4980afd/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
index aa607cc..ba25573 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.lib.CombineFileSplit;
 
 /**
@@ -43,7 +44,7 @@ public class CombineHiveRecordReader<K extends WritableComparable, V extends Wri
     extends HiveContextAwareRecordReader<K, V> {
 
   public CombineHiveRecordReader(InputSplit split, Configuration conf,
-      Reporter reporter, Integer partition) throws IOException {
+      Reporter reporter, Integer partition, RecordReader preReader) throws IOException {
     super((JobConf)conf);
     CombineHiveInputSplit hsplit = split instanceof CombineHiveInputSplit ?
         (CombineHiveInputSplit) split :
@@ -67,6 +68,15 @@ public class CombineHiveRecordReader<K extends WritableComparable, V extends Wri
     this.setRecordReader(inputFormat.getRecordReader(fsplit, jobConf, reporter));
 
     this.initIOContext(fsplit, jobConf, inputFormatClass, this.recordReader);
+
+    //If current split is from the same file as preceding split and the preceding split has footerbuffer,
+    //the current split should use the preceding split's footerbuffer in order to skip footer correctly.
+    if (preReader != null && preReader instanceof CombineHiveRecordReader
+        && ((CombineHiveRecordReader)preReader).getFooterBuffer() != null) {
+      if (partition != 0 && hsplit.getPaths()[partition -1].equals(hsplit.getPaths()[partition]))
+        this.setFooterBuffer(((CombineHiveRecordReader)preReader).getFooterBuffer());
+    }
+
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hive/blob/b4980afd/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
index d602c76..46f9970 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
@@ -244,6 +244,15 @@ public abstract class HiveContextAwareRecordReader<K, V> implements RecordReader
   private int headerCount = 0;
   private int footerCount = 0;
 
+  protected FooterBuffer getFooterBuffer() {
+       return footerBuffer;
+  }
+
+  protected void setFooterBuffer( FooterBuffer buf) {
+    footerBuffer = buf;
+  }
+
+
   public boolean doNext(K key, V value) throws IOException {
     if (this.isSorted) {
       if (this.getIOContext().shouldEndBinarySearch() ||
@@ -308,6 +317,7 @@ public abstract class HiveContextAwareRecordReader<K, V> implements RecordReader
       if (this.ioCxtRef.getCurrentBlockStart() == 0) {
 
         // Check if the table file has header to skip.
+        footerBuffer = null;
         Path filePath = this.ioCxtRef.getInputPath();
         PartitionDesc part = null;
         try {

http://git-wip-us.apache.org/repos/asf/hive/blob/b4980afd/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
----------------------------------------------------------------------
diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
index 224ce3b..018cb9f 100644
--- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
+++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java
@@ -118,7 +118,8 @@ public abstract class HadoopShimsSecure implements HadoopShims {
         InputSplit.class,
         Configuration.class,
         Reporter.class,
-        Integer.class
+        Integer.class,
+        RecordReader.class
         };
 
     protected CombineFileSplit split;
@@ -237,6 +238,7 @@ public abstract class HadoopShimsSecure implements HadoopShims {
      */
     protected boolean initNextRecordReader(K key) throws IOException {
 
+      RecordReader preReader = curReader; //it is OK, curReader is closed, for we only need footer buffer info from preReader.
       if (curReader != null) {
         curReader.close();
         curReader = null;
@@ -253,7 +255,7 @@ public abstract class HadoopShimsSecure implements HadoopShims {
       // get a record reader for the idx-th chunk
       try {
         curReader = rrConstructor.newInstance(new Object[]
-            {split, jc, reporter, Integer.valueOf(idx)});
+            {split, jc, reporter, Integer.valueOf(idx), preReader});
 
         // change the key if need be
         if (key != null) {

hive git commit: HIVE-15359: skip.footer.line.count doesnt work properly for certain situations (Yongzhi Chen, reviewed by Aihua Xu)

Reply via email to