Repository: hive Updated Branches: refs/heads/master b920fa735 -> b4980afd3
HIVE-15359: skip.footer.line.count doesn't work properly for certain situations (Yongzhi Chen, reviewed by Aihua Xu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b4980afd Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b4980afd Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b4980afd Branch: refs/heads/master Commit: b4980afd3500b7367a82dc1fc5cd01fe942c53d1 Parents: b920fa7 Author: Yongzhi Chen <ych...@apache.org> Authored: Mon Dec 5 17:45:41 2016 -0500 Committer: Yongzhi Chen <ych...@apache.org> Committed: Thu Dec 8 09:50:45 2016 -0500 ---------------------------------------------------------------------- .../hadoop/hive/ql/io/CombineHiveRecordReader.java | 12 +++++++++++- .../hadoop/hive/ql/io/HiveContextAwareRecordReader.java | 10 ++++++++++ .../org/apache/hadoop/hive/shims/HadoopShimsSecure.java | 6 ++++-- 3 files changed, 25 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/b4980afd/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java index aa607cc..ba25573 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java @@ -31,6 +31,7 @@ import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.lib.CombineFileSplit; /** @@ -43,7 +44,7 @@ public class CombineHiveRecordReader<K extends WritableComparable, V extends Wri extends HiveContextAwareRecordReader<K, 
V> { public CombineHiveRecordReader(InputSplit split, Configuration conf, - Reporter reporter, Integer partition) throws IOException { + Reporter reporter, Integer partition, RecordReader preReader) throws IOException { super((JobConf)conf); CombineHiveInputSplit hsplit = split instanceof CombineHiveInputSplit ? (CombineHiveInputSplit) split : @@ -67,6 +68,15 @@ public class CombineHiveRecordReader<K extends WritableComparable, V extends Wri this.setRecordReader(inputFormat.getRecordReader(fsplit, jobConf, reporter)); this.initIOContext(fsplit, jobConf, inputFormatClass, this.recordReader); + + //If current split is from the same file as preceding split and the preceding split has footerbuffer, + //the current split should use the preceding split's footerbuffer in order to skip footer correctly. + if (preReader != null && preReader instanceof CombineHiveRecordReader + && ((CombineHiveRecordReader)preReader).getFooterBuffer() != null) { + if (partition != 0 && hsplit.getPaths()[partition -1].equals(hsplit.getPaths()[partition])) + this.setFooterBuffer(((CombineHiveRecordReader)preReader).getFooterBuffer()); + } + } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/b4980afd/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java index d602c76..46f9970 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java @@ -244,6 +244,15 @@ public abstract class HiveContextAwareRecordReader<K, V> implements RecordReader private int headerCount = 0; private int footerCount = 0; + protected FooterBuffer getFooterBuffer() { + return footerBuffer; + } + + protected void setFooterBuffer( FooterBuffer buf) { + 
footerBuffer = buf; + } + + public boolean doNext(K key, V value) throws IOException { if (this.isSorted) { if (this.getIOContext().shouldEndBinarySearch() || @@ -308,6 +317,7 @@ public abstract class HiveContextAwareRecordReader<K, V> implements RecordReader if (this.ioCxtRef.getCurrentBlockStart() == 0) { // Check if the table file has header to skip. + footerBuffer = null; Path filePath = this.ioCxtRef.getInputPath(); PartitionDesc part = null; try { http://git-wip-us.apache.org/repos/asf/hive/blob/b4980afd/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java ---------------------------------------------------------------------- diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java index 224ce3b..018cb9f 100644 --- a/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java +++ b/shims/common/src/main/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java @@ -118,7 +118,8 @@ public abstract class HadoopShimsSecure implements HadoopShims { InputSplit.class, Configuration.class, Reporter.class, - Integer.class + Integer.class, + RecordReader.class }; protected CombineFileSplit split; @@ -237,6 +238,7 @@ public abstract class HadoopShimsSecure implements HadoopShims { */ protected boolean initNextRecordReader(K key) throws IOException { + RecordReader preReader = curReader; //it is OK, curReader is closed, for we only need footer buffer info from preReader. if (curReader != null) { curReader.close(); curReader = null; @@ -253,7 +255,7 @@ public abstract class HadoopShimsSecure implements HadoopShims { // get a record reader for the idx-th chunk try { curReader = rrConstructor.newInstance(new Object[] - {split, jc, reporter, Integer.valueOf(idx)}); + {split, jc, reporter, Integer.valueOf(idx), preReader}); // change the key if need be if (key != null) {