Repository: hive Updated Branches: refs/heads/master 1b0bbb89a -> 0b46f4e9a
HIVE-14448: Queries with predicate fail when ETL split strategy is chosen for ACID tables (Matt McCline, reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0b46f4e9 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0b46f4e9 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0b46f4e9 Branch: refs/heads/master Commit: 0b46f4e9acbf8097b7f0a1f699a965ffc25caf7a Parents: 1b0bbb8 Author: Matt McCline <[email protected]> Authored: Fri Aug 12 23:00:42 2016 -0700 Committer: Matt McCline <[email protected]> Committed: Fri Aug 12 23:00:42 2016 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 22 ++++++++---- .../apache/hadoop/hive/ql/TestTxnCommands2.java | 35 ++++++++++++++++++++ 2 files changed, 50 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/0b46f4e9/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 6261a14..969b73b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -1159,7 +1159,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, private final long blockSize; private final TreeMap<Long, BlockLocation> locations; private OrcTail orcTail; - private final List<OrcProto.Type> readerTypes; + private List<OrcProto.Type> readerTypes; private List<StripeInformation> stripes; private List<StripeStatistics> stripeStats; private List<OrcProto.Type> fileTypes; @@ -1492,6 +1492,11 @@ public class OrcInputFormat implements 
InputFormat<NullWritable, OrcStruct>, } TypeDescription readerSchema = OrcUtils.convertTypeFromProtobuf(readerTypes, 0); evolution = new SchemaEvolution(fileSchema, readerSchema, readerIncluded); + if (!isOriginal) { + // The SchemaEvolution class has added the ACID metadata columns. Let's update our + // readerTypes so PPD code will work correctly. + readerTypes = OrcUtils.getOrcTypes(evolution.getReaderSchema()); + } } writerVersion = orcTail.getWriterVersion(); List<OrcProto.ColumnStatistics> fileColStats = orcTail.getFooter().getStatisticsList(); @@ -1508,21 +1513,24 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, } } } - projColsUncompressedSize = computeProjectionSize(fileTypes, fileColStats, fileIncluded, - isOriginal); + projColsUncompressedSize = computeProjectionSize(fileTypes, fileColStats, fileIncluded); if (!context.footerInSplits) { orcTail = null; } } private long computeProjectionSize(List<OrcProto.Type> fileTypes, - List<OrcProto.ColumnStatistics> stats, boolean[] fileIncluded, boolean isOriginal) { - final int rootIdx = getRootColumn(isOriginal); + List<OrcProto.ColumnStatistics> stats, boolean[] fileIncluded) { List<Integer> internalColIds = Lists.newArrayList(); - if (fileIncluded != null) { + if (fileIncluded == null) { + // Add all. 
+ for (int i = 0; i < fileTypes.size(); i++) { + internalColIds.add(i); + } + } else { for (int i = 0; i < fileIncluded.length; i++) { if (fileIncluded[i]) { - internalColIds.add(rootIdx + i); + internalColIds.add(i); } } } http://git-wip-us.apache.org/repos/asf/hive/blob/0b46f4e9/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java index f499980..949e071 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java @@ -1298,6 +1298,41 @@ public class TestTxnCommands2 { Assert.assertEquals(Arrays.asList(expectedResult), rs); } + @Test + public void testETLSplitStrategyForACID() throws Exception { + hiveConf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, "ETL"); + hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER, true); + runStatementOnDriver("insert into " + Table.ACIDTBL + " values(1,2)"); + runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MAJOR'"); + runWorker(hiveConf); + List<String> rs = runStatementOnDriver("select * from " + Table.ACIDTBL + " where a = 1"); + int[][] resultData = new int[][] {{1,2}}; + Assert.assertEquals(stringifyValues(resultData), rs); + } + + @Test + public void testAcidWithSchemaEvolution() throws Exception { + hiveConf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, "ETL"); + String tblName = "acidTblWithSchemaEvol"; + runStatementOnDriver("drop table if exists " + tblName); + runStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + + " CLUSTERED BY(a) INTO 2 BUCKETS" + //currently ACID requires table to be bucketed + " STORED AS ORC TBLPROPERTIES ('transactional'='true')"); + + runStatementOnDriver("INSERT INTO " + tblName + " VALUES (1, 'foo'), (2, 'bar')"); + + // Major compact to create a base that has 
ACID schema. + runStatementOnDriver("ALTER TABLE " + tblName + " COMPACT 'MAJOR'"); + runWorker(hiveConf); + + // Alter table to perform schema evolution. + runStatementOnDriver("ALTER TABLE " + tblName + " ADD COLUMNS(c int)"); + + // Validate there is an added NULL for column c. + List<String> rs = runStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a"); + String[] expectedResult = { "1\tfoo\tNULL", "2\tbar\tNULL" }; + Assert.assertEquals(Arrays.asList(expectedResult), rs); + } /** * takes raw data and turns it into a string as if from Driver.getResults()
