Repository: hive Updated Branches: refs/heads/master 1b0bbb89a -> 0b46f4e9a
HIVE-14448: Queries with predicate fail when ETL split strategy is chosen for ACID tables (Matt McCline, reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0b46f4e9 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0b46f4e9 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0b46f4e9 Branch: refs/heads/master Commit: 0b46f4e9acbf8097b7f0a1f699a965ffc25caf7a Parents: 1b0bbb8 Author: Matt McCline <[email protected]> Authored: Fri Aug 12 23:00:42 2016 -0700 Committer: Matt McCline <[email protected]> Committed: Fri Aug 12 23:00:42 2016 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 22 ++++++++---- .../apache/hadoop/hive/ql/TestTxnCommands2.java | 35 ++++++++++++++++++++ 2 files changed, 50 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/0b46f4e9/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 6261a14..969b73b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -1159,7 +1159,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, private final long blockSize; private final TreeMap<Long, BlockLocation> locations; private OrcTail orcTail; - private final List<OrcProto.Type> readerTypes; + private List<OrcProto.Type> readerTypes; private List<StripeInformation> stripes; private List<StripeStatistics> stripeStats; private List<OrcProto.Type> fileTypes; @@ -1492,6 +1492,11 @@ public class OrcInputFormat implements 
InputFormat<NullWritable, OrcStruct>, } TypeDescription readerSchema = OrcUtils.convertTypeFromProtobuf(readerTypes, 0); evolution = new SchemaEvolution(fileSchema, readerSchema, readerIncluded); + if (!isOriginal) { + // The SchemaEvolution class has added the ACID metadata columns. Let's update our + // readerTypes so PPD code will work correctly. + readerTypes = OrcUtils.getOrcTypes(evolution.getReaderSchema()); + } } writerVersion = orcTail.getWriterVersion(); List<OrcProto.ColumnStatistics> fileColStats = orcTail.getFooter().getStatisticsList(); @@ -1508,21 +1513,24 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, } } } - projColsUncompressedSize = computeProjectionSize(fileTypes, fileColStats, fileIncluded, - isOriginal); + projColsUncompressedSize = computeProjectionSize(fileTypes, fileColStats, fileIncluded); if (!context.footerInSplits) { orcTail = null; } } private long computeProjectionSize(List<OrcProto.Type> fileTypes, - List<OrcProto.ColumnStatistics> stats, boolean[] fileIncluded, boolean isOriginal) { - final int rootIdx = getRootColumn(isOriginal); + List<OrcProto.ColumnStatistics> stats, boolean[] fileIncluded) { List<Integer> internalColIds = Lists.newArrayList(); - if (fileIncluded != null) { + if (fileIncluded == null) { + // Add all. 
+ for (int i = 0; i < fileTypes.size(); i++) { + internalColIds.add(i); + } + } else { for (int i = 0; i < fileIncluded.length; i++) { if (fileIncluded[i]) { - internalColIds.add(rootIdx + i); + internalColIds.add(i); } } } http://git-wip-us.apache.org/repos/asf/hive/blob/0b46f4e9/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java index f499980..949e071 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java @@ -1298,6 +1298,41 @@ public class TestTxnCommands2 { Assert.assertEquals(Arrays.asList(expectedResult), rs); } + @Test + public void testETLSplitStrategyForACID() throws Exception { + hiveConf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, "ETL"); + hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER, true); + runStatementOnDriver("insert into " + Table.ACIDTBL + " values(1,2)"); + runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MAJOR'"); + runWorker(hiveConf); + List<String> rs = runStatementOnDriver("select * from " + Table.ACIDTBL + " where a = 1"); + int[][] resultData = new int[][] {{1,2}}; + Assert.assertEquals(stringifyValues(resultData), rs); + } + + @Test + public void testAcidWithSchemaEvolution() throws Exception { + hiveConf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, "ETL"); + String tblName = "acidTblWithSchemaEvol"; + runStatementOnDriver("drop table if exists " + tblName); + runStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + + " CLUSTERED BY(a) INTO 2 BUCKETS" + //currently ACID requires table to be bucketed + " STORED AS ORC TBLPROPERTIES ('transactional'='true')"); + + runStatementOnDriver("INSERT INTO " + tblName + " VALUES (1, 'foo'), (2, 'bar')"); + + // Major compact to create a base that has 
ACID schema. + runStatementOnDriver("ALTER TABLE " + tblName + " COMPACT 'MAJOR'"); + runWorker(hiveConf); + + // Alter table to perform schema evolution. + runStatementOnDriver("ALTER TABLE " + tblName + " ADD COLUMNS(c int)"); + + // Validate there is an added NULL for column c. + List<String> rs = runStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a"); + String[] expectedResult = { "1\tfoo\tNULL", "2\tbar\tNULL" }; + Assert.assertEquals(Arrays.asList(expectedResult), rs); + } /** * takes raw data and turns it into a string as if from Driver.getResults()
