Repository: hive
Updated Branches:
  refs/heads/master 87414f37e -> 35278429d


HIVE-20719: SELECT statement fails after UPDATE with 
hive.optimize.sort.dynamic.partition optimization and vectorization on (Eugene 
Koifman, reviewed by Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3c6a36b9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3c6a36b9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3c6a36b9

Branch: refs/heads/master
Commit: 3c6a36b99d609b1baf5023fc5e801f74486bbb54
Parents: 87414f3
Author: Eugene Koifman <ekoif...@apache.org>
Authored: Sat Oct 13 16:38:13 2018 -0700
Committer: Eugene Koifman <ekoif...@apache.org>
Committed: Sat Oct 13 16:38:13 2018 -0700

----------------------------------------------------------------------
 .../optimizer/SortedDynPartitionOptimizer.java  |  5 +-
 .../apache/hadoop/hive/ql/TestTxnCommands3.java | 61 ++++++++++++++++++++
 2 files changed, 64 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3c6a36b9/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
index 2dc2351..314b8b4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
@@ -54,6 +54,7 @@ import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.ParseUtils;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -208,8 +209,8 @@ public class SortedDynPartitionOptimizer extends Transform {
         if(!VirtualColumn.ROWID.getTypeInfo().equals(ci.getType())) {
           throw new IllegalStateException("expected 1st column to be ROW__ID but got wrong type: " + ci.toString());
         }
-        //HIVE-17328: not sure this is correct... I don't think is gets wrapped in UDFToInteger....
-        bucketColumns.add(new ExprNodeColumnDesc(ci));
+        //add a cast(ROW__ID as int) to wrap in UDFToInteger()
+        bucketColumns.add(ParseUtils.createConversionCast(new ExprNodeColumnDesc(ci), TypeInfoFactory.intTypeInfo));
       } else {
         if (!destTable.getSortCols().isEmpty()) {
           // Sort columns specified by table

http://git-wip-us.apache.org/repos/asf/hive/blob/3c6a36b9/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java
index a25406d..833e637 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands3.java
@@ -222,4 +222,65 @@ public class TestTxnCommands3 extends TxnCommandsBaseForTests {
     rs = runStatementOnDriver("select a, b from T order by a, b");
     Assert.assertEquals(stringifyValues(dataAll), rs);
   }
+
+  /**
+   * Test that rows are routed to proper files based on bucket col/ROW__ID
+   * Only the Vectorized Acid Reader checks if bucketId in ROW__ID inside the file
+   * matches the file name and only for files in delete_delta
+   */
+  @Test
+  public void testSdpoBucketed() throws Exception {
+    testSdpoBucketed(true, true, 1);
+    testSdpoBucketed(true, false, 1);
+    testSdpoBucketed(false, true, 1);
+    testSdpoBucketed(false, false,1);
+
+    testSdpoBucketed(true, true, 2);
+    testSdpoBucketed(true, false, 2);
+    testSdpoBucketed(false, true, 2);
+    testSdpoBucketed(false, false,2);
+  }
+  private void testSdpoBucketed(boolean isVectorized, boolean isSdpo, int bucketing_version)
+      throws Exception {
+    hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, isVectorized);
+    hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTSORTDYNAMICPARTITION, isSdpo);
+    runStatementOnDriver("drop table if exists acid_uap");
+    runStatementOnDriver("create transactional table acid_uap(a int, b varchar(128)) " +
+        "partitioned by (ds string) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES " +
+        "('bucketing_version'='" + bucketing_version + "')");
+    runStatementOnDriver("insert into table acid_uap partition (ds='tomorrow') " +
+        "values (1, 'bah'),(2, 'yah')");
+    runStatementOnDriver("insert into table acid_uap partition (ds='today') " +
+        "values (1, 'bah'),(2, 'yah')");
+    runStatementOnDriver("select a,b, ds from acid_uap order by a,b, ds");
+
+    String testQuery = isVectorized ?
+        "select ROW__ID, a, b, ds from acid_uap order by ds, a, b" :
+        "select ROW__ID, a, b, ds, INPUT__FILE__NAME from acid_uap order by ds, a, b";
+    String[][] expected = new String[][]{
+        {"{\"writeid\":2,\"bucketid\":536936448,\"rowid\":0}\t1\tbah\ttoday",
+            "warehouse/acid_uap/ds=today/delta_0000002_0000002_0000/bucket_00001"},
+        {"{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\t2\tyah\ttoday",
+            "warehouse/acid_uap/ds=today/delta_0000002_0000002_0000/bucket_00000"},
+
+        {"{\"writeid\":1,\"bucketid\":536936448,\"rowid\":0}\t1\tbah\ttomorrow",
+            "warehouse/acid_uap/ds=tomorrow/delta_0000001_0000001_0000/bucket_00001"},
+        {"{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\t2\tyah\ttomorrow",
+            "warehouse/acid_uap/ds=tomorrow/delta_0000001_0000001_0000/bucket_00000"}};
+    checkResult(expected, testQuery, isVectorized, "after insert", LOG);
+
+    runStatementOnDriver("update acid_uap set b = 'fred'");
+
+    String[][] expected2 = new String[][]{
+        {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t1\tfred\ttoday",
+            "warehouse/acid_uap/ds=today/delta_0000003_0000003_0000/bucket_00001"},
+        {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t2\tfred\ttoday",
+            "warehouse/acid_uap/ds=today/delta_0000003_0000003_0000/bucket_00000"},
+
+        {"{\"writeid\":3,\"bucketid\":536936448,\"rowid\":0}\t1\tfred\ttomorrow",
+            "warehouse/acid_uap/ds=tomorrow/delta_0000003_0000003_0000/bucket_00001"},
+        {"{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\t2\tfred\ttomorrow",
+            "warehouse/acid_uap/ds=tomorrow/delta_0000003_0000003_0000/bucket_00000"}};
+    checkResult(expected2, testQuery, isVectorized, "after update", LOG);
+  }
 }

Reply via email to