Repository: hive
Updated Branches:
  refs/heads/master 29736e438 -> d31dc22ae


HIVE-15782: query on parquet table returns incorrect result when hive.optimize.index.filter is set to true (Aihua Xu, reviewed by Yongzhi Chen)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d31dc22a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d31dc22a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d31dc22a

Branch: refs/heads/master
Commit: d31dc22ae5d9f0983d985f45551f20058777d524
Parents: 29736e4
Author: Aihua Xu <aihu...@apache.org>
Authored: Wed Feb 1 14:23:15 2017 -0500
Committer: Aihua Xu <aihu...@apache.org>
Committed: Mon Feb 6 09:21:39 2017 -0500

----------------------------------------------------------------------
 .../hive/ql/io/parquet/LeafFilterFactory.java   | 13 +++--
 .../read/ParquetFilterPredicateConverter.java   | 19 ++++----
 .../parquet/TestParquetRecordReaderWrapper.java | 39 +++++++--------
 .../clientpositive/parquet_ppd_multifiles.q     | 13 +++++
 .../clientpositive/parquet_ppd_multifiles.q.out | 50 ++++++++++++++++++++
 5 files changed, 100 insertions(+), 34 deletions(-)
----------------------------------------------------------------------
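
At a glance: before this change, an unsupported predicate leaf (DECIMAL, TIMESTAMP, and friends) made LeafFilterFactory hand back a null builder, and the conversion could end up pushing a partially converted filter into Parquet's row-group pruning, silently dropping rows that only match the unconverted part of the predicate. After this change the factory throws, the failure propagates up, and toFilterPredicate returns null so that no filter is pushed down at all. A minimal sketch of the caller-facing contract after the patch (the converter class and method are from this commit; the wrapper below is hypothetical):

import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.parquet.schema.MessageType;

class PushdownContractSketch {
  // Hypothetical helper: pushdown is safe only when the WHOLE SearchArgument
  // converted. A null result now means "skip row-group filtering"; Hive's own
  // FilterOperator still evaluates the full WHERE clause, so results stay
  // correct, just without the pruning speedup.
  static boolean canPushDown(SearchArgument sarg, MessageType schema) {
    return ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema) != null;
  }
}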


http://git-wip-us.apache.org/repos/asf/hive/blob/d31dc22a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
index f95ebcd..3bd01f2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/LeafFilterFactory.java
@@ -18,7 +18,7 @@ import org.slf4j.LoggerFactory;
 
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.Operator;
-
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.parquet.filter2.predicate.FilterApi;
 import org.apache.parquet.filter2.predicate.FilterPredicate;
 import org.apache.parquet.io.api.Binary;
@@ -167,9 +167,11 @@ public class LeafFilterFactory {
    * supported yet.
    * @param type FilterPredicateType
    * @return
+   * @throws HiveException Exception is thrown for unsupported data types so we can skip filtering
    */
-  public FilterPredicateLeafBuilder getLeafFilterBuilderByType(PredicateLeaf.Type type,
-                                                               Type parquetType){
+  public FilterPredicateLeafBuilder getLeafFilterBuilderByType(
+      PredicateLeaf.Type type,
+      Type parquetType) throws HiveException {
     switch (type){
       case LONG:
         if (parquetType.asPrimitiveType().getPrimitiveTypeName() ==
@@ -193,8 +195,9 @@ public class LeafFilterFactory {
       case DECIMAL:
       case TIMESTAMP:
       default:
-        LOG.debug("Conversion to Parquet FilterPredicate not supported for " + type);
-        return null;
+        String msg = "Conversion to Parquet FilterPredicate not supported for " + type;
+        LOG.debug(msg);
+        throw new HiveException(msg);
     }
   }
 }
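
With the new signature a caller can no longer receive a null builder; unsupported types surface as a HiveException instead. A small sketch of the updated calling convention (LeafFilterFactory and getLeafFilterBuilderByType are from the patch; this probe helper is hypothetical):

import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.parquet.schema.Type;

class LeafSupportSketch {
  // Hypothetical helper: probe whether a predicate leaf's type can be turned
  // into a Parquet filter at all.
  static boolean isConvertible(PredicateLeaf.Type type, Type parquetType) {
    try {
      new LeafFilterFactory().getLeafFilterBuilderByType(type, parquetType);
      return true;   // builder obtained, the type is supported
    } catch (HiveException e) {
      return false;  // DECIMAL, TIMESTAMP, ...: conversion must be abandoned
    }
  }
}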

http://git-wip-us.apache.org/repos/asf/hive/blob/d31dc22a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
index 786a260..47777f8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetFilterPredicateConverter.java
@@ -39,7 +39,8 @@ public class ParquetFilterPredicateConverter {
   /**
   * Translate the search argument to the filter predicate parquet uses. It includes
   * only the columns from the passed schema.
-   * @return translate the sarg into a filter predicate
+   * @return  a filter predicate translated from search argument. null is returned
+   *          if failed to convert.
    */
   public static FilterPredicate toFilterPredicate(SearchArgument sarg, MessageType schema) {
     Set<String> columns = null;
@@ -50,13 +51,17 @@ public class ParquetFilterPredicateConverter {
       }
     }
 
-    return translate(sarg.getExpression(), sarg.getLeaves(), columns, schema);
+    try {
+      return translate(sarg.getExpression(), sarg.getLeaves(), columns, schema);
+    } catch(Exception e) {
+      return null;
+    }
   }
 
   private static FilterPredicate translate(ExpressionTree root,
                                            List<PredicateLeaf> leaves,
                                            Set<String> columns,
-                                           MessageType schema) {
+                                           MessageType schema) throws Exception {
     FilterPredicate p = null;
     switch (root.getOperator()) {
       case OR:
@@ -113,15 +118,13 @@ public class ParquetFilterPredicateConverter {
   }
 
   private static FilterPredicate buildFilterPredicateFromPredicateLeaf
-      (PredicateLeaf leaf, Type parquetType) {
+      (PredicateLeaf leaf, Type parquetType) throws Exception {
     LeafFilterFactory leafFilterFactory = new LeafFilterFactory();
     FilterPredicateLeafBuilder builder;
     try {
       builder = leafFilterFactory
           .getLeafFilterBuilderByType(leaf.getType(), parquetType);
-      if (builder == null) {
-        return null;
-      }
+
       if (isMultiLiteralsOperator(leaf.getOperator())) {
         return builder.buildPredicate(leaf.getOperator(),
             leaf.getLiteralList(),
@@ -134,7 +137,7 @@ public class ParquetFilterPredicateConverter {
       }
     } catch (Exception e) {
       LOG.error("fail to build predicate filter leaf with errors" + e, e);
-      return null;
+      throw e;
     }
   }
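
Note the control-flow change here: translate and buildFilterPredicateFromPredicateLeaf now declare throws Exception, so one unconvertible leaf anywhere in the expression tree aborts the whole translation, and the new try/catch in toFilterPredicate turns that into a null result. That is the sound choice, because Parquet's FilterApi combines children pairwise, and silently dropping one child of an or() strengthens the filter. A toy illustration (column names are made up; FilterApi and Binary are Parquet's real classes):

import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.io.api.Binary;

class OrStrengtheningSketch {
  static FilterPredicate full() {
    FilterPredicate nameLeaf =
        FilterApi.eq(FilterApi.binaryColumn("name"), Binary.fromString("Jim"));
    FilterPredicate zLeaf = FilterApi.eq(FilterApi.intColumn("z"), 5);
    // or() needs both children: if zLeaf were unconvertible and simply
    // dropped, the pushed filter would shrink to nameLeaf alone, and row
    // groups matching only z == 5 would be pruned, losing rows.
    return FilterApi.or(nameLeaf, zLeaf);
  }
}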
 

http://git-wip-us.apache.org/repos/asf/hive/blob/d31dc22a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
index 35d0342..1d6fc31 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetRecordReaderWrapper.java
@@ -18,20 +18,19 @@
 
 package org.apache.hadoop.hive.ql.io.parquet;
 
-import static junit.framework.Assert.assertEquals;
-
 import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.MessageTypeParser;
 import org.junit.Test;
 
+import static org.junit.Assert.assertEquals;
+
 import java.sql.Date;
 
 import org.apache.parquet.filter2.predicate.FilterPredicate;
@@ -41,10 +40,6 @@ import org.apache.parquet.filter2.predicate.FilterPredicate;
  */
 public class TestParquetRecordReaderWrapper {
 
-  private static TruthValue[] values(TruthValue... vals) {
-    return vals;
-  }
-
   @Test
   public void testBuilder() throws Exception {
      SearchArgument sarg = SearchArgumentFactory.newBuilder()
@@ -69,6 +64,10 @@ public class TestParquetRecordReaderWrapper {
     assertEquals(expected, p.toString());
   }
 
+  /**
+   * Check the converted filter predicate is null if unsupported types are included
+   * @throws Exception
+   */
   @Test
   public void testBuilderComplexTypes() throws Exception {
     SearchArgument sarg =
@@ -83,8 +82,8 @@ public class TestParquetRecordReaderWrapper {
             .build();
     MessageType schema = MessageTypeParser.parseMessageType("message test {" +
         " required int32 x; required binary y; required binary z;}");
-    assertEquals("lteq(y, Binary{\"hi        \"})",
-        ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema).toString());
+    assertEquals(null,
+        ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema));
 
     sarg = SearchArgumentFactory.newBuilder()
         .startNot()
@@ -101,13 +100,14 @@ public class TestParquetRecordReaderWrapper {
     schema = MessageTypeParser.parseMessageType("message test {" +
         " optional int32 x; required binary y; required int32 z;" +
         " optional binary a;}");
-    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
-    String expected =
-        "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
-        "not(eq(a, Binary{\"stinger\"})))";
-    assertEquals(expected, p.toString());
+    assertEquals(null,
+        ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema));
   }
 
+  /**
+   * Check the converted filter predicate is null if unsupported types are included
+   * @throws Exception
+   */
   @Test
   public void testBuilderComplexTypes2() throws Exception {
     SearchArgument sarg =
@@ -122,8 +122,8 @@ public class TestParquetRecordReaderWrapper {
             .build();
     MessageType schema = MessageTypeParser.parseMessageType("message test {" +
         " required int32 x; required binary y; required binary z;}");
-    assertEquals("lteq(y, Binary{\"hi        \"})",
-        ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema).toString());
+    assertEquals(null,
+        ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema));
 
     sarg = SearchArgumentFactory.newBuilder()
         .startNot()
@@ -140,11 +140,8 @@ public class TestParquetRecordReaderWrapper {
     schema = MessageTypeParser.parseMessageType("message test {" +
         " optional int32 x; required binary y; required int32 z;" +
         " optional binary a;}");
-
-    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
-    String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " +
-        "not(eq(a, Binary{\"stinger\"})))";
-    assertEquals(expected, p.toString());
+    assertEquals(null,
+        ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema));
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/hive/blob/d31dc22a/ql/src/test/queries/clientpositive/parquet_ppd_multifiles.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_ppd_multifiles.q b/ql/src/test/queries/clientpositive/parquet_ppd_multifiles.q
new file mode 100644
index 0000000..6483684
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_ppd_multifiles.q
@@ -0,0 +1,13 @@
+CREATE TABLE parquet_ppd_multifiles (
+  name string,
+  dec decimal(5,0)
+) stored as parquet;
+
+insert into table parquet_ppd_multifiles values('Jim', 3);
+insert into table parquet_ppd_multifiles values('Tom', 5);
+
+set hive.optimize.index.filter=false;
+select * from parquet_ppd_multifiles where (name = 'Jim' or dec = 5);
+
+set hive.optimize.index.filter=true;
+select * from parquet_ppd_multifiles where (name = 'Jim' or dec = 5);
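
The qfile reproduces the reported bug. Each INSERT ... VALUES writes its own Parquet file, so the table ends up with one file holding ('Jim', 3) and another holding ('Tom', 5). The dec column is a DECIMAL, which the converter cannot express as a Parquet FilterPredicate; before this patch, presumably only the convertible name = 'Jim' half of the OR was pushed down, so row-group statistics pruned the file whose row matches the query only through dec = 5, and the 'Tom' row disappeared whenever hive.optimize.index.filter was enabled. Running the same query with the flag off and then on, and expecting identical output, guards against that regression.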

http://git-wip-us.apache.org/repos/asf/hive/blob/d31dc22a/ql/src/test/results/clientpositive/parquet_ppd_multifiles.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_multifiles.q.out b/ql/src/test/results/clientpositive/parquet_ppd_multifiles.q.out
new file mode 100644
index 0000000..d7688f8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_ppd_multifiles.q.out
@@ -0,0 +1,50 @@
+PREHOOK: query: CREATE TABLE parquet_ppd_multifiles (
+  name string,
+  dec decimal(5,0)
+) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_ppd_multifiles
+POSTHOOK: query: CREATE TABLE parquet_ppd_multifiles (
+  name string,
+  dec decimal(5,0)
+) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_ppd_multifiles
+PREHOOK: query: insert into table parquet_ppd_multifiles values('Jim', 3)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@parquet_ppd_multifiles
+POSTHOOK: query: insert into table parquet_ppd_multifiles values('Jim', 3)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@parquet_ppd_multifiles
+POSTHOOK: Lineage: parquet_ppd_multifiles.dec EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: parquet_ppd_multifiles.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into table parquet_ppd_multifiles values('Tom', 5)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@parquet_ppd_multifiles
+POSTHOOK: query: insert into table parquet_ppd_multifiles values('Tom', 5)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@parquet_ppd_multifiles
+POSTHOOK: Lineage: parquet_ppd_multifiles.dec EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: parquet_ppd_multifiles.name SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: select * from parquet_ppd_multifiles where (name = 'Jim' or dec = 5)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_ppd_multifiles
+#### A masked pattern was here ####
+POSTHOOK: query: select * from parquet_ppd_multifiles where (name = 'Jim' or dec = 5)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_ppd_multifiles
+#### A masked pattern was here ####
+Jim    3
+Tom    5
+PREHOOK: query: select * from parquet_ppd_multifiles where (name = 'Jim' or dec = 5)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_ppd_multifiles
+#### A masked pattern was here ####
+POSTHOOK: query: select * from parquet_ppd_multifiles where (name = 'Jim' or dec = 5)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_ppd_multifiles
+#### A masked pattern was here ####
+Jim    3
+Tom    5
