Author: prasanthj Date: Wed Sep 10 18:16:36 2014 New Revision: 1624087 URL: http://svn.apache.org/r1624087 Log: HIVE-7818: Support boolean PPD for ORC (Daniel Dai reviewed by Prasanth Jayachandran)
Added: hive/trunk/ql/src/test/queries/clientpositive/orc_ppd_boolean.q hive/trunk/ql/src/test/results/clientpositive/orc_ppd_boolean.q.out Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java hive/trunk/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1624087&r1=1624086&r2=1624087&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Wed Sep 10 18:16:36 2014 @@ -2336,6 +2336,12 @@ class RecordReaderImpl implements Record return ((DecimalColumnStatistics) index).getMaximum(); } else if (index instanceof TimestampColumnStatistics) { return ((TimestampColumnStatistics) index).getMaximum(); + } else if (index instanceof BooleanColumnStatistics) { + if (((BooleanColumnStatistics)index).getTrueCount()!=0) { + return "true"; + } else { + return "false"; + } } else { return null; } @@ -2360,6 +2366,12 @@ class RecordReaderImpl implements Record return ((DecimalColumnStatistics) index).getMinimum(); } else if (index instanceof TimestampColumnStatistics) { return ((TimestampColumnStatistics) index).getMinimum(); + } else if (index instanceof BooleanColumnStatistics) { + if (((BooleanColumnStatistics)index).getFalseCount()!=0) { + return "false"; + } else { + return "true"; + } } else { return null; } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java?rev=1624087&r1=1624086&r2=1624087&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java Wed Sep 10 18:16:36 2014 @@ -331,6 +331,8 @@ final class SearchArgumentImpl implement return PredicateLeaf.Type.TIMESTAMP; case DECIMAL: return PredicateLeaf.Type.DECIMAL; + case BOOLEAN: + return PredicateLeaf.Type.BOOLEAN; default: } } @@ -368,6 +370,7 @@ final class SearchArgumentImpl implement case DATE: case TIMESTAMP: case DECIMAL: + case BOOLEAN: return lit; default: throw new IllegalArgumentException("Unknown literal " + getType(lit)); @@ -963,7 +966,8 @@ final class SearchArgumentImpl implement literal instanceof DateWritable || literal instanceof Timestamp || literal instanceof HiveDecimal || - literal instanceof BigDecimal) { + literal instanceof BigDecimal || + literal instanceof Boolean) { return literal; } else if (literal instanceof HiveChar || literal instanceof HiveVarchar) { @@ -1000,6 +1004,8 @@ final class SearchArgumentImpl implement }else if (literal instanceof HiveDecimal || literal instanceof BigDecimal) { return PredicateLeaf.Type.DECIMAL; + } else if (literal instanceof Boolean) { + return PredicateLeaf.Type.BOOLEAN; } throw new IllegalArgumentException("Unknown type for literal " + literal); } Added: hive/trunk/ql/src/test/queries/clientpositive/orc_ppd_boolean.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/orc_ppd_boolean.q?rev=1624087&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/orc_ppd_boolean.q (added) +++ hive/trunk/ql/src/test/queries/clientpositive/orc_ppd_boolean.q Wed Sep 10 18:16:36 2014 @@ -0,0 +1,34 @@ +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET mapred.min.split.size=1000; +SET mapred.max.split.size=5000; + +create table newtypesorc(c char(10), v varchar(10), d decimal(5,3), b boolean) stored as orc tblproperties("orc.stripe.size"="16777216"); + +insert overwrite table newtypesorc select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2) uniontbl; + +set hive.optimize.index.filter=false; + +-- char data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) +select sum(hash(*)) from newtypesorc where b=true; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where b=false; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where b!=true; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where b!=false; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where b<true; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where b<false; + +set hive.optimize.index.filter=false; +select sum(hash(*)) from newtypesorc where b<=true; + +set hive.optimize.index.filter=true; +select sum(hash(*)) from newtypesorc where b<=false; + Added: hive/trunk/ql/src/test/results/clientpositive/orc_ppd_boolean.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/orc_ppd_boolean.q.out?rev=1624087&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/orc_ppd_boolean.q.out (added) +++ hive/trunk/ql/src/test/results/clientpositive/orc_ppd_boolean.q.out Wed Sep 10 18:16:36 2014 @@ -0,0 +1,94 @@ +PREHOOK: query: create table newtypesorc(c char(10), v varchar(10), d decimal(5,3), b boolean) stored as orc tblproperties("orc.stripe.size"="16777216") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@newtypesorc +POSTHOOK: query: create table newtypesorc(c char(10), v varchar(10), d decimal(5,3), b boolean) stored as orc tblproperties("orc.stripe.size"="16777216") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@newtypesorc +PREHOOK: query: insert overwrite table newtypesorc select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2) uniontbl +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@newtypesorc +POSTHOOK: query: insert overwrite table newtypesorc select * from (select cast("apple" as char(10)), cast("bee" as varchar(10)), 0.22, true from src src1 union all select cast("hello" as char(10)), cast("world" as varchar(10)), 11.22, false from src src2) uniontbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@newtypesorc +POSTHOOK: Lineage: newtypesorc.b EXPRESSION [] +POSTHOOK: Lineage: newtypesorc.c EXPRESSION [] +POSTHOOK: Lineage: newtypesorc.d EXPRESSION [] +POSTHOOK: Lineage: newtypesorc.v EXPRESSION [] +PREHOOK: query: -- char data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) +select sum(hash(*)) from newtypesorc where b=true +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypesorc +#### A masked pattern was here #### +POSTHOOK: query: -- char data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) +select sum(hash(*)) from newtypesorc where b=true +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypesorc +#### A masked pattern was here #### +-252951953500 +PREHOOK: query: select sum(hash(*)) from newtypesorc where b=false +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypesorc +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from newtypesorc where b=false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypesorc +#### A masked pattern was here #### +334427776000 +PREHOOK: query: select sum(hash(*)) from newtypesorc where b!=true +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypesorc +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from newtypesorc where b!=true +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypesorc +#### A masked pattern was here #### +334427776000 +PREHOOK: query: select sum(hash(*)) from newtypesorc where b!=false +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypesorc +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from newtypesorc where b!=false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypesorc +#### A masked pattern was here #### +-252951953500 +PREHOOK: query: select sum(hash(*)) from newtypesorc where b<true +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypesorc +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from newtypesorc where b<true +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypesorc +#### A masked pattern was here #### +334427776000 +PREHOOK: query: select sum(hash(*)) from newtypesorc where b<false +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypesorc +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from newtypesorc where b<false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypesorc +#### A masked pattern was here #### +NULL +PREHOOK: query: select sum(hash(*)) from newtypesorc where b<=true +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypesorc +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from newtypesorc where b<=true +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypesorc +#### A masked pattern was here #### +81475822500 +PREHOOK: query: select sum(hash(*)) from newtypesorc where b<=false +PREHOOK: type: QUERY +PREHOOK: Input: default@newtypesorc +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from newtypesorc where b<=false +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtypesorc +#### A masked pattern was here #### +334427776000 Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java?rev=1624087&r1=1624086&r2=1624087&view=diff ============================================================================== --- hive/trunk/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java (original) +++ hive/trunk/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java Wed Sep 10 18:16:36 2014 @@ -48,7 +48,8 @@ public interface PredicateLeaf { STRING, // string, char, varchar DATE, DECIMAL, - TIMESTAMP + TIMESTAMP, + BOOLEAN } /**