Repository: hive Updated Branches: refs/heads/master 6f828383d -> d4f274867
HIVE-13200: Aggregation functions returning empty rows on partitioned columns (Yongzhi Chen, reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d4f27486 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d4f27486 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d4f27486 Branch: refs/heads/master Commit: d4f2748670e8c14a50066e17148c4f2d8cd0bc39 Parents: 6f82838 Author: Yongzhi Chen <[email protected]> Authored: Thu Mar 3 11:55:37 2016 -0500 Committer: Yongzhi Chen <[email protected]> Committed: Sat Mar 5 10:56:50 2016 -0500 ---------------------------------------------------------------------- .../physical/MetadataOnlyOptimizer.java | 3 +- .../hadoop/hive/ql/plan/TableScanDesc.java | 16 ++ .../test/queries/clientpositive/skiphf_aggr.q | 42 +++ .../results/clientpositive/skiphf_aggr.q.out | 267 +++++++++++++++++++ 4 files changed, 327 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/d4f27486/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java index d47d3c2..5758282 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java @@ -119,7 +119,8 @@ public class MetadataOnlyOptimizer implements PhysicalPlanResolver { boolean noColNeeded = (colIDs == null) || (colIDs.isEmpty()); boolean noVCneeded = (desc == null) || (desc.getVirtualCols() == null) || (desc.getVirtualCols().isEmpty()); - if (noColNeeded && noVCneeded) { + boolean isSkipHF = desc.isNeedSkipHeaderFooters(); + if (noColNeeded && noVCneeded && !isSkipHF) { walkerCtx.setMayBeMetadataOnly(tsOp); } return nd; http://git-wip-us.apache.org/repos/asf/hive/blob/d4f27486/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java index 5381247..8cf261d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.TableSample; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.serde.serdeConstants; /** @@ -364,4 +365,19 @@ public class TableScanDesc extends AbstractOperatorDesc { public void setNumBuckets(int numBuckets) { this.numBuckets = numBuckets; } + + public boolean isNeedSkipHeaderFooters() { + boolean rtn = false; + if (tableMetadata != null && tableMetadata.getTTable() != null) { + Map<String, String> params = tableMetadata.getTTable().getParameters(); + if (params != null) { + String skipHVal = params.get(serdeConstants.HEADER_COUNT); + int hcount = skipHVal == null? 0 : Integer.parseInt(skipHVal); + String skipFVal = params.get(serdeConstants.FOOTER_COUNT); + int fcount = skipFVal == null? 0 : Integer.parseInt(skipFVal); + rtn = (hcount != 0 || fcount !=0 ); + } + } + return rtn; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/d4f27486/ql/src/test/queries/clientpositive/skiphf_aggr.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/skiphf_aggr.q b/ql/src/test/queries/clientpositive/skiphf_aggr.q new file mode 100644 index 0000000..fcd0b35 --- /dev/null +++ b/ql/src/test/queries/clientpositive/skiphf_aggr.q @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS skipHTbl; + +CREATE TABLE skipHTbl (a int) +PARTITIONED BY (b int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +TBLPROPERTIES('skip.header.line.count'='1'); + +INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1), (2), (3), (4); +INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4); + +SELECT * FROM skipHTbl; + +SELECT DISTINCT b FROM skipHTbl; +SELECT MAX(b) FROM skipHTbl; +SELECT DISTINCT a FROM skipHTbl; + +INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1); +INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4); + +SELECT DISTINCT b FROM skipHTbl; +SELECT MIN(b) FROM skipHTbl; +SELECT DISTINCT a FROM skipHTbl; + +DROP TABLE IF EXISTS skipFTbl; + +CREATE TABLE skipFTbl (a int) +PARTITIONED BY (b int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +TBLPROPERTIES('skip.footer.line.count'='1'); + +INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 1) VALUES (1), (2), (3), (4); +INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 2) VALUES (1), (2), (3), (4); + +SELECT * FROM skipFTbl; + +SELECT DISTINCT b FROM skipFTbl; +SELECT MAX(b) FROM skipFTbl; +SELECT DISTINCT a FROM skipFTbl; + +DROP TABLE skipHTbl; +DROP TABLE skipFTbl; + http://git-wip-us.apache.org/repos/asf/hive/blob/d4f27486/ql/src/test/results/clientpositive/skiphf_aggr.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/skiphf_aggr.q.out b/ql/src/test/results/clientpositive/skiphf_aggr.q.out new file mode 100644 index 0000000..aeb4b1b --- /dev/null +++ b/ql/src/test/results/clientpositive/skiphf_aggr.q.out @@ -0,0 +1,267 @@ +PREHOOK: query: DROP TABLE IF EXISTS skipHTbl +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS skipHTbl +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE skipHTbl (a int) +PARTITIONED BY (b int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +TBLPROPERTIES('skip.header.line.count'='1') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@skipHTbl +POSTHOOK: query: CREATE TABLE skipHTbl (a int) +PARTITIONED BY (b int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +TBLPROPERTIES('skip.header.line.count'='1') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@skipHTbl +PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1), (2), (3), (4) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@skiphtbl@b=1 +POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1), (2), (3), (4) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@skiphtbl@b=1 +POSTHOOK: Lineage: skiphtbl PARTITION(b=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@skiphtbl@b=2 +POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@skiphtbl@b=2 +POSTHOOK: Lineage: skiphtbl PARTITION(b=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: SELECT * FROM skipHTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skiphtbl +PREHOOK: Input: default@skiphtbl@b=1 +PREHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM skipHTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skiphtbl +POSTHOOK: Input: default@skiphtbl@b=1 +POSTHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +2 1 +3 1 +4 1 +2 2 +3 2 +4 2 +PREHOOK: query: SELECT DISTINCT b FROM skipHTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skiphtbl +PREHOOK: Input: default@skiphtbl@b=1 +PREHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DISTINCT b FROM skipHTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skiphtbl +POSTHOOK: Input: default@skiphtbl@b=1 +POSTHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +1 +2 +PREHOOK: query: SELECT MAX(b) FROM skipHTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skiphtbl +PREHOOK: Input: default@skiphtbl@b=1 +PREHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT MAX(b) FROM skipHTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skiphtbl +POSTHOOK: Input: default@skiphtbl@b=1 +POSTHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +2 +PREHOOK: query: SELECT DISTINCT a FROM skipHTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skiphtbl +PREHOOK: Input: default@skiphtbl@b=1 +PREHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DISTINCT a FROM skipHTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skiphtbl +POSTHOOK: Input: default@skiphtbl@b=1 +POSTHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +2 +3 +4 +PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@skiphtbl@b=1 +POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@skiphtbl@b=1 +POSTHOOK: Lineage: skiphtbl PARTITION(b=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@skiphtbl@b=2 +POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@skiphtbl@b=2 +POSTHOOK: Lineage: skiphtbl PARTITION(b=2).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: SELECT DISTINCT b FROM skipHTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skiphtbl +PREHOOK: Input: default@skiphtbl@b=1 +PREHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DISTINCT b FROM skipHTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skiphtbl +POSTHOOK: Input: default@skiphtbl@b=1 +POSTHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +2 +PREHOOK: query: SELECT MIN(b) FROM skipHTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skiphtbl +PREHOOK: Input: default@skiphtbl@b=1 +PREHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT MIN(b) FROM skipHTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skiphtbl +POSTHOOK: Input: default@skiphtbl@b=1 +POSTHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +2 +PREHOOK: query: SELECT DISTINCT a FROM skipHTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skiphtbl +PREHOOK: Input: default@skiphtbl@b=1 +PREHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DISTINCT a FROM skipHTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skiphtbl +POSTHOOK: Input: default@skiphtbl@b=1 +POSTHOOK: Input: default@skiphtbl@b=2 +#### A masked pattern was here #### +2 +3 +4 +PREHOOK: query: DROP TABLE IF EXISTS skipFTbl +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS skipFTbl +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE skipFTbl (a int) +PARTITIONED BY (b int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +TBLPROPERTIES('skip.footer.line.count'='1') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@skipFTbl +POSTHOOK: query: CREATE TABLE skipFTbl (a int) +PARTITIONED BY (b int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +TBLPROPERTIES('skip.footer.line.count'='1') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@skipFTbl +PREHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 1) VALUES (1), (2), (3), (4) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@skipftbl@b=1 +POSTHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 1) VALUES (1), (2), (3), (4) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@skipftbl@b=1 +POSTHOOK: Lineage: skipftbl PARTITION(b=1).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 2) VALUES (1), (2), (3), (4) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@skipftbl@b=2 +POSTHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 2) VALUES (1), (2), (3), (4) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@skipftbl@b=2 +POSTHOOK: Lineage: skipftbl PARTITION(b=2).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: SELECT * FROM skipFTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skipftbl +PREHOOK: Input: default@skipftbl@b=1 +PREHOOK: Input: default@skipftbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM skipFTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skipftbl +POSTHOOK: Input: default@skipftbl@b=1 +POSTHOOK: Input: default@skipftbl@b=2 +#### A masked pattern was here #### +1 1 +2 1 +3 1 +1 2 +2 2 +3 2 +PREHOOK: query: SELECT DISTINCT b FROM skipFTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skipftbl +PREHOOK: Input: default@skipftbl@b=1 +PREHOOK: Input: default@skipftbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DISTINCT b FROM skipFTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skipftbl +POSTHOOK: Input: default@skipftbl@b=1 +POSTHOOK: Input: default@skipftbl@b=2 +#### A masked pattern was here #### +1 +2 +PREHOOK: query: SELECT MAX(b) FROM skipFTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skipftbl +PREHOOK: Input: default@skipftbl@b=1 +PREHOOK: Input: default@skipftbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT MAX(b) FROM skipFTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skipftbl +POSTHOOK: Input: default@skipftbl@b=1 +POSTHOOK: Input: default@skipftbl@b=2 +#### A masked pattern was here #### +2 +PREHOOK: query: SELECT DISTINCT a FROM skipFTbl +PREHOOK: type: QUERY +PREHOOK: Input: default@skipftbl +PREHOOK: Input: default@skipftbl@b=1 +PREHOOK: Input: default@skipftbl@b=2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DISTINCT a FROM skipFTbl +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skipftbl +POSTHOOK: Input: default@skipftbl@b=1 +POSTHOOK: Input: default@skipftbl@b=2 +#### A masked pattern was here #### +1 +2 +3 +PREHOOK: query: DROP TABLE skipHTbl +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@skiphtbl +PREHOOK: Output: default@skiphtbl +POSTHOOK: query: DROP TABLE skipHTbl +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@skiphtbl +POSTHOOK: Output: default@skiphtbl +PREHOOK: query: DROP TABLE skipFTbl +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@skipftbl +PREHOOK: Output: default@skipftbl +POSTHOOK: query: DROP TABLE skipFTbl +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@skipftbl +POSTHOOK: Output: default@skipftbl
