This is an automated email from the ASF dual-hosted git repository. mahesh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 243155b HIVE-24373 : Wrong predicate is pushed down for view with constant value projection. (Mahesh Kumar Behera, reviewed by Jesus Camacho Rodriguez) 243155b is described below commit 243155b0db3a17ce12ec90697c8195b09fb2879c Author: mahesh kumar behera <mah...@apache.org> AuthorDate: Fri Nov 13 16:52:19 2020 +0530 HIVE-24373 : Wrong predicate is pushed down for view with constant value projection. (Mahesh Kumar Behera, reviewed by Jesus Camacho Rodriguez) --- .../apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java | 4 +- ql/src/test/queries/clientpositive/ppd2.q | 38 +++++ ql/src/test/results/clientpositive/llap/ppd2.q.out | 154 +++++++++++++++++++++ 3 files changed, 195 insertions(+), 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java index b4241c1..7926a03 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java @@ -250,7 +250,9 @@ public class ExprWalkerInfo implements NodeProcessorCtx { for (Map.Entry<String, List<ExprNodeDesc>> entry : nonFinalPreds.entrySet()) { List<ExprNodeDesc> converted = new ArrayList<ExprNodeDesc>(); for (ExprNodeDesc newExpr : entry.getValue()) { - converted.add(newToOldExprMap.get(newExpr)); + // We should clone it to avoid getting overwritten if two or more operator uses + // this same expression. + converted.add(newToOldExprMap.get(newExpr).clone()); } oldExprs.put(entry.getKey(), converted); } diff --git a/ql/src/test/queries/clientpositive/ppd2.q b/ql/src/test/queries/clientpositive/ppd2.q index 39e3170..2262331 100644 --- a/ql/src/test/queries/clientpositive/ppd2.q +++ b/ql/src/test/queries/clientpositive/ppd2.q @@ -77,3 +77,41 @@ from ( distribute by a.key sort by a.key,a.cc desc) b where b.cc>1; + + +set hive.explain.user=false; +set hive.cbo.enable=false; +set hive.optimize.ppd=true; +set hive.ppd.remove.duplicatefilters=true; + +DROP TABLE arc; +CREATE table arc(`dt_from` string, `dt_to` string); + +DROP TABLE loc1; +CREATE table loc1(`dt_from` string, `dt_to` string); + +-- INSERT INTO arc VALUES('2020', '2020'); +-- INSERT INTO loc1 VALUES('2020', '2020'); + +DROP VIEW view; +CREATE + VIEW view AS + SELECT + '9999' as DT_FROM, + uuid() as DT_TO + FROM + loc1 + UNION ALL + SELECT + dt_from as DT_FROM, + uuid() as DT_TO + FROM + arc; + +EXPLAIN + SELECT + dt_from, dt_to + FROM + view + WHERE + '2020' between dt_from and dt_to; diff --git a/ql/src/test/results/clientpositive/llap/ppd2.q.out b/ql/src/test/results/clientpositive/llap/ppd2.q.out index 184c580..b067ae6 100644 --- a/ql/src/test/results/clientpositive/llap/ppd2.q.out +++ b/ql/src/test/results/clientpositive/llap/ppd2.q.out @@ -691,3 +691,157 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 95 2 97 2 98 2 +PREHOOK: query: DROP TABLE arc +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE arc +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE table arc(`dt_from` string, `dt_to` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@arc +POSTHOOK: query: CREATE table arc(`dt_from` string, `dt_to` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@arc +PREHOOK: query: DROP TABLE loc1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE loc1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE table loc1(`dt_from` string, `dt_to` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@loc1 +POSTHOOK: query: CREATE table loc1(`dt_from` string, `dt_to` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loc1 +PREHOOK: query: DROP VIEW view +PREHOOK: type: DROPVIEW +POSTHOOK: query: DROP VIEW view +POSTHOOK: type: DROPVIEW +PREHOOK: query: CREATE + VIEW view AS + SELECT + '9999' as DT_FROM, + uuid() as DT_TO + FROM + loc1 + UNION ALL + SELECT + dt_from as DT_FROM, + uuid() as DT_TO + FROM + arc +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@arc +PREHOOK: Input: default@loc1 +PREHOOK: Output: database:default +PREHOOK: Output: default@view +POSTHOOK: query: CREATE + VIEW view AS + SELECT + '9999' as DT_FROM, + uuid() as DT_TO + FROM + loc1 + UNION ALL + SELECT + dt_from as DT_FROM, + uuid() as DT_TO + FROM + arc +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@arc +POSTHOOK: Input: default@loc1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@view +POSTHOOK: Lineage: view.dt_from EXPRESSION [(arc)arc.FieldSchema(name:dt_from, type:string, comment:null), ] +POSTHOOK: Lineage: view.dt_to EXPRESSION [] +PREHOOK: query: EXPLAIN + SELECT + dt_from, dt_to + FROM + view + WHERE + '2020' between dt_from and dt_to +PREHOOK: type: QUERY +PREHOOK: Input: default@arc +PREHOOK: Input: default@loc1 +PREHOOK: Input: default@view +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN + SELECT + dt_from, dt_to + FROM + view + WHERE + '2020' between dt_from and dt_to +POSTHOOK: type: QUERY +POSTHOOK: Input: default@arc +POSTHOOK: Input: default@loc1 +POSTHOOK: Input: default@view +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Union 2 (CONTAINS) + Map 3 <- Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: loc1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: '9999' (type: string), uuid() (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Filter Operator + predicate: '2020' BETWEEN '9999' AND _col1 (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 184 Basic stats: PARTIAL Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: arc + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: dt_from (type: string), uuid() (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: '2020' BETWEEN _col0 AND _col1 (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 184 Basic stats: PARTIAL Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +