This is an automated email from the ASF dual-hosted git repository. vgarg pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 64a020b HIVE-21778: CBO: "Struct is not null" gets evaluated as `nullable` always causing filter miss in the query (Vineet Garg, reviewed by Jesus Camacho Rodriguez) 64a020b is described below commit 64a020b2392cdfd6d2230a99a010dbeafd31eaf5 Author: Vineet Garg <vg...@apache.org> AuthorDate: Fri Mar 13 09:13:22 2020 -0700 HIVE-21778: CBO: "Struct is not null" gets evaluated as `nullable` always causing filter miss in the query (Vineet Garg, reviewed by Jesus Camacho Rodriguez) --- .../HiveProjectFilterPullUpConstantsRule.java | 2 +- .../optimizer/calcite/translator/ASTBuilder.java | 4 +- .../calcite/translator/TypeConverter.java | 3 +- ql/src/test/queries/clientpositive/structin.q | 18 +++ .../results/clientpositive/intersect_all_rj.q.out | 19 ++- .../results/clientpositive/llap/subquery_in.q.out | 8 +- .../llap/vector_identity_reuse.q.out | 42 +++---- .../clientpositive/perf/spark/query93.q.out | 18 +-- .../clientpositive/perf/tez/cbo_query93.q.out | 10 +- .../results/clientpositive/perf/tez/query93.q.out | 18 +-- ql/src/test/results/clientpositive/structin.q.out | 127 +++++++++++++++++++++ 11 files changed, 208 insertions(+), 61 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectFilterPullUpConstantsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectFilterPullUpConstantsRule.java index 25227a3..efabe5d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectFilterPullUpConstantsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectFilterPullUpConstantsRule.java @@ -119,7 +119,7 @@ public class HiveProjectFilterPullUpConstantsRule extends RelOptRule { case IS_NULL: conditions.put(conjCall.operands.get(0).toString(), relBuilder.getRexBuilder().makeNullLiteral( - conjCall.operands.get(0).getType().getSqlTypeName())); + conjCall.operands.get(0).getType())); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java index 7328b72..f714f07 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTBuilder.java @@ -243,6 +243,7 @@ public class ASTBuilder { case INTERVAL_SECOND: case INTERVAL_YEAR: case INTERVAL_YEAR_MONTH: + case ROW: if (literal.getValue() == null) { return ASTBuilder.construct(HiveParser.TOK_NULL, "TOK_NULL").node(); } @@ -364,8 +365,9 @@ public class ASTBuilder { type = HiveParser.TOK_NULL; break; - //binary type should not be seen. + //binary, ROW type should not be seen. case BINARY: + case ROW: default: throw new RuntimeException("Unsupported Type: " + sqlType); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java index edb3199..ed4a73e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java @@ -160,7 +160,8 @@ public class TypeConverter { convertedType = convert((UnionTypeInfo) type, dtFactory); break; } - return convertedType; + // hive does not have concept of not nullable types + return dtFactory.createTypeWithNullability(convertedType, true); } public static RelDataType convert(PrimitiveTypeInfo type, RelDataTypeFactory dtFactory) { diff --git a/ql/src/test/queries/clientpositive/structin.q b/ql/src/test/queries/clientpositive/structin.q index 9621195..7d8a6e4 100644 --- a/ql/src/test/queries/clientpositive/structin.q +++ b/ql/src/test/queries/clientpositive/structin.q @@ -21,3 +21,21 @@ IN ( struct('1234-1111-0074578664','3'), struct('1234-1111-0074578695',1) ); + +CREATE TABLE test_struct +( + f1 string, + demo_struct struct<f1:string, f2:string, f3:string>, + datestr string +); + +insert into test_struct values('s1', named_struct('f1','1', 'f2','2', 'f3','3'), '02-02-2020'); +insert into test_struct values('s2', named_struct('f1',cast(null as string),'f2', cast(null as string),'f3', cast(null as string)), '02-02-2020'); +insert into test_struct values('s4', named_struct('f1','100', 'f2','200', 'f3','300'), '02-02-2020'); + +explain select * from test_struct where datestr='02-02-2020' and demo_struct is not null + order by f1; +select * from test_struct where datestr='02-02-2020' and demo_struct is not null + order by f1; + +DROP TABLE test_struct; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/intersect_all_rj.q.out b/ql/src/test/results/clientpositive/intersect_all_rj.q.out index 427b841..a807f11 100644 --- a/ql/src/test/results/clientpositive/intersect_all_rj.q.out +++ b/ql/src/test/results/clientpositive/intersect_all_rj.q.out @@ -178,16 +178,15 @@ HiveProject($f0=[$1]) HiveUnion(all=[true]) HiveProject($f0=[$0], $f1=[$1]) HiveAggregate(group=[{0}], agg#0=[count()]) - HiveProject($f0=[$0]) - HiveAggregate(group=[{0}]) - HiveProject($f0=[CASE(IS NOT NULL($3), $3, if($1, $4, $2))]) - HiveJoin(condition=[>=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(bigint_col_3=[$1]) - HiveFilter(condition=[IS NOT NULL($1)]) - HiveTableScan(table=[[default, table_7]], table:alias=[a3]) - HiveProject(boolean_col_16=[$0], timestamp_col_5=[$1], timestamp_col_15=[$2], timestamp_col_30=[$3], CAST=[CAST($4):BIGINT]) - HiveFilter(condition=[IS NOT NULL(CAST($4):BIGINT)]) - HiveTableScan(table=[[default, table_10]], table:alias=[a4]) + HiveProject(CASE=[$0]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[>=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(bigint_col_3=[$1]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveTableScan(table=[[default, table_7]], table:alias=[a3]) + HiveProject(CASE=[CASE(IS NOT NULL($2), $2, if($0, $3, $1))], CAST=[CAST($4):BIGINT]) + HiveFilter(condition=[IS NOT NULL(CAST($4):BIGINT)]) + HiveTableScan(table=[[default, table_10]], table:alias=[a4]) HiveProject($f0=[$0], $f1=[$1]) HiveAggregate(group=[{0}], agg#0=[count()]) HiveProject($f0=[$0]) diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out index 966215c..60522c8 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -4028,13 +4028,13 @@ STAGE PLANS: Select Operator expressions: p_name (type: string), p_type (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -4087,10 +4087,10 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col4 + outputColumnNames: _col0, _col3 Statistics: Num rows: 28 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col4 (type: int) + expressions: _col0 (type: string), _col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 3500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out b/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out index 77e118c..6db296d 100644 --- a/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out @@ -172,13 +172,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 99 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 keys: 0 _col0 (type: bigint) - 1 _col3 (type: bigint) + 1 UDFToLong(_col2) (type: bigint) Map Join Vectorization: bigTableKeyColumns: 0:bigint bigTableRetainColumnNums: [1] @@ -189,7 +189,7 @@ STAGE PLANS: projectedOutput: 1:int, 4:int, 5:smallint, 6:int smallTableValueMapping: 4:int, 5:smallint, 6:int hashTableImplementationType: OPTIMIZED - outputColumnNames: _col1, _col2, _col3, _col4 + outputColumnNames: _col1, _col3, _col4, _col5 input vertices: 1 Map 3 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE @@ -198,15 +198,16 @@ STAGE PLANS: className: VectorFilterOperator native: true predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:int, val 5), FilterLongColEqualLongScalar(col 6:int, val 10), FilterLongColEqualLongScalar(col 6:bigint, val 571)(children: col 6:int)) - predicate: ((_col1 = 5) or (_col4 = 10) or (UDFToLong(_col4) = 571L)) (type: boolean) + predicate: ((_col1 = 5) or (_col5 = 10) or (UDFToLong(_col5) = 571L)) (type: boolean) Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: int), _col3 (type: smallint), _col4 (type: int) + expressions: _col3 (type: int), _col5 (type: int), CASE WHEN (_col3 is not null) THEN (_col3) ELSE (UDFToInteger(_col4)) END (type: int) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [4, 5, 6] + projectedOutputColumnNums: [4, 6, 8] + selectExpressions: IfExprColumnCondExpr(col 7:boolean, col 4:intcol 5:smallint)(children: IsNotNull(col 4:int) -> 7:boolean, col 4:int, col 5:smallint) -> 8:int Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -215,13 +216,13 @@ STAGE PLANS: 0 1 Map Join Vectorization: - bigTableRetainColumnNums: [4, 5, 6] - bigTableValueColumns: 4:int, 5:smallint, 6:int + bigTableRetainColumnNums: [4, 6, 8] + bigTableValueColumns: 4:int, 6:int, 8:int className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nonOuterSmallTableKeyMapping: [] - projectedOutput: 4:int, 5:smallint, 6:int + projectedOutput: 4:int, 6:int, 8:int hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2 input vertices: @@ -238,13 +239,12 @@ STAGE PLANS: keyExpressions: col 4:int native: true Select Operator - expressions: _col0 (type: int), CASE WHEN (_col0 is not null) THEN (_col0) ELSE (UDFToInteger(_col1)) END (type: int), _col2 (type: int) + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [4, 8, 6] - selectExpressions: IfExprColumnCondExpr(col 7:boolean, col 4:intcol 5:smallint)(children: IsNotNull(col 4:int) -> 7:boolean, col 4:int, col 5:smallint) -> 8:int Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) @@ -293,26 +293,26 @@ STAGE PLANS: predicate: UDFToLong(CASE WHEN (int_col_5 is not null) THEN (int_col_5) ELSE (UDFToInteger(smallint_col_22)) END) is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: int_col_5 (type: int), smallint_col_22 (type: smallint), CASE WHEN (int_col_5 is not null) THEN (int_col_5) ELSE (UDFToInteger(smallint_col_22)) END (type: int), UDFToLong(CASE WHEN (int_col_5 is not null) THEN (int_col_5) ELSE (UDFToInteger(smallint_col_22)) END) (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: int_col_5 (type: int), smallint_col_22 (type: smallint), CASE WHEN (int_col_5 is not null) THEN (int_col_5) ELSE (UDFToInteger(smallint_col_22)) END (type: int) + outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 0, 6, 8] - selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 1:intcol 0:smallint)(children: IsNotNull(col 1:int) -> 5:boolean, col 1:int, col 0:smallint) -> 6:int, IfExprColumnCondExpr(col 7:boolean, col 1:intcol 0:smallint)(children: IsNotNull(col 1:int) -> 7:boolean, col 1:int, col 0:smallint) -> 8:int - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [1, 0, 6] + selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 1:intcol 0:smallint)(children: IsNotNull(col 1:int) -> 5:boolean, col 1:int, col 0:smallint) -> 6:int + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col3 (type: bigint) + key expressions: UDFToLong(_col2) (type: bigint) null sort order: z sort order: + - Map-reduce partition columns: _col3 (type: bigint) + Map-reduce partition columns: UDFToLong(_col2) (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumns: 8:bigint + keyColumns: 6:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 1:int, 0:smallint, 6:int - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: smallint), _col2 (type: int) Execution mode: vectorized, llap LLAP IO: no inputs @@ -330,7 +330,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: smallint_col_22:smallint, int_col_5:int partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint] + scratchColumnTypeNames: [bigint, bigint, bigint, bigint] Map 4 Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/perf/spark/query93.q.out b/ql/src/test/results/clientpositive/perf/spark/query93.q.out index 5cc7058..09441d2 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query93.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query93.q.out @@ -89,8 +89,8 @@ STAGE PLANS: predicate: (sr_item_sk is not null and sr_ticket_number is not null and sr_reason_sk is not null) (type: boolean) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: sr_item_sk (type: int), sr_reason_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: sr_item_sk (type: int), sr_reason_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int), sr_return_quantity is not null (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -98,7 +98,7 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3 + outputColumnNames: _col0, _col2, _col3, _col4 input vertices: 1 Map 5 Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE @@ -108,7 +108,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col2 (type: int) Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int) + value expressions: _col3 (type: int), _col4 (type: boolean) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -122,8 +122,8 @@ STAGE PLANS: predicate: (ss_item_sk is not null and ss_ticket_number is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)), (CAST( ss_quantity AS decimal(10,0)) * ss_sales_price) (type: decimal(18,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: int) @@ -131,7 +131,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col2 (type: int) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(18,2)) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -141,10 +141,10 @@ STAGE PLANS: keys: 0 _col0 (type: int), _col2 (type: int) 1 _col0 (type: int), _col2 (type: int) - outputColumnNames: _col3, _col6, _col8, _col9 + outputColumnNames: _col3, _col4, _col7, _col9, _col10, _col11 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col6 (type: int), CASE WHEN (_col3 is not null) THEN ((CAST( (_col8 - _col3) AS decimal(10,0)) * _col9)) ELSE ((CAST( _col8 AS decimal(10,0)) * _col9)) END (type: decimal(18,2)) + expressions: _col7 (type: int), CASE WHEN (_col4) THEN ((CAST( (_col9 - _col3) AS decimal(10,0)) * _col10)) ELSE (_col11) END (type: decimal(18,2)) outputColumnNames: _col0, _col1 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out index b0b8169..6a8ed39 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out @@ -44,13 +44,13 @@ CBO PLAN: HiveSortLimit(sort0=[$1], sort1=[$0], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ss_customer_sk=[$0], $f1=[$1]) HiveAggregate(group=[{0}], agg#0=[sum($1)]) - HiveProject(ss_customer_sk=[$1], act_sales=[CASE(IS NOT NULL($8), *(CAST(-($3, $8)):DECIMAL(10, 0), $4), *(CAST($3):DECIMAL(10, 0), $4))]) - HiveJoin(condition=[AND(=($5, $0), =($7, $2))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_ticket_number=[$9], ss_quantity=[$10], ss_sales_price=[$13]) + HiveProject(ss_customer_sk=[$1], act_sales=[CASE($10, *(CAST(-($3, $9)):DECIMAL(10, 0), $4), $5)]) + HiveJoin(condition=[AND(=($6, $0), =($8, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_ticket_number=[$9], ss_quantity=[$10], ss_sales_price=[$13], *=[*(CAST($10):DECIMAL(10, 0), $13)]) HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_reason_sk=[$8], sr_ticket_number=[$9], sr_return_quantity=[$10]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_reason_sk=[$8], sr_ticket_number=[$9], sr_return_quantity=[$10], IS NOT NULL=[IS NOT NULL($10)]) HiveFilter(condition=[AND(IS NOT NULL($8), IS NOT NULL($2), IS NOT NULL($9))]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) HiveProject(r_reason_sk=[$0]) diff --git a/ql/src/test/results/clientpositive/perf/tez/query93.q.out b/ql/src/test/results/clientpositive/perf/tez/query93.q.out index d0bde22..6a47c90 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query93.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query93.q.out @@ -69,15 +69,15 @@ Stage-0 PartitionCols:_col0 Group By Operator [GBY_17] (rows=569016747 width=115) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_15] (rows=569016747 width=118) + Select Operator [SEL_15] (rows=569016747 width=234) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_65] (rows=569016747 width=118) - Conds:RS_12._col0, _col2=RS_74._col0, _col2(Inner),Output:["_col3","_col6","_col8","_col9"] + Merge Join Operator [MERGEJOIN_65] (rows=569016747 width=234) + Conds:RS_12._col0, _col2=RS_74._col0, _col2(Inner),Output:["_col3","_col4","_col7","_col9","_col10","_col11"] <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_74] PartitionCols:_col0, _col2 - Select Operator [SEL_73] (rows=575995635 width=122) - Output:["_col0","_col1","_col2","_col3","_col4"] + Select Operator [SEL_73] (rows=575995635 width=234) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] Filter Operator [FIL_72] (rows=575995635 width=122) predicate:(ss_item_sk is not null and ss_ticket_number is not null) TableScan [TS_6] (rows=575995635 width=122) @@ -85,13 +85,13 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col0, _col2 - Merge Join Operator [MERGEJOIN_64] (rows=55574563 width=11) - Conds:RS_68._col1=RS_71._col0(Inner),Output:["_col0","_col2","_col3"] + Merge Join Operator [MERGEJOIN_64] (rows=55574563 width=15) + Conds:RS_68._col1=RS_71._col0(Inner),Output:["_col0","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_68] PartitionCols:_col1 - Select Operator [SEL_67] (rows=55574563 width=15) - Output:["_col0","_col1","_col2","_col3"] + Select Operator [SEL_67] (rows=55574563 width=19) + Output:["_col0","_col1","_col2","_col3","_col4"] Filter Operator [FIL_66] (rows=55574563 width=15) predicate:(sr_reason_sk is not null and sr_item_sk is not null and sr_ticket_number is not null) TableScan [TS_0] (rows=57591150 width=15) diff --git a/ql/src/test/results/clientpositive/structin.q.out b/ql/src/test/results/clientpositive/structin.q.out index 0fe88f5..49c4b56 100644 --- a/ql/src/test/results/clientpositive/structin.q.out +++ b/ql/src/test/results/clientpositive/structin.q.out @@ -119,3 +119,130 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: CREATE TABLE test_struct +( + f1 string, + demo_struct struct<f1:string, f2:string, f3:string>, + datestr string +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_struct +POSTHOOK: query: CREATE TABLE test_struct +( + f1 string, + demo_struct struct<f1:string, f2:string, f3:string>, + datestr string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_struct +PREHOOK: query: insert into test_struct values('s1', named_struct('f1','1', 'f2','2', 'f3','3'), '02-02-2020') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_struct +POSTHOOK: query: insert into test_struct values('s1', named_struct('f1','1', 'f2','2', 'f3','3'), '02-02-2020') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_struct +POSTHOOK: Lineage: test_struct.datestr SCRIPT [] +POSTHOOK: Lineage: test_struct.demo_struct SCRIPT [] +POSTHOOK: Lineage: test_struct.f1 SCRIPT [] +PREHOOK: query: insert into test_struct values('s2', named_struct('f1',cast(null as string),'f2', cast(null as string),'f3', cast(null as string)), '02-02-2020') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_struct +POSTHOOK: query: insert into test_struct values('s2', named_struct('f1',cast(null as string),'f2', cast(null as string),'f3', cast(null as string)), '02-02-2020') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_struct +POSTHOOK: Lineage: test_struct.datestr SCRIPT [] +POSTHOOK: Lineage: test_struct.demo_struct SCRIPT [] +POSTHOOK: Lineage: test_struct.f1 SCRIPT [] +PREHOOK: query: insert into test_struct values('s4', named_struct('f1','100', 'f2','200', 'f3','300'), '02-02-2020') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_struct +POSTHOOK: query: insert into test_struct values('s4', named_struct('f1','100', 'f2','200', 'f3','300'), '02-02-2020') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_struct +POSTHOOK: Lineage: test_struct.datestr SCRIPT [] +POSTHOOK: Lineage: test_struct.demo_struct SCRIPT [] +POSTHOOK: Lineage: test_struct.f1 SCRIPT [] +PREHOOK: query: explain select * from test_struct where datestr='02-02-2020' and demo_struct is not null + order by f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_struct +#### A masked pattern was here #### +POSTHOOK: query: explain select * from test_struct where datestr='02-02-2020' and demo_struct is not null + order by f1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_struct +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test_struct + filterExpr: ((datestr = '02-02-2020') and demo_struct is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 2658 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((datestr = '02-02-2020') and demo_struct is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 2658 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: f1 (type: string), demo_struct (type: struct<f1:string,f2:string,f3:string>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 2658 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 3 Data size: 2658 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct<f1:string,f2:string,f3:string>) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: struct<f1:string,f2:string,f3:string>), '02-02-2020' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 2658 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 2658 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from test_struct where datestr='02-02-2020' and demo_struct is not null + order by f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_struct +#### A masked pattern was here #### +POSTHOOK: query: select * from test_struct where datestr='02-02-2020' and demo_struct is not null + order by f1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_struct +#### A masked pattern was here #### +s1 {"f1":"1","f2":"2","f3":"3"} 02-02-2020 +s2 {"f1":null,"f2":null,"f3":null} 02-02-2020 +s4 {"f1":"100","f2":"200","f3":"300"} 02-02-2020 +PREHOOK: query: DROP TABLE test_struct +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_struct +PREHOOK: Output: default@test_struct +POSTHOOK: query: DROP TABLE test_struct +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_struct +POSTHOOK: Output: default@test_struct