zabetak commented on code in PR #4442: URL: https://github.com/apache/hive/pull/4442#discussion_r1335607615
########## ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java: ########## @@ -385,6 +385,23 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, } + public static class LateralViewJoinerPPD extends JoinerPPD implements SemanticNodeProcessor { + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + Object o = super.process(nd, stack, procCtx, nodeOutputs); + Operator<?> operator = (Operator<?>) nd; + OpWalkerInfo owi = (OpWalkerInfo) procCtx; + if (HiveConf.getBoolVar(owi.getParseContext().getConf(), + HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) { + // remove all the candidate filter operators + // when we get to the TS Review Comment: I don't think we are in the TS op here? I think we are in the `LateralViewJoinOperator` (LVJ). ########## ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java: ########## @@ -385,6 +385,23 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, } + public static class LateralViewJoinerPPD extends JoinerPPD implements SemanticNodeProcessor { + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + Object o = super.process(nd, stack, procCtx, nodeOutputs); + Operator<?> operator = (Operator<?>) nd; Review Comment: unused variable/dead code? 
########## ql/src/test/results/clientpositive/llap/lateral_view_cbo.q.out: ########## @@ -1,34 +1,113 @@ -PREHOOK: query: EXPLAIN CBO SELECT myTable.myCol FROM src +PREHOOK: query: CREATE TABLE simple_table (col string, array_col array<string>) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@simple_table +POSTHOOK: query: CREATE TABLE simple_table (col string, array_col array<string>) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@simple_table +PREHOOK: query: EXPLAIN CBO SELECT myTable.myCol FROM simple_table LATERAL VIEW explode(array(1,2,3)) myTable AS myCol PREHOOK: type: QUERY -PREHOOK: Input: default@src +PREHOOK: Input: default@simple_table #### A masked pattern was here #### -POSTHOOK: query: EXPLAIN CBO SELECT myTable.myCol FROM src +POSTHOOK: query: EXPLAIN CBO SELECT myTable.myCol FROM simple_table LATERAL VIEW explode(array(1,2,3)) myTable AS myCol POSTHOOK: type: QUERY -POSTHOOK: Input: default@src +POSTHOOK: Input: default@simple_table #### A masked pattern was here #### CBO PLAN: HiveProject(mycol=[$6]) - HiveTableFunctionScan(invocation=[LATERAL(explode(ARRAY(1, 2, 3)), $0, $1, $2, $3, $4, $5)], rowType=[RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID, BOOLEAN ROW__IS__DELETED, INTEGER mytable.mycol)]) - HiveTableScan(table=[[default, src]], table:alias=[src]) + HiveTableFunctionScan(invocation=[LATERAL(explode(ARRAY(1, 2, 3)), $0, $1, $2, $3, $4, $5)], rowType=[RecordType(VARCHAR(2147483647) col, VARCHAR(2147483647) ARRAY array_col, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID, BOOLEAN ROW__IS__DELETED, INTEGER mytable.mycol)]) + HiveTableScan(table=[[default, simple_table]], table:alias=[simple_table]) -PREHOOK: 
query: EXPLAIN CBO SELECT myTable.myCol, myTable2.myCol2 FROM src +PREHOOK: query: EXPLAIN CBO SELECT myTable.myCol, myTable2.myCol2 FROM simple_table LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array(1,2,3)) myTable2 AS myCol2 PREHOOK: type: QUERY -PREHOOK: Input: default@src +PREHOOK: Input: default@simple_table #### A masked pattern was here #### -POSTHOOK: query: EXPLAIN CBO SELECT myTable.myCol, myTable2.myCol2 FROM src +POSTHOOK: query: EXPLAIN CBO SELECT myTable.myCol, myTable2.myCol2 FROM simple_table LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array(1,2,3)) myTable2 AS myCol2 POSTHOOK: type: QUERY -POSTHOOK: Input: default@src +POSTHOOK: Input: default@simple_table #### A masked pattern was here #### CBO PLAN: HiveProject(mycol=[$6], mycol2=[$7]) - HiveTableFunctionScan(invocation=[LATERAL(explode(ARRAY(1, 2, 3)), $0, $1, $2, $3, $4, $5, $6)], rowType=[RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID, BOOLEAN ROW__IS__DELETED, INTEGER mytable.mycol, INTEGER mytable2.mycol2)]) - HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5], mytable.mycol=[$6]) - HiveTableFunctionScan(invocation=[LATERAL(explode(ARRAY(1, 2, 3)), $0, $1, $2, $3, $4, $5)], rowType=[RecordType(VARCHAR(2147483647) key, VARCHAR(2147483647) value, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID, BOOLEAN ROW__IS__DELETED, INTEGER mytable.mycol)]) - HiveTableScan(table=[[default, src]], table:alias=[src]) + HiveTableFunctionScan(invocation=[LATERAL(explode(ARRAY(1, 2, 3)), $0, $1, $2, $3, $4, $5, $6)], rowType=[RecordType(VARCHAR(2147483647) col, VARCHAR(2147483647) ARRAY array_col, BIGINT 
BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID, BOOLEAN ROW__IS__DELETED, INTEGER mytable.mycol, INTEGER mytable2.mycol2)]) + HiveProject(col=[$0], array_col=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5], mytable.mycol=[$6]) + HiveTableFunctionScan(invocation=[LATERAL(explode(ARRAY(1, 2, 3)), $0, $1, $2, $3, $4, $5)], rowType=[RecordType(VARCHAR(2147483647) col, VARCHAR(2147483647) ARRAY array_col, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID, BOOLEAN ROW__IS__DELETED, INTEGER mytable.mycol)]) + HiveTableScan(table=[[default, simple_table]], table:alias=[simple_table]) + +PREHOOK: query: EXPLAIN CBO SELECT tf.col1, tf.col2, tf.col3 +FROM simple_table + LATERAL TABLE(VALUES('A', 10, simple_table.col),('B', 20, simple_table.col)) AS tf(col1, col2, col3) +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_table +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN CBO SELECT tf.col1, tf.col2, tf.col3 +FROM simple_table + LATERAL TABLE(VALUES('A', 10, simple_table.col),('B', 20, simple_table.col)) AS tf(col1, col2, col3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_table +#### A masked pattern was here #### +CBO PLAN: +HiveProject(col1=[$6], col2=[$7], col3=[$8]) + HiveTableFunctionScan(invocation=[inline(ARRAY(ROW($0, $1, $2, $3, $4, $5, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 10, $0), ROW($0, $1, $2, $3, $4, $5, _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", 20, $0)))], rowType=[RecordType(VARCHAR(2147483647) col, VARCHAR(2147483647) ARRAY array_col, BIGINT BLOCK__OFFSET__INSIDE__FILE, VARCHAR(2147483647) INPUT__FILE__NAME, RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid) ROW__ID, BOOLEAN ROW__IS__DELETED, VARCHAR(2147483647) tf.col1, INTEGER tf.col2, VARCHAR(2147483647) 
tf.col3)]) Review Comment: Do we need to keep `$0, $1, $2, $3, $4, $5,` in this case. They seem redundant should we pass them as an input to the table function? Does this affect runtime plan/performance? Are we going to copy all these columns and pass them as input to the function? ########## ql/src/test/results/clientpositive/llap/lateral_view_ppd.q.out: ########## @@ -316,29 +332,36 @@ STAGE PLANS: filterExpr: (key = '0') (type: boolean) Filter Operator predicate: (key = '0') (type: boolean) - Lateral View Forward - Select Operator - expressions: value (type: string) - outputColumnNames: value - Lateral View Join Operator - outputColumnNames: _col1, _col6 - Select Operator - expressions: _col1 (type: string), _col6 (type: int) - outputColumnNames: _col0, _col1 - ListSink - Select Operator - expressions: array(1,2,3) (type: array<int>) - outputColumnNames: _col0 - UDTF Operator - function name: explode - Filter Operator - predicate: (col > 1) (type: boolean) Review Comment: Can't find the other comment with the extra details. Can you please share the link? ########## ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java: ########## @@ -385,6 +385,23 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, } + public static class LateralViewJoinerPPD extends JoinerPPD implements SemanticNodeProcessor { + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + Object o = super.process(nd, stack, procCtx, nodeOutputs); + Operator<?> operator = (Operator<?>) nd; + OpWalkerInfo owi = (OpWalkerInfo) procCtx; + if (HiveConf.getBoolVar(owi.getParseContext().getConf(), + HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) { + // remove all the candidate filter operators + // when we get to the TS Review Comment: Why do we need to do this here and not as part of `TableScanPPD` as it is done in every other case? Why is it specific to LVJ? 
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org For additional commands, e-mail: gitbox-help@hive.apache.org