kasakrisz commented on code in PR #6043: URL: https://github.com/apache/hive/pull/6043#discussion_r2321364484
########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java: ########## @@ -676,6 +683,92 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, } } + /** + * PTF processor + */ + public static class PTFLineage implements SemanticNodeProcessor { + + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { + // LineageCTx + LineageCtx lCtx = (LineageCtx) procCtx; + + // The operators + @SuppressWarnings("unchecked") + PTFOperator op = (PTFOperator)nd; + Operator<? extends OperatorDesc> inpOp = getParent(stack); + lCtx.getIndex().copyPredicates(inpOp, op); + + Dependency dep = new Dependency(); + DependencyType new_type = DependencyType.EXPRESSION; + dep.setType(new_type); + // TODO: Fix this to a non-null value. This comment comes from the default implementation (TransformLineage) + dep.setExpr(null); + + List<String> columns = new ArrayList<>(); + PartitionedTableFunctionDef funcDef = op.getConf().getFuncDef(); + + if (funcDef.getPartition() != null) { + addAllMakeUniqueIfNotNull(columns, funcDef.getPartition().getExpressions().getFirst().getExprNode().getCols()); + } + if (funcDef.getOrder() != null) { + addAllMakeUniqueIfNotNull(columns, funcDef.getOrder().getExpressions().getFirst().getExprNode().getCols()); + } + + if (!(funcDef.getTFunction() instanceof Noop)) { + + if (funcDef instanceof WindowTableFunctionDef + && ((WindowTableFunctionDef) funcDef).getWindowFunctions().getFirst().getArgs() != null) { + + for (PTFExpressionDef arg : ((WindowTableFunctionDef) funcDef).getWindowFunctions().getFirst().getArgs()) { + + if (!(arg.getExprNode() instanceof ExprNodeConstantDesc)) { + columns.addAll(arg.getExprNode().getCols()); Review Comment: Should column uniqueness be enforced here? I saw that we use `addAllMakeUniqueIfNotNull` a few lines earlier. Could `columns` be a set instead of a list?
########## ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java: ########## @@ -676,6 +683,92 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, } } + /** + * PTF processor + */ + public static class PTFLineage implements SemanticNodeProcessor { + + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { + // LineageCTx + LineageCtx lCtx = (LineageCtx) procCtx; + + // The operators + @SuppressWarnings("unchecked") + PTFOperator op = (PTFOperator)nd; + Operator<? extends OperatorDesc> inpOp = getParent(stack); + lCtx.getIndex().copyPredicates(inpOp, op); + + Dependency dep = new Dependency(); + DependencyType new_type = DependencyType.EXPRESSION; + dep.setType(new_type); + // TODO: Fix this to a non-null value. This comment comes from the default implementation (TransformLineage) + dep.setExpr(null); + + List<String> columns = new ArrayList<>(); + PartitionedTableFunctionDef funcDef = op.getConf().getFuncDef(); + + if (funcDef.getPartition() != null) { + addAllMakeUniqueIfNotNull(columns, funcDef.getPartition().getExpressions().getFirst().getExprNode().getCols()); + } + if (funcDef.getOrder() != null) { + addAllMakeUniqueIfNotNull(columns, funcDef.getOrder().getExpressions().getFirst().getExprNode().getCols()); + } + + if (!(funcDef.getTFunction() instanceof Noop)) { + + if (funcDef instanceof WindowTableFunctionDef + && ((WindowTableFunctionDef) funcDef).getWindowFunctions().getFirst().getArgs() != null) { Review Comment: There might be more than one window function here: ``` ... select row_number() over (partition by src.p1) as r_num, rank() over (partition by src.col_002) as r_num2 ... ``` Should we check all of them here? 
########## ql/src/test/queries/clientpositive/lineage_ptf.q: ########## @@ -0,0 +1,125 @@ +set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.LineageLogger; + +-- Create a source table for testing +create table source_tbl2(col_001 int, col_002 int, col_003 int, p1 int); + +--- Views for single PTF operators + +-- Test for ROW_NUMBER +create view b_v_4_0 as +select * +from (select col_001, + row_number() over (partition by src.p1) as r_num, + row_number() over (partition by src.col_002) as r_num2, + row_number() over (partition by src.col_002) as r_num3 + Review Comment: nit.: unnecessary extra line -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org