ulysses-you commented on code in PR #4393:
URL: https://github.com/apache/kyuubi/pull/4393#discussion_r1115522050
##########
extensions/spark/kyuubi-spark-lineage/src/main/scala/org/apache/kyuubi/plugin/lineage/helper/SparkSQLLineageParseHelper.scala:
##########
@@ -160,15 +162,22 @@ trait LineageParser {
}
}
+ private def isNameWithQualifier(attr: Attribute, qualifier: Seq[String]):
Boolean = {
+ val nameTokens = attr.name.split('.')
+ val namespace = nameTokens.init.mkString(".")
+ nameTokens.length > 1 && namespace.endsWith(qualifier.mkString("."))
+ }
+
private def mergeRelationColumnLineage(
parentColumnsLineage: AttributeMap[AttributeSet],
relationOutput: Seq[Attribute],
relationColumnLineage: AttributeMap[AttributeSet]):
AttributeMap[AttributeSet] = {
val mergedRelationColumnLineage = {
- relationOutput.foldLeft((ListMap[Attribute, AttributeSet](),
relationColumnLineage)) {
- case ((acc, x), attr) =>
- (acc + (attr -> x.head._2), x.tail)
- }._1
+ relationOutput.slice(0, relationColumnLineage.size)
Review Comment:
> subquery's lineage of CacheTable is not include #rank column
Why does the subquery's lineage of CacheTable not include the `#rank` column?
Should it return `rank -> [a, b]`? Or do you mean we do not support collecting
lineage from a window?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]