anuragmantri commented on code in PR #55518:
URL: https://github.com/apache/spark/pull/55518#discussion_r3192166920
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteRowLevelCommand.scala:
##########
@@ -50,20 +51,35 @@ trait RewriteRowLevelCommand extends Rule[LogicalPlan] {
protected def buildOperationTable(
table: SupportsRowLevelOperations,
command: Command,
- options: CaseInsensitiveStringMap): RowLevelOperationTable = {
- val info = RowLevelOperationInfoImpl(command, options)
+ options: CaseInsensitiveStringMap,
+ updatedColumns: Seq[NamedReference] = Nil): RowLevelOperationTable = {
+ val info = RowLevelOperationInfoImpl(command, options, updatedColumns)
val operation = table.newRowLevelOperationBuilder(info).build()
RowLevelOperationTable(table, operation)
}
+ // Builds a DataSourceV2Relation for a row-level operation, optionally
narrowing its output.
+ //
+ // When dataAttrs is non-empty, the relation output is narrowed to include
only columns
+ // required for a column-update write. When dataAttrs is empty, the full
relation.output is
+ // preserved.
protected def buildRelationWithAttrs(
relation: DataSourceV2Relation,
table: RowLevelOperationTable,
metadataAttrs: Seq[AttributeReference],
- rowIdAttrs: Seq[AttributeReference] = Nil): DataSourceV2Relation = {
-
- val attrs = dedupAttrs(relation.output ++ rowIdAttrs ++ metadataAttrs)
- relation.copy(table = table, output = attrs)
+ rowIdAttrs: Seq[AttributeReference] = Nil,
+ dataAttrs: Seq[AttributeReference] = Nil,
+ cond: Expression = TrueLiteral): DataSourceV2Relation = {
+
+ if (dataAttrs.nonEmpty) {
+ val required =
+ AttributeSet(dataAttrs) ++ AttributeSet(Seq(cond)) ++
AttributeSet(rowIdAttrs)
+ val narrowOutput = relation.output.filter(required.contains)
+ relation.copy(table = table, output = dedupAttrs(narrowOutput ++
rowIdAttrs ++ metadataAttrs))
Review Comment:
> Can an attribute in required be missing from relation.output?
No. dataAttrs come from the connector's `requiredDataAttributes()` which are
resolved against relation (via `V2ExpressionUtils.resolveRefs`), so they're
guaranteed to be present. The condition's referenced columns are also table
columns from the user's `WHERE` clause. `rowIdAttrs` and `metadataAttrs` can
be absent from relation.output (they're resolved separately), but they're not
part of the filter. They're appended unconditionally afterward via
`dedupAttrs(narrowOutput ++ rowIdAttrs ++ metadataAttrs)`
> rowIdAttrs seems to be added 2 times. If we already have dedupAttrs() then
probably doesn't make sense to build AttributeSets.
Agreed. I fixed it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]