yihua commented on code in PR #12716:
URL: https://github.com/apache/hudi/pull/12716#discussion_r1931272327
##########
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/UpdateHoodieTableCommand.scala:
##########
@@ -101,22 +110,39 @@ case class UpdateHoodieTableCommand(ut: UpdateTable)
extends HoodieLeafRunnableC
val filteredOutput = if
(sparkSession.sqlContext.conf.getConfString(SPARK_SQL_OPTIMIZED_WRITES.key()
, SPARK_SQL_OPTIMIZED_WRITES.defaultValue()) == "true") {
- ut.table.output
+ attributeSeq
} else {
- removeMetaFields(ut.table.output)
+ removeMetaFields(attributeSeq)
}
- val targetExprs = filteredOutput.map { targetAttr =>
+ val condition = ut.condition.getOrElse(TrueLiteral)
+ val targetAttributes = filteredOutput.map { targetAttr =>
// NOTE: [[UpdateTable]] permits partial updates and therefore here we
correlate assigned
// assigned attributes to the ones of the target table. Ones not
being assigned
// will simply be carried over (from the old record)
assignedAttributes.find(p => attributeEquals(p._1, targetAttr))
.map { case (_, expr) => Alias(castIfNeeded(expr,
targetAttr.dataType), targetAttr.name)() }
.getOrElse(targetAttr)
- }
-
- val condition = ut.condition.getOrElse(TrueLiteral)
- val filteredPlan = Filter(condition, Project(targetExprs, ut.table))
+ }.map(attr => toUnresolved(attr).asInstanceOf[NamedExpression])
+
+ // Include temporary row index column name in the attribute refs of
logical plan
+ var attributeRefs = attributeSeq.map(expr => AttributeReference(expr.name,
expr.dataType, nullable = expr.nullable)())
+ attributeRefs = attributeRefs :+
AttributeReference(SparkAdapterSupport.sparkAdapter.getTemporaryRowIndexColumnName(),
LongType, nullable = true)()
+
+ val schema = AvroSchemaUtils.projectSchema(
+ convertToAvroSchema(catalogTable.tableSchema, catalogTable.tableName),
Review Comment:
Let's check whether we actually need the full table schema here, i.e., whether
using only `targetAttributes` or the non-updated columns is enough.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]