JingsongLi commented on code in PR #7789:
URL: https://github.com/apache/paimon/pull/7789#discussion_r3212618948
##########
paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/AssignmentAlignmentHelper.scala:
##########
@@ -177,4 +175,72 @@ trait AssignmentAlignmentHelper extends SQLConfHelper with
ExpressionHelper {
}
}
+ /**
+ * Resolve an assignment value expression by-name against the target type,
then cast if needed.
+ * Recursively reorders nested type fields (Struct, Array, Map and any
combination) by name to
+ * match target field order before casting. This is consistent with Spark's
native MERGE INTO
+ * behavior (see TableOutputResolver.resolveUpdate).
+ */
+ private def resolveByNameAndCast(expression: Expression, targetType:
DataType): Expression = {
+ if (PaimonUtils.sameType(expression.dataType, targetType)) {
+ // Types already structurally identical — no reordering needed.
+ // This guarantees idempotence when the rule is applied multiple times.
+ castIfNeeded(expression, targetType)
+ } else {
+ val reordered = reorderFieldsByName(expression, expression.dataType,
targetType)
+ castIfNeeded(reordered, targetType)
+ }
+ }
+
+ /**
+ * Recursively reorder nested type fields by name to match target type's
field order. Supports
+ * StructType, ArrayType and MapType in any nesting combination. Returns the
original expression
+ * if no reordering is needed.
+ */
+ private def reorderFieldsByName(
+ expression: Expression,
+ sourceType: DataType,
+ targetType: DataType): Expression = {
+ (sourceType, targetType) match {
+ case (s: StructType, t: StructType) if s != t =>
+ reorderStructByName(expression, s, t)
+ case (ArrayType(sElem, sNull), ArrayType(tElem, _)) if sElem != tElem =>
+ val elementVar = NamedLambdaVariable("element", sElem, sNull)
+ val reordered = reorderFieldsByName(elementVar, sElem, tElem)
+ ArrayTransform(expression, LambdaFunction(reordered, Seq(elementVar)))
+ case (MapType(sKey, sVal, sValNull), MapType(tKey, tVal, _))
+ if sKey != tKey || sVal != tVal =>
+ val keyVar = NamedLambdaVariable("key", sKey, nullable = false)
+ val valVar = NamedLambdaVariable("value", sVal, sValNull)
+ val reorderedKey = reorderFieldsByName(keyVar, sKey, tKey)
+ val reorderedVal = reorderFieldsByName(valVar, sVal, tVal)
+ val newKeys = ArrayTransform(MapKeys(expression),
LambdaFunction(reorderedKey, Seq(keyVar)))
+ val newVals =
+ ArrayTransform(MapValues(expression), LambdaFunction(reorderedVal,
Seq(valVar)))
+ MapFromArrays(newKeys, newVals)
+ case _ =>
+ expression
+ }
+ }
+
+ /** Reorder source struct fields to match target field order by name,
recursing into nested types. */
+ private def reorderStructByName(
Review Comment:
`reorderStructByName` crashes when the target struct has fields that are absent from the source.
Should we support this case?
The same issue applies to MapType value reordering in `reorderFieldsByName`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]