voonhous commented on code in PR #18065:
URL: https://github.com/apache/hudi/pull/18065#discussion_r3354522576
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieInternalRowUtils.scala:
##########
@@ -398,11 +399,47 @@ object HoodieInternalRowUtils {
(fieldUpdater, ordinal, value) =>
fieldUpdater.set(ordinal,
CatalystTypeConverters.convertToCatalyst(java.sql.Date.valueOf(value.toString)))
+ // Handle conversion from VariantType to variant struct representation
+ case (newStructType: StructType, _) if
sparkAdapter.isVariantType(prevDataType) &&
looksLikeVariantStruct(newStructType) =>
+ (fieldUpdater, ordinal, value) => {
+ if (value == null) {
+ fieldUpdater.setNullAt(ordinal)
+ } else {
+ val row = sparkAdapter.convertVariantToStruct(value, newStructType)
+ fieldUpdater.set(ordinal, row)
+ }
+ }
+
+ // Handle conversion from variant struct representation to VariantType
+ case (_, prevStructType: StructType) if
sparkAdapter.isVariantType(newDataType) &&
looksLikeVariantStruct(prevStructType) =>
+ (fieldUpdater, ordinal, value) => {
+ if (value == null) {
+ fieldUpdater.setNullAt(ordinal)
+ } else {
+ val row = value.asInstanceOf[InternalRow]
+ val variant = sparkAdapter.convertStructToVariant(row,
prevStructType)
+ fieldUpdater.set(ordinal, variant)
+ }
+ }
+
case (_, _) =>
throw new IllegalArgumentException(s"$prevDataType and $newDataType
are incompatible")
}
}
+ /**
+ * Checks if a StructType looks like a variant representation (has value and
metadata binary fields).
+ * This is a structural check that doesn't rely on metadata, useful during
schema reconciliation
+ * when toggling between shredded/unshredded formats or merging data with
different representations.
+ */
+ private def looksLikeVariantStruct(structType: StructType): Boolean = {
+ structType.fields.length >= 2 &&
Review Comment:
This was addressed in a later revision. The purely structural
`looksLikeVariantStruct` heuristic has been removed; variant typing is now
driven by the `hudi_type=VARIANT` metadata annotation
(`HoodieSchema.TYPE_METADATA_FIELD`) in `HoodieSparkSchemaConverters`, rather
than by field shape. As a result, a user-defined struct that has
`value`/`metadata` binary fields but lacks the variant metadata annotation is
no longer falsely matched. This is the more reliable metadata-based check you
suggested.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]