HyukjinKwon commented on a change in pull request #34025:
URL: https://github.com/apache/spark/pull/34025#discussion_r710728629
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
##########
@@ -559,52 +559,82 @@ object StructType extends AbstractDataType {
case _ => dt
}
+ /**
+ * This leverages `merge` to merge data types for UNION operator by specializing
+ * the handling of struct types to follow UNION semantics.
+ */
+ private[sql] def unionLikeMerge(left: DataType, right: DataType): DataType =
+ mergeInternal(left, right, (s1: StructType, s2: StructType) => {
+ val leftFields = s1.fields
+ val rightFields = s2.fields
+ require(leftFields.size == rightFields.size, "To merge nullability, " +
+ "two structs must have same number of fields.")
+
+ val newFields = leftFields.zip(rightFields).map {
+ case (leftField@StructField(_, leftType, leftNullable, _),
+ _@StructField(_, rightType, rightNullable, _)) =>
+ leftField.copy(
+ dataType = unionLikeMerge(leftType, rightType),
+ nullable = leftNullable || rightNullable)
+ }.toSeq
+ StructType(newFields)
+ })
+
private[sql] def merge(left: DataType, right: DataType): DataType =
+ mergeInternal(left, right, (s1: StructType, s2: StructType) => {
+ val leftFields = s1.fields
+ val rightFields = s2.fields
+ val newFields = mutable.ArrayBuffer.empty[StructField]
+
+ val rightMapped = fieldsMap(rightFields)
+ leftFields.foreach {
+ case leftField @ StructField(leftName, leftType, leftNullable, _) =>
+ rightMapped.get(leftName)
+ .map { case rightField @ StructField(rightName, rightType, rightNullable, _) =>
+ try {
+ leftField.copy(
+ dataType = merge(leftType, rightType),
+ nullable = leftNullable || rightNullable)
+ } catch {
+ case NonFatal(e) =>
+ throw QueryExecutionErrors.failedMergingFieldsError(leftName, rightName, e)
+ }
+ }
+ .orElse {
+ Some(leftField)
+ }
+ .foreach(newFields += _)
+ }
+
+ val leftMapped = fieldsMap(leftFields)
+ rightFields
+ .filterNot(f => leftMapped.get(f.name).nonEmpty)
+ .foreach { f =>
+ newFields += f
+ }
+
+ StructType(newFields.toSeq)
+ })
+
+ private[sql] def mergeInternal(
Review comment:
nit:
```suggestion
private def mergeInternal(
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]