This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new eaa3378e3453 [SPARK-55366][SQL][PYTHON][FOLLOW-UP] Relax the
duplicated field name check
eaa3378e3453 is described below
commit eaa3378e3453c73e9303e7eee18625f0dd3eb053
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Mon Feb 9 09:07:35 2026 +0800
[SPARK-55366][SQL][PYTHON][FOLLOW-UP] Relax the duplicated field name check
### What changes were proposed in this pull request?
Relax the duplicated field name check
### Why are the changes needed?
I just noticed that the `failDuplicatedFieldNames(dt: DataType)` introduced
in https://github.com/apache/spark/pull/54153 is stricter than
`toArrowSchema` with `errorOnDuplicatedFieldNames`, because the latter does not
check the first level for duplicated field names.
This change restores the original logic to avoid unexpected behavior changes.
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
ci
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #54205 from zhengruifeng/fix_duplicated_schema.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
.../scala/org/apache/spark/sql/util/ArrowUtils.scala | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
b/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
index e6fa93af64de..92b52d4ae634 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
@@ -339,24 +339,32 @@ private[sql] object ArrowUtils {
}.asJava)
}
+ /**
+ * Check the schema and fail once an inner struct type contains duplicated
field names. Note
+ * that the first level accepts duplicated names.
+ */
+ def failDuplicatedFieldNames(schema: StructType): Unit = {
+ schema.fields.foreach { field =>
failDuplicatedFieldNamesImpl(field.dataType) }
+ }
+
/**
* Check the schema and fail once a struct type contains duplicated field
names.
*/
- def failDuplicatedFieldNames(dt: DataType): Unit = {
+ private def failDuplicatedFieldNamesImpl(dt: DataType): Unit = {
dt match {
case st: StructType =>
if (st.names.toSet.size != st.names.length) {
throw ExecutionErrors.duplicatedFieldNameInArrowStructError(
st.names.toImmutableArraySeq)
}
- st.fields.foreach { field => failDuplicatedFieldNames(field.dataType) }
+ st.fields.foreach { field =>
failDuplicatedFieldNamesImpl(field.dataType) }
case arr: ArrayType =>
- failDuplicatedFieldNames(arr.elementType)
+ failDuplicatedFieldNamesImpl(arr.elementType)
case map: MapType =>
- failDuplicatedFieldNames(map.keyType)
- failDuplicatedFieldNames(map.valueType)
+ failDuplicatedFieldNamesImpl(map.keyType)
+ failDuplicatedFieldNamesImpl(map.valueType)
case udt: UserDefinedType[_] =>
- failDuplicatedFieldNames(udt.sqlType)
+ failDuplicatedFieldNamesImpl(udt.sqlType)
case _ =>
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]