This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 0219eb5984f [SPARK-45661][SQL][PYTHON] Add toNullable in StructType, 
MapType and ArrayType
0219eb5984f is described below

commit 0219eb5984f0f4a7209deb091b713ded10aebba3
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Thu Oct 26 09:30:59 2023 +0900

    [SPARK-45661][SQL][PYTHON] Add toNullable in StructType, MapType and ArrayType
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to add:
    
    - `StructType.toNullable`
    - `MapType.toNullable`
    - `ArrayType.toNullable`
    
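    each of which returns the same data type with all nullability fields
    (`StructField.nullable`, `ArrayType.containsNull`, and
    `MapType.valueContainsNull`) set to true, recursively.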
    
    ### Why are the changes needed?
    
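    Until now there was no public API for making a schema fully nullable
    (the Scala `asNullable` is `private[spark]`). See
    https://stackoverflow.com/questions/33193958/change-nullable-property-of-column-in-spark-dataframe
    as an example of users needing this.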
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, it adds new APIs in both Scala and Python:
    - `StructType.toNullable`
    - `MapType.toNullable`
    - `ArrayType.toNullable`
    
    ### How was this patch tested?
    
    For Scala, it just adds a public alias (`toNullable`) for the existing `asNullable`.
    For the Python side, doctests were added.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #43523 from HyukjinKwon/SPARK-45661.
    
    Authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/sql/types.py                        | 124 +++++++++++++++++++++
 .../org/apache/spark/sql/types/ArrayType.scala     |   8 ++
 .../scala/org/apache/spark/sql/types/MapType.scala |   8 ++
 .../org/apache/spark/sql/types/StructType.scala    |   8 ++
 4 files changed, 148 insertions(+)

diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 01db75b2500..d6862d7178a 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -139,6 +139,9 @@ class DataType:
         """
         return obj
 
+    def _as_nullable(self) -> "DataType":
+        return self
+
     @classmethod
     def fromDDL(cls, ddl: str) -> "DataType":
         """
@@ -593,6 +596,41 @@ class ArrayType(DataType):
     def simpleString(self) -> str:
         return "array<%s>" % self.elementType.simpleString()
 
+    def _as_nullable(self) -> "ArrayType":
+        return ArrayType(self.elementType._as_nullable(), containsNull=True)
+
+    def toNullable(self) -> "ArrayType":
+        """
+        Returns the same data type but set all nullability fields are true
+        (`StructField.nullable`, `ArrayType.containsNull`, and 
`MapType.valueContainsNull`).
+
+        .. versionadded:: 4.0.0
+
+        Returns
+        -------
+        :class:`ArrayType`
+
+        Examples
+        --------
+        Example 1: Simple nullability conversion
+
+        >>> ArrayType(IntegerType(), containsNull=False).toNullable()
+        ArrayType(IntegerType(), True)
+
+        Example 2: Nested nullability conversion
+
+        >>> ArrayType(
+        ...     StructType([
+        ...         StructField("b", IntegerType(), nullable=False),
+        ...         StructField("c", ArrayType(IntegerType(), 
containsNull=False))
+        ...     ]),
+        ...     containsNull=False
+        ... ).toNullable()
+        ArrayType(StructType([StructField('b', IntegerType(), True),
+        StructField('c', ArrayType(IntegerType(), True), True)]), True)
+        """
+        return self._as_nullable()
+
     def __repr__(self) -> str:
         return "ArrayType(%s, %s)" % (self.elementType, str(self.containsNull))
 
@@ -671,6 +709,44 @@ class MapType(DataType):
     def simpleString(self) -> str:
         return "map<%s,%s>" % (self.keyType.simpleString(), 
self.valueType.simpleString())
 
+    def _as_nullable(self) -> "MapType":
+        return MapType(
+            self.keyType._as_nullable(), self.valueType._as_nullable(), 
valueContainsNull=True
+        )
+
+    def toNullable(self) -> "MapType":
+        """
+        Returns the same data type but set all nullability fields are true
+        (`StructField.nullable`, `ArrayType.containsNull`, and 
`MapType.valueContainsNull`).
+
+        .. versionadded:: 4.0.0
+
+        Returns
+        -------
+        :class:`MapType`
+
+        Examples
+        --------
+        Example 1: Simple nullability conversion
+
+        >>> MapType(IntegerType(), StringType(), 
valueContainsNull=False).toNullable()
+        MapType(IntegerType(), StringType(), True)
+
+        Example 2: Nested nullability conversion
+
+        >>> MapType(
+        ...     StringType(),
+        ...     MapType(
+        ...         IntegerType(),
+        ...         ArrayType(IntegerType(), containsNull=False),
+        ...         valueContainsNull=False
+        ...     ),
+        ...     valueContainsNull=False
+        ... ).toNullable()
+        MapType(StringType(), MapType(IntegerType(), ArrayType(IntegerType(), 
True), True), True)
+        """
+        return self._as_nullable()
+
     def __repr__(self) -> str:
         return "MapType(%s, %s, %s)" % (self.keyType, self.valueType, 
str(self.valueContainsNull))
 
@@ -978,6 +1054,54 @@ class StructType(DataType):
     def simpleString(self) -> str:
         return "struct<%s>" % (",".join(f.simpleString() for f in self))
 
+    def _as_nullable(self) -> "StructType":
+        fields = []
+        for field in self.fields:
+            fields.append(
+                StructField(
+                    field.name,
+                    field.dataType._as_nullable(),
+                    nullable=True,
+                    metadata=field.metadata,
+                )
+            )
+        return StructType(fields)
+
+    def toNullable(self) -> "StructType":
+        """
+        Returns the same data type but set all nullability fields are true
+        (`StructField.nullable`, `ArrayType.containsNull`, and 
`MapType.valueContainsNull`).
+
+        .. versionadded:: 4.0.0
+
+        Returns
+        -------
+        :class:`StructType`
+
+        Examples
+        --------
+        Example 1: Simple nullability conversion
+
+        >>> StructType([StructField("a", IntegerType(), 
nullable=False)]).toNullable()
+        StructType([StructField('a', IntegerType(), True)])
+
+        Example 2: Nested nullability conversion
+
+        >>> StructType([
+        ...     StructField("a",
+        ...         StructType([
+        ...             StructField("b", IntegerType(), nullable=False),
+        ...             StructField("c", StructType([
+        ...                 StructField("d", IntegerType(), nullable=False)
+        ...             ]))
+        ...         ]),
+        ...         nullable=False)
+        ... ]).toNullable()
+        StructType([StructField('a', StructType([StructField('b', 
IntegerType(), True),
+        StructField('c', StructType([StructField('d', IntegerType(), True)]), 
True)]), True)])
+        """
+        return self._as_nullable()
+
     def __repr__(self) -> str:
         return "StructType([%s])" % ", ".join(str(field) for field in self)
 
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/ArrayType.scala 
b/sql/api/src/main/scala/org/apache/spark/sql/types/ArrayType.scala
index a5226870097..e5af472d90e 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/types/ArrayType.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/ArrayType.scala
@@ -96,6 +96,14 @@ case class ArrayType(elementType: DataType, containsNull: 
Boolean) extends DataT
   override private[spark] def asNullable: ArrayType =
     ArrayType(elementType.asNullable, containsNull = true)
 
+  /**
+   * Returns the same data type but set all nullability fields are true
+   * (`StructField.nullable`, `ArrayType.containsNull`, and 
`MapType.valueContainsNull`).
+   *
+   * @since 4.0.0
+   */
+  def toNullable: ArrayType = asNullable
+
   override private[spark] def existsRecursively(f: (DataType) => Boolean): 
Boolean = {
     f(this) || elementType.existsRecursively(f)
   }
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/MapType.scala 
b/sql/api/src/main/scala/org/apache/spark/sql/types/MapType.scala
index ce0c76dbe4f..dba870466fc 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/types/MapType.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/MapType.scala
@@ -76,6 +76,14 @@ case class MapType(
   override private[spark] def asNullable: MapType =
     MapType(keyType.asNullable, valueType.asNullable, valueContainsNull = true)
 
+  /**
+   * Returns the same data type but set all nullability fields are true
+   * (`StructField.nullable`, `ArrayType.containsNull`, and 
`MapType.valueContainsNull`).
+   *
+   * @since 4.0.0
+   */
+  def toNullable: MapType = asNullable
+
   override private[spark] def existsRecursively(f: (DataType) => Boolean): 
Boolean = {
     f(this) || keyType.existsRecursively(f) || valueType.existsRecursively(f)
   }
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala 
b/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala
index f1771d933bb..5fe6b0a5f00 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -490,6 +490,14 @@ case class StructType(fields: Array[StructField]) extends 
DataType with Seq[Stru
     StructType(newFields)
   }
 
+  /**
+   * Returns the same data type but set all nullability fields are true
+   * (`StructField.nullable`, `ArrayType.containsNull`, and 
`MapType.valueContainsNull`).
+   *
+   * @since 4.0.0
+   */
+  def toNullable: StructType = asNullable
+
   override private[spark] def existsRecursively(f: (DataType) => Boolean): 
Boolean = {
     f(this) || fields.exists(field => field.dataType.existsRecursively(f))
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to