This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new fdcf975a2d11 [SPARK-48414][PYTHON] Fix breaking change in python's 
`fromJson`
fdcf975a2d11 is described below

commit fdcf975a2d11fe4f28219a5e85fc25164a8c8831
Author: Stefan Kandic <[email protected]>
AuthorDate: Wed Jul 24 10:43:38 2024 +0900

    [SPARK-48414][PYTHON] Fix breaking change in python's `fromJson`
    
    ### What changes were proposed in this pull request?
    
    Fix breaking change in `fromJson` method by having default param values.
    
    ### Why are the changes needed?
    
    In order to not break clients that don't care about collations.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Existing UTs.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #46737 from stefankandic/fromJsonBreakingChange.
    
    Authored-by: Stefan Kandic <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 python/pyspark/sql/tests/test_types.py | 32 ++++++++++++++++++++++++++++++++
 python/pyspark/sql/types.py            | 18 +++++++++++-------
 2 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/python/pyspark/sql/tests/test_types.py 
b/python/pyspark/sql/tests/test_types.py
index 0d0550e42161..dc974fae90fb 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -818,6 +818,38 @@ class TypesTestsMixin:
             PySparkTypeError, lambda: 
_parse_datatype_json_string(collations_in_nested_map_json)
         )
 
+    def test_array_type_from_json(self):
+        arrayWithoutCollations = ArrayType(StringType(), True)
+        arrayWithCollations = ArrayType(StringType("UNICODE"), True)
+        array_json = {"type": "array", "elementType": "string", 
"containsNull": True}
+        collationsMap = {"element": "UNICODE"}
+
+        self.assertEqual(arrayWithoutCollations, 
ArrayType.fromJson(array_json))
+        self.assertEqual(
+            arrayWithCollations,
+            ArrayType.fromJson(array_json, fieldPath="", 
collationsMap=collationsMap),
+        )
+        self.assertEqual(
+            arrayWithCollations, ArrayType.fromJson(array_json, 
collationsMap=collationsMap)
+        )
+
+    def test_map_type_from_json(self):
+        mapWithoutCollations = MapType(StringType(), StringType(), True)
+        mapWithCollations = MapType(StringType("UNICODE"), 
StringType("UNICODE"), True)
+        map_json = {
+            "type": "map",
+            "keyType": "string",
+            "valueType": "string",
+            "valueContainsNull": True,
+        }
+        collationsMap = {"key": "UNICODE", "value": "UNICODE"}
+
+        self.assertEqual(mapWithoutCollations, MapType.fromJson(map_json))
+        self.assertEqual(
+            mapWithCollations, MapType.fromJson(map_json, fieldPath="", 
collationsMap=collationsMap)
+        )
+        self.assertEqual(mapWithCollations, MapType.fromJson(map_json, 
collationsMap=collationsMap))
+
     def test_schema_with_bad_collations_provider(self):
         from pyspark.sql.types import _parse_datatype_json_string, 
_COLLATIONS_METADATA_KEY
 
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 39763fe8d0a6..67728d4a5cdb 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -762,11 +762,13 @@ class ArrayType(DataType):
     def fromJson(
         cls,
         json: Dict[str, Any],
-        fieldPath: str,
-        collationsMap: Optional[Dict[str, str]],
+        fieldPath: str = "",
+        collationsMap: Optional[Dict[str, str]] = None,
     ) -> "ArrayType":
         elementType = _parse_datatype_json_value(
-            json["elementType"], fieldPath + ".element", collationsMap
+            json["elementType"],
+            "element" if fieldPath == "" else fieldPath + ".element",
+            collationsMap,
         )
         return ArrayType(elementType, json["containsNull"])
 
@@ -902,12 +904,14 @@ class MapType(DataType):
     def fromJson(
         cls,
         json: Dict[str, Any],
-        fieldPath: str,
-        collationsMap: Optional[Dict[str, str]],
+        fieldPath: str = "",
+        collationsMap: Optional[Dict[str, str]] = None,
     ) -> "MapType":
-        keyType = _parse_datatype_json_value(json["keyType"], fieldPath + 
".key", collationsMap)
+        keyType = _parse_datatype_json_value(
+            json["keyType"], "key" if fieldPath == "" else fieldPath + ".key", 
collationsMap
+        )
         valueType = _parse_datatype_json_value(
-            json["valueType"], fieldPath + ".value", collationsMap
+            json["valueType"], "value" if fieldPath == "" else fieldPath + 
".value", collationsMap
         )
         return MapType(
             keyType,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to