This is an automated email from the ASF dual-hosted git repository.

zhengruifeng pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.x by this push:
     new dc02995290e4 [SPARK-56954][PYTHON] Annotate that we support MapType 
for from_json
dc02995290e4 is described below

commit dc02995290e4be0f27f581d799dd71625e3f8d5d
Author: Tian Gao <[email protected]>
AuthorDate: Thu May 21 10:49:14 2026 +0800

    [SPARK-56954][PYTHON] Annotate that we support MapType for from_json
    
    ### What changes were proposed in this pull request?
    
    * Add annotation for `from_json` that we support `MapType` as schema input.
    * Clean up the docs so they're consistent.
    * Add a test for `MapType`.
    
    ### Why are the changes needed?
    
    We already support the feature; we just need the annotation so that it is clear to users and type checkers won't complain.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    CI for both lint and new test.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #55997 from gaogaotiantian/from-json-map-type.
    
    Authored-by: Tian Gao <[email protected]>
    Signed-off-by: Ruifeng Zheng <[email protected]>
    (cherry picked from commit 2ffd11dbf178cab85e53963e94246f4eb3caab81)
    Signed-off-by: Ruifeng Zheng <[email protected]>
---
 python/pyspark/sql/connect/functions/builtin.py           | 3 ++-
 python/pyspark/sql/functions/builtin.py                   | 7 ++++---
 python/pyspark/sql/tests/connect/test_connect_function.py | 2 ++
 python/pyspark/sql/tests/test_functions.py                | 1 +
 4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/python/pyspark/sql/connect/functions/builtin.py 
b/python/pyspark/sql/connect/functions/builtin.py
index 0ea0fe65a0ff..fd88faef1047 100644
--- a/python/pyspark/sql/connect/functions/builtin.py
+++ b/python/pyspark/sql/connect/functions/builtin.py
@@ -63,6 +63,7 @@ from pyspark.sql.types import (
     DataType,
     StructType,
     ArrayType,
+    MapType,
     StringType,
 )
 from pyspark.sql.utils import enum_to_value as _enum_to_value
@@ -1974,7 +1975,7 @@ from_csv.__doc__ = pysparkfuncs.from_csv.__doc__
 
 def from_json(
     col: "ColumnOrName",
-    schema: Union[ArrayType, StructType, Column, str],
+    schema: Union[ArrayType, StructType, MapType, Column, str],
     options: Optional[Mapping[str, str]] = None,
 ) -> Column:
     if isinstance(schema, (str, Column)):
diff --git a/python/pyspark/sql/functions/builtin.py 
b/python/pyspark/sql/functions/builtin.py
index eccb7d768e88..25a2eeb96494 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -49,6 +49,7 @@ from pyspark.sql.types import (
     DataType,
     StringType,
     StructType,
+    MapType,
     NumericType,
     _from_numpy_type,
 )
@@ -21044,7 +21045,7 @@ def json_tuple(col: "ColumnOrName", *fields: str) -> 
Column:
 @_try_remote_functions
 def from_json(
     col: "ColumnOrName",
-    schema: Union[ArrayType, StructType, Column, str],
+    schema: Union[ArrayType, StructType, MapType, Column, str],
     options: Optional[Mapping[str, str]] = None,
 ) -> Column:
     """
@@ -21061,8 +21062,8 @@ def from_json(
     ----------
     col : :class:`~pyspark.sql.Column` or str
         a column or column name in JSON format
-    schema : :class:`DataType` or str
-        a StructType, ArrayType of StructType or Python string literal with a 
DDL-formatted string
+    schema : :class:`StructType`, :class:`ArrayType`, :class:`MapType`, or str
+        a StructType, ArrayType of StructType, MapType, or Python string 
literal with a DDL-formatted string
         to use when parsing the json column
     options : dict, optional
         options to control parsing. accepts the same options as the json 
datasource.
diff --git a/python/pyspark/sql/tests/connect/test_connect_function.py 
b/python/pyspark/sql/tests/connect/test_connect_function.py
index ed653333b6c5..a2344064bb6f 100644
--- a/python/pyspark/sql/tests/connect/test_connect_function.py
+++ b/python/pyspark/sql/tests/connect/test_connect_function.py
@@ -26,6 +26,7 @@ from pyspark.sql.types import (
     StructField,
     ArrayType,
     IntegerType,
+    MapType,
 )
 from pyspark.testing import assertDataFrameEqual
 from pyspark.testing.pandasutils import PandasOnSparkTestUtils
@@ -1825,6 +1826,7 @@ class SparkConnectFunctionTests(ReusedMixedTestCase, 
PandasOnSparkTestUtils):
             "MAP<STRING,INT>",
             StructType([StructField("a", IntegerType())]),
             ArrayType(StructType([StructField("a", IntegerType())])),
+            MapType(StringType(), IntegerType()),
         ]:
             self.compare_by_show(
                 cdf.select(CF.from_json(cdf.a, schema)),
diff --git a/python/pyspark/sql/tests/test_functions.py 
b/python/pyspark/sql/tests/test_functions.py
index 978476d8e0b2..ceba0f03ae25 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -146,6 +146,7 @@ class FunctionsTestsMixin:
             "ByteType",  # should be imported from pyspark.sql.types
             "Column",  # should be imported from pyspark.sql
             "DataType",  # should be imported from pyspark.sql.types
+            "MapType",  # should be imported from pyspark.sql.types
             "NumericType",  # should be imported from pyspark.sql.types
             "PySparkTypeError",  # should be imported from pyspark.errors
             "PySparkValueError",  # should be imported from pyspark.errors


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to