This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new fa66d28f0e34 [SPARK-55465][PYTHON] Support GeometryType in convert_numpy
fa66d28f0e34 is described below

commit fa66d28f0e34342b72706210d685180c084938d9
Author: Fangchen Li <[email protected]>
AuthorDate: Thu Mar 5 15:26:44 2026 +0800

    [SPARK-55465][PYTHON] Support GeometryType in convert_numpy
    
    ### What changes were proposed in this pull request?
    
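    Support `GeometryType` in `ArrowArrayToPandasConversion.convert_numpy`: the Arrow struct array (`srid`, `wkb`) is converted to pandas and each non-null value is mapped to a `Geometry` object via `Geometry.fromWKB`, mirroring the existing `GeographyType` handling.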
    
    ### Why are the changes needed?
    
    This is part of the new Arrow-to-pandas converter.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Unit tests added (`test_geometry_convert_numpy` in `python/pyspark/sql/tests/test_conversion.py`).
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Opus 4.6
    
    Closes #54612 from fangchenli/SPARK-55465-convert-numpy-geometry.
    
    Authored-by: Fangchen Li <[email protected]>
    Signed-off-by: Ruifeng Zheng <[email protected]>
---
 python/pyspark/sql/conversion.py            |  7 ++++-
 python/pyspark/sql/tests/test_conversion.py | 40 +++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/sql/conversion.py b/python/pyspark/sql/conversion.py
index aa22594585bb..7e6287fce07e 100644
--- a/python/pyspark/sql/conversion.py
+++ b/python/pyspark/sql/conversion.py
@@ -1607,6 +1607,7 @@ class ArrowArrayToPandasConversion:
             UserDefinedType,
             VariantType,
             GeographyType,
+            GeometryType,
         )
         if df_for_struct and isinstance(spark_type, StructType):
             return all(isinstance(f.dataType, supported_types) for f in 
spark_type.fields)
@@ -1725,13 +1726,17 @@ class ArrowArrayToPandasConversion:
             series = series.map(
                 lambda v: Geography.fromWKB(v["wkb"], v["srid"]) if v is not 
None else None
             )
+        elif isinstance(spark_type, GeometryType):
+            series = arr.to_pandas()
+            series = series.map(
+                lambda v: Geometry.fromWKB(v["wkb"], v["srid"]) if v is not 
None else None
+            )
         # elif isinstance(
         #     spark_type,
         #     (
         #         ArrayType,
         #         MapType,
         #         StructType,
-        #         GeometryType,
         #     ),
         # ):
         # TODO(SPARK-55324): Support complex types
diff --git a/python/pyspark/sql/tests/test_conversion.py b/python/pyspark/sql/tests/test_conversion.py
index 04c22fb31ae9..261b81a407b5 100644
--- a/python/pyspark/sql/tests/test_conversion.py
+++ b/python/pyspark/sql/tests/test_conversion.py
@@ -34,6 +34,7 @@ from pyspark.sql.types import (
     DoubleType,
     Geography,
     GeographyType,
+    Geometry,
     GeometryType,
     IntegerType,
     LongType,
@@ -696,6 +697,45 @@ class ArrowArrayToPandasConversionTests(unittest.TestCase):
         )
         self.assertEqual(len(result), 0)
 
+    def test_geometry_convert_numpy(self):
+        import pyarrow as pa
+
+        geometry_type = pa.struct(
+            [
+                pa.field("srid", pa.int32(), nullable=False),
+                pa.field(
+                    "wkb",
+                    pa.binary(),
+                    nullable=False,
+                    metadata={b"geometry": b"true", b"srid": b"0"},
+                ),
+            ]
+        )
+
+        # basic conversion with nulls
+        # POINT(1.0, 2.0) and POINT(17.0, 7.0) in WKB format
+        wkb1 = bytes.fromhex("0101000000000000000000F03F0000000000000040")
+        wkb2 = bytes.fromhex("010100000000000000000031400000000000001c40")
+        arr = pa.array(
+            [
+                {"srid": 0, "wkb": wkb1},
+                None,
+                {"srid": 0, "wkb": wkb2},
+            ],
+            type=geometry_type,
+        )
+        result = ArrowArrayToPandasConversion.convert_numpy(arr, 
GeometryType(0), ser_name="g")
+        self.assertEqual(result.iloc[0], Geometry(wkb1, 0))
+        self.assertIsNone(result.iloc[1])
+        self.assertEqual(result.iloc[2], Geometry(wkb2, 0))
+        self.assertEqual(result.name, "g")
+
+        # empty
+        result = ArrowArrayToPandasConversion.convert_numpy(
+            pa.array([], type=geometry_type), GeometryType(0)
+        )
+        self.assertEqual(len(result), 0)
+
 
 if __name__ == "__main__":
     from pyspark.testing import main


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to