This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new d548120eda7b [SPARK-54175][GEO][CONNECT] Add Geography and Geometry types to Spark Connect proto
d548120eda7b is described below

commit d548120eda7b0947cb47e22b93fb457a010eabf4
Author: Uros Bojanic <[email protected]>
AuthorDate: Tue Nov 4 14:15:54 2025 -0800

    [SPARK-54175][GEO][CONNECT] Add Geography and Geometry types to Spark Connect proto
    
    ### What changes were proposed in this pull request?
    Add `Geography` and `Geometry` types to Spark Connect proto.
    
    ### Why are the changes needed?
    Enabling geospatial types in Spark Connect.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Regenerated the corresponding Spark Connect proto files.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #52872 from uros-db/geo-spark-connect-proto.
    
    Authored-by: Uros Bojanic <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
    (cherry picked from commit 32060bf99fc834e197f3e39d8530cd97a39df4ff)
    Signed-off-by: Wenchen Fan <[email protected]>
---
 python/pyspark/sql/connect/proto/types_pb2.py      | 116 +++++++++++----------
 python/pyspark/sql/connect/proto/types_pb2.pyi     |  59 +++++++++++
 .../src/main/protobuf/spark/connect/types.proto    |  18 +++-
 3 files changed, 134 insertions(+), 59 deletions(-)

diff --git a/python/pyspark/sql/connect/proto/types_pb2.py b/python/pyspark/sql/connect/proto/types_pb2.py
index 9a52129103ad..4e35f6b8911a 100644
--- a/python/pyspark/sql/connect/proto/types_pb2.py
+++ b/python/pyspark/sql/connect/proto/types_pb2.py
@@ -35,7 +35,7 @@ _sym_db = _symbol_database.Default()
 
 
 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-    
b"\n\x19spark/connect/types.proto\x12\rspark.connect\"\xac#\n\x08\x44\x61taType\x12\x32\n\x04null\x18\x01
 
\x01(\x0b\x32\x1c.spark.connect.DataType.NULLH\x00R\x04null\x12\x38\n\x06\x62inary\x18\x02
 
\x01(\x0b\x32\x1e.spark.connect.DataType.BinaryH\x00R\x06\x62inary\x12;\n\x07\x62oolean\x18\x03
 
\x01(\x0b\x32\x1f.spark.connect.DataType.BooleanH\x00R\x07\x62oolean\x12\x32\n\x04\x62yte\x18\x04
 
\x01(\x0b\x32\x1c.spark.connect.DataType.ByteH\x00R\x04\x62yte\x12\x35\n\x05short\x18\x05
 \x01(\x [...]
+    
b"\n\x19spark/connect/types.proto\x12\rspark.connect\"\xd8%\n\x08\x44\x61taType\x12\x32\n\x04null\x18\x01
 
\x01(\x0b\x32\x1c.spark.connect.DataType.NULLH\x00R\x04null\x12\x38\n\x06\x62inary\x18\x02
 
\x01(\x0b\x32\x1e.spark.connect.DataType.BinaryH\x00R\x06\x62inary\x12;\n\x07\x62oolean\x18\x03
 
\x01(\x0b\x32\x1f.spark.connect.DataType.BooleanH\x00R\x07\x62oolean\x12\x32\n\x04\x62yte\x18\x04
 
\x01(\x0b\x32\x1c.spark.connect.DataType.ByteH\x00R\x04\x62yte\x12\x35\n\x05short\x18\x05
 \x01(\x [...]
 )
 
 _globals = globals()
@@ -47,59 +47,63 @@ if not _descriptor._USE_C_DESCRIPTORS:
         "DESCRIPTOR"
     ]._serialized_options = b"\n\036org.apache.spark.connect.protoP\001Z\022internal/generated"
     _globals["_DATATYPE"]._serialized_start = 45
-    _globals["_DATATYPE"]._serialized_end = 4569
-    _globals["_DATATYPE_BOOLEAN"]._serialized_start = 1647
-    _globals["_DATATYPE_BOOLEAN"]._serialized_end = 1714
-    _globals["_DATATYPE_BYTE"]._serialized_start = 1716
-    _globals["_DATATYPE_BYTE"]._serialized_end = 1780
-    _globals["_DATATYPE_SHORT"]._serialized_start = 1782
-    _globals["_DATATYPE_SHORT"]._serialized_end = 1847
-    _globals["_DATATYPE_INTEGER"]._serialized_start = 1849
-    _globals["_DATATYPE_INTEGER"]._serialized_end = 1916
-    _globals["_DATATYPE_LONG"]._serialized_start = 1918
-    _globals["_DATATYPE_LONG"]._serialized_end = 1982
-    _globals["_DATATYPE_FLOAT"]._serialized_start = 1984
-    _globals["_DATATYPE_FLOAT"]._serialized_end = 2049
-    _globals["_DATATYPE_DOUBLE"]._serialized_start = 2051
-    _globals["_DATATYPE_DOUBLE"]._serialized_end = 2117
-    _globals["_DATATYPE_STRING"]._serialized_start = 2119
-    _globals["_DATATYPE_STRING"]._serialized_end = 2215
-    _globals["_DATATYPE_BINARY"]._serialized_start = 2217
-    _globals["_DATATYPE_BINARY"]._serialized_end = 2283
-    _globals["_DATATYPE_NULL"]._serialized_start = 2285
-    _globals["_DATATYPE_NULL"]._serialized_end = 2349
-    _globals["_DATATYPE_TIMESTAMP"]._serialized_start = 2351
-    _globals["_DATATYPE_TIMESTAMP"]._serialized_end = 2420
-    _globals["_DATATYPE_DATE"]._serialized_start = 2422
-    _globals["_DATATYPE_DATE"]._serialized_end = 2486
-    _globals["_DATATYPE_TIMESTAMPNTZ"]._serialized_start = 2488
-    _globals["_DATATYPE_TIMESTAMPNTZ"]._serialized_end = 2560
-    _globals["_DATATYPE_TIME"]._serialized_start = 2562
-    _globals["_DATATYPE_TIME"]._serialized_end = 2675
-    _globals["_DATATYPE_CALENDARINTERVAL"]._serialized_start = 2677
-    _globals["_DATATYPE_CALENDARINTERVAL"]._serialized_end = 2753
-    _globals["_DATATYPE_YEARMONTHINTERVAL"]._serialized_start = 2756
-    _globals["_DATATYPE_YEARMONTHINTERVAL"]._serialized_end = 2935
-    _globals["_DATATYPE_DAYTIMEINTERVAL"]._serialized_start = 2938
-    _globals["_DATATYPE_DAYTIMEINTERVAL"]._serialized_end = 3115
-    _globals["_DATATYPE_CHAR"]._serialized_start = 3117
-    _globals["_DATATYPE_CHAR"]._serialized_end = 3205
-    _globals["_DATATYPE_VARCHAR"]._serialized_start = 3207
-    _globals["_DATATYPE_VARCHAR"]._serialized_end = 3298
-    _globals["_DATATYPE_DECIMAL"]._serialized_start = 3301
-    _globals["_DATATYPE_DECIMAL"]._serialized_end = 3454
-    _globals["_DATATYPE_STRUCTFIELD"]._serialized_start = 3457
-    _globals["_DATATYPE_STRUCTFIELD"]._serialized_end = 3618
-    _globals["_DATATYPE_STRUCT"]._serialized_start = 3620
-    _globals["_DATATYPE_STRUCT"]._serialized_end = 3747
-    _globals["_DATATYPE_ARRAY"]._serialized_start = 3750
-    _globals["_DATATYPE_ARRAY"]._serialized_end = 3912
-    _globals["_DATATYPE_MAP"]._serialized_start = 3915
-    _globals["_DATATYPE_MAP"]._serialized_end = 4134
-    _globals["_DATATYPE_VARIANT"]._serialized_start = 4136
-    _globals["_DATATYPE_VARIANT"]._serialized_end = 4203
-    _globals["_DATATYPE_UDT"]._serialized_start = 4206
-    _globals["_DATATYPE_UDT"]._serialized_end = 4495
-    _globals["_DATATYPE_UNPARSED"]._serialized_start = 4497
-    _globals["_DATATYPE_UNPARSED"]._serialized_end = 4549
+    _globals["_DATATYPE"]._serialized_end = 4869
+    _globals["_DATATYPE_BOOLEAN"]._serialized_start = 1778
+    _globals["_DATATYPE_BOOLEAN"]._serialized_end = 1845
+    _globals["_DATATYPE_BYTE"]._serialized_start = 1847
+    _globals["_DATATYPE_BYTE"]._serialized_end = 1911
+    _globals["_DATATYPE_SHORT"]._serialized_start = 1913
+    _globals["_DATATYPE_SHORT"]._serialized_end = 1978
+    _globals["_DATATYPE_INTEGER"]._serialized_start = 1980
+    _globals["_DATATYPE_INTEGER"]._serialized_end = 2047
+    _globals["_DATATYPE_LONG"]._serialized_start = 2049
+    _globals["_DATATYPE_LONG"]._serialized_end = 2113
+    _globals["_DATATYPE_FLOAT"]._serialized_start = 2115
+    _globals["_DATATYPE_FLOAT"]._serialized_end = 2180
+    _globals["_DATATYPE_DOUBLE"]._serialized_start = 2182
+    _globals["_DATATYPE_DOUBLE"]._serialized_end = 2248
+    _globals["_DATATYPE_STRING"]._serialized_start = 2250
+    _globals["_DATATYPE_STRING"]._serialized_end = 2346
+    _globals["_DATATYPE_BINARY"]._serialized_start = 2348
+    _globals["_DATATYPE_BINARY"]._serialized_end = 2414
+    _globals["_DATATYPE_NULL"]._serialized_start = 2416
+    _globals["_DATATYPE_NULL"]._serialized_end = 2480
+    _globals["_DATATYPE_TIMESTAMP"]._serialized_start = 2482
+    _globals["_DATATYPE_TIMESTAMP"]._serialized_end = 2551
+    _globals["_DATATYPE_DATE"]._serialized_start = 2553
+    _globals["_DATATYPE_DATE"]._serialized_end = 2617
+    _globals["_DATATYPE_TIMESTAMPNTZ"]._serialized_start = 2619
+    _globals["_DATATYPE_TIMESTAMPNTZ"]._serialized_end = 2691
+    _globals["_DATATYPE_TIME"]._serialized_start = 2693
+    _globals["_DATATYPE_TIME"]._serialized_end = 2806
+    _globals["_DATATYPE_CALENDARINTERVAL"]._serialized_start = 2808
+    _globals["_DATATYPE_CALENDARINTERVAL"]._serialized_end = 2884
+    _globals["_DATATYPE_YEARMONTHINTERVAL"]._serialized_start = 2887
+    _globals["_DATATYPE_YEARMONTHINTERVAL"]._serialized_end = 3066
+    _globals["_DATATYPE_DAYTIMEINTERVAL"]._serialized_start = 3069
+    _globals["_DATATYPE_DAYTIMEINTERVAL"]._serialized_end = 3246
+    _globals["_DATATYPE_CHAR"]._serialized_start = 3248
+    _globals["_DATATYPE_CHAR"]._serialized_end = 3336
+    _globals["_DATATYPE_VARCHAR"]._serialized_start = 3338
+    _globals["_DATATYPE_VARCHAR"]._serialized_end = 3429
+    _globals["_DATATYPE_DECIMAL"]._serialized_start = 3432
+    _globals["_DATATYPE_DECIMAL"]._serialized_end = 3585
+    _globals["_DATATYPE_STRUCTFIELD"]._serialized_start = 3588
+    _globals["_DATATYPE_STRUCTFIELD"]._serialized_end = 3749
+    _globals["_DATATYPE_STRUCT"]._serialized_start = 3751
+    _globals["_DATATYPE_STRUCT"]._serialized_end = 3878
+    _globals["_DATATYPE_ARRAY"]._serialized_start = 3881
+    _globals["_DATATYPE_ARRAY"]._serialized_end = 4043
+    _globals["_DATATYPE_MAP"]._serialized_start = 4046
+    _globals["_DATATYPE_MAP"]._serialized_end = 4265
+    _globals["_DATATYPE_GEOMETRY"]._serialized_start = 4267
+    _globals["_DATATYPE_GEOMETRY"]._serialized_end = 4355
+    _globals["_DATATYPE_GEOGRAPHY"]._serialized_start = 4357
+    _globals["_DATATYPE_GEOGRAPHY"]._serialized_end = 4446
+    _globals["_DATATYPE_VARIANT"]._serialized_start = 4448
+    _globals["_DATATYPE_VARIANT"]._serialized_end = 4515
+    _globals["_DATATYPE_UDT"]._serialized_start = 4518
+    _globals["_DATATYPE_UDT"]._serialized_end = 4807
+    _globals["_DATATYPE_UNPARSED"]._serialized_start = 4809
+    _globals["_DATATYPE_UNPARSED"]._serialized_end = 4861
 # @@protoc_insertion_point(module_scope)
diff --git a/python/pyspark/sql/connect/proto/types_pb2.pyi b/python/pyspark/sql/connect/proto/types_pb2.pyi
index d46770c4f888..3f625890a809 100644
--- a/python/pyspark/sql/connect/proto/types_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/types_pb2.pyi
@@ -674,6 +674,46 @@ class DataType(google.protobuf.message.Message):
             ],
         ) -> None: ...
 
+    class Geometry(google.protobuf.message.Message):
+        DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+        SRID_FIELD_NUMBER: builtins.int
+        TYPE_VARIATION_REFERENCE_FIELD_NUMBER: builtins.int
+        srid: builtins.int
+        type_variation_reference: builtins.int
+        def __init__(
+            self,
+            *,
+            srid: builtins.int = ...,
+            type_variation_reference: builtins.int = ...,
+        ) -> None: ...
+        def ClearField(
+            self,
+            field_name: typing_extensions.Literal[
+                "srid", b"srid", "type_variation_reference", b"type_variation_reference"
+            ],
+        ) -> None: ...
+
+    class Geography(google.protobuf.message.Message):
+        DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+        SRID_FIELD_NUMBER: builtins.int
+        TYPE_VARIATION_REFERENCE_FIELD_NUMBER: builtins.int
+        srid: builtins.int
+        type_variation_reference: builtins.int
+        def __init__(
+            self,
+            *,
+            srid: builtins.int = ...,
+            type_variation_reference: builtins.int = ...,
+        ) -> None: ...
+        def ClearField(
+            self,
+            field_name: typing_extensions.Literal[
+                "srid", b"srid", "type_variation_reference", b"type_variation_reference"
+            ],
+        ) -> None: ...
+
     class Variant(google.protobuf.message.Message):
         DESCRIPTOR: google.protobuf.descriptor.Descriptor
 
@@ -821,6 +861,8 @@ class DataType(google.protobuf.message.Message):
     MAP_FIELD_NUMBER: builtins.int
     VARIANT_FIELD_NUMBER: builtins.int
     UDT_FIELD_NUMBER: builtins.int
+    GEOMETRY_FIELD_NUMBER: builtins.int
+    GEOGRAPHY_FIELD_NUMBER: builtins.int
     UNPARSED_FIELD_NUMBER: builtins.int
     TIME_FIELD_NUMBER: builtins.int
     @property
@@ -878,6 +920,11 @@ class DataType(google.protobuf.message.Message):
     def udt(self) -> global___DataType.UDT:
         """UserDefinedType"""
     @property
+    def geometry(self) -> global___DataType.Geometry:
+        """Geospatial types"""
+    @property
+    def geography(self) -> global___DataType.Geography: ...
+    @property
     def unparsed(self) -> global___DataType.Unparsed:
         """UnparsedDataType"""
     @property
@@ -909,6 +956,8 @@ class DataType(google.protobuf.message.Message):
         map: global___DataType.Map | None = ...,
         variant: global___DataType.Variant | None = ...,
         udt: global___DataType.UDT | None = ...,
+        geometry: global___DataType.Geometry | None = ...,
+        geography: global___DataType.Geography | None = ...,
         unparsed: global___DataType.Unparsed | None = ...,
         time: global___DataType.Time | None = ...,
     ) -> None: ...
@@ -937,6 +986,10 @@ class DataType(google.protobuf.message.Message):
             b"double",
             "float",
             b"float",
+            "geography",
+            b"geography",
+            "geometry",
+            b"geometry",
             "integer",
             b"integer",
             "kind",
@@ -996,6 +1049,10 @@ class DataType(google.protobuf.message.Message):
             b"double",
             "float",
             b"float",
+            "geography",
+            b"geography",
+            "geometry",
+            b"geometry",
             "integer",
             b"integer",
             "kind",
@@ -1058,6 +1115,8 @@ class DataType(google.protobuf.message.Message):
             "map",
             "variant",
             "udt",
+            "geometry",
+            "geography",
             "unparsed",
             "time",
         ]
diff --git a/sql/connect/common/src/main/protobuf/spark/connect/types.proto b/sql/connect/common/src/main/protobuf/spark/connect/types.proto
index 1800e3885774..caaa2340f95d 100644
--- a/sql/connect/common/src/main/protobuf/spark/connect/types.proto
+++ b/sql/connect/common/src/main/protobuf/spark/connect/types.proto
@@ -67,15 +67,17 @@ message DataType {
     // UserDefinedType
     UDT udt = 23;
 
+    // Geospatial types
+    Geometry geometry = 26;
+
+    Geography geography = 27;
+
     // UnparsedDataType
     Unparsed unparsed = 24;
 
     Time time = 28;
   }
 
-  // Reserved for geometry and geography types
-  reserved 26, 27;
-
   message Boolean {
     uint32 type_variation_reference = 1;
   }
@@ -192,6 +194,16 @@ message DataType {
     uint32 type_variation_reference = 4;
   }
 
+  message Geometry {
+    int32 srid = 1;
+    uint32 type_variation_reference = 2;
+  }
+
+  message Geography {
+    int32 srid = 1;
+    uint32 type_variation_reference = 2;
+  }
+
   message Variant {
     uint32 type_variation_reference = 1;
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to