This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new d548120eda7b [SPARK-54175][GEO][CONNECT] Add Geography and Geometry
types to Spark Connect proto
d548120eda7b is described below
commit d548120eda7b0947cb47e22b93fb457a010eabf4
Author: Uros Bojanic <[email protected]>
AuthorDate: Tue Nov 4 14:15:54 2025 -0800
[SPARK-54175][GEO][CONNECT] Add Geography and Geometry types to Spark
Connect proto
### What changes were proposed in this pull request?
Add `Geography` and `Geometry` types to Spark Connect proto.
### Why are the changes needed?
These changes enable the geospatial types (`Geometry` and `Geography`) in Spark Connect.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Regenerated the corresponding Spark Connect Python proto files (`types_pb2.py` and `types_pb2.pyi`) from the updated `types.proto`.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #52872 from uros-db/geo-spark-connect-proto.
Authored-by: Uros Bojanic <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit 32060bf99fc834e197f3e39d8530cd97a39df4ff)
Signed-off-by: Wenchen Fan <[email protected]>
---
python/pyspark/sql/connect/proto/types_pb2.py | 116 +++++++++++----------
python/pyspark/sql/connect/proto/types_pb2.pyi | 59 +++++++++++
.../src/main/protobuf/spark/connect/types.proto | 18 +++-
3 files changed, 134 insertions(+), 59 deletions(-)
diff --git a/python/pyspark/sql/connect/proto/types_pb2.py
b/python/pyspark/sql/connect/proto/types_pb2.py
index 9a52129103ad..4e35f6b8911a 100644
--- a/python/pyspark/sql/connect/proto/types_pb2.py
+++ b/python/pyspark/sql/connect/proto/types_pb2.py
@@ -35,7 +35,7 @@ _sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-
b"\n\x19spark/connect/types.proto\x12\rspark.connect\"\xac#\n\x08\x44\x61taType\x12\x32\n\x04null\x18\x01
\x01(\x0b\x32\x1c.spark.connect.DataType.NULLH\x00R\x04null\x12\x38\n\x06\x62inary\x18\x02
\x01(\x0b\x32\x1e.spark.connect.DataType.BinaryH\x00R\x06\x62inary\x12;\n\x07\x62oolean\x18\x03
\x01(\x0b\x32\x1f.spark.connect.DataType.BooleanH\x00R\x07\x62oolean\x12\x32\n\x04\x62yte\x18\x04
\x01(\x0b\x32\x1c.spark.connect.DataType.ByteH\x00R\x04\x62yte\x12\x35\n\x05short\x18\x05
\x01(\x [...]
+
b"\n\x19spark/connect/types.proto\x12\rspark.connect\"\xd8%\n\x08\x44\x61taType\x12\x32\n\x04null\x18\x01
\x01(\x0b\x32\x1c.spark.connect.DataType.NULLH\x00R\x04null\x12\x38\n\x06\x62inary\x18\x02
\x01(\x0b\x32\x1e.spark.connect.DataType.BinaryH\x00R\x06\x62inary\x12;\n\x07\x62oolean\x18\x03
\x01(\x0b\x32\x1f.spark.connect.DataType.BooleanH\x00R\x07\x62oolean\x12\x32\n\x04\x62yte\x18\x04
\x01(\x0b\x32\x1c.spark.connect.DataType.ByteH\x00R\x04\x62yte\x12\x35\n\x05short\x18\x05
\x01(\x [...]
)
_globals = globals()
@@ -47,59 +47,63 @@ if not _descriptor._USE_C_DESCRIPTORS:
"DESCRIPTOR"
]._serialized_options =
b"\n\036org.apache.spark.connect.protoP\001Z\022internal/generated"
_globals["_DATATYPE"]._serialized_start = 45
- _globals["_DATATYPE"]._serialized_end = 4569
- _globals["_DATATYPE_BOOLEAN"]._serialized_start = 1647
- _globals["_DATATYPE_BOOLEAN"]._serialized_end = 1714
- _globals["_DATATYPE_BYTE"]._serialized_start = 1716
- _globals["_DATATYPE_BYTE"]._serialized_end = 1780
- _globals["_DATATYPE_SHORT"]._serialized_start = 1782
- _globals["_DATATYPE_SHORT"]._serialized_end = 1847
- _globals["_DATATYPE_INTEGER"]._serialized_start = 1849
- _globals["_DATATYPE_INTEGER"]._serialized_end = 1916
- _globals["_DATATYPE_LONG"]._serialized_start = 1918
- _globals["_DATATYPE_LONG"]._serialized_end = 1982
- _globals["_DATATYPE_FLOAT"]._serialized_start = 1984
- _globals["_DATATYPE_FLOAT"]._serialized_end = 2049
- _globals["_DATATYPE_DOUBLE"]._serialized_start = 2051
- _globals["_DATATYPE_DOUBLE"]._serialized_end = 2117
- _globals["_DATATYPE_STRING"]._serialized_start = 2119
- _globals["_DATATYPE_STRING"]._serialized_end = 2215
- _globals["_DATATYPE_BINARY"]._serialized_start = 2217
- _globals["_DATATYPE_BINARY"]._serialized_end = 2283
- _globals["_DATATYPE_NULL"]._serialized_start = 2285
- _globals["_DATATYPE_NULL"]._serialized_end = 2349
- _globals["_DATATYPE_TIMESTAMP"]._serialized_start = 2351
- _globals["_DATATYPE_TIMESTAMP"]._serialized_end = 2420
- _globals["_DATATYPE_DATE"]._serialized_start = 2422
- _globals["_DATATYPE_DATE"]._serialized_end = 2486
- _globals["_DATATYPE_TIMESTAMPNTZ"]._serialized_start = 2488
- _globals["_DATATYPE_TIMESTAMPNTZ"]._serialized_end = 2560
- _globals["_DATATYPE_TIME"]._serialized_start = 2562
- _globals["_DATATYPE_TIME"]._serialized_end = 2675
- _globals["_DATATYPE_CALENDARINTERVAL"]._serialized_start = 2677
- _globals["_DATATYPE_CALENDARINTERVAL"]._serialized_end = 2753
- _globals["_DATATYPE_YEARMONTHINTERVAL"]._serialized_start = 2756
- _globals["_DATATYPE_YEARMONTHINTERVAL"]._serialized_end = 2935
- _globals["_DATATYPE_DAYTIMEINTERVAL"]._serialized_start = 2938
- _globals["_DATATYPE_DAYTIMEINTERVAL"]._serialized_end = 3115
- _globals["_DATATYPE_CHAR"]._serialized_start = 3117
- _globals["_DATATYPE_CHAR"]._serialized_end = 3205
- _globals["_DATATYPE_VARCHAR"]._serialized_start = 3207
- _globals["_DATATYPE_VARCHAR"]._serialized_end = 3298
- _globals["_DATATYPE_DECIMAL"]._serialized_start = 3301
- _globals["_DATATYPE_DECIMAL"]._serialized_end = 3454
- _globals["_DATATYPE_STRUCTFIELD"]._serialized_start = 3457
- _globals["_DATATYPE_STRUCTFIELD"]._serialized_end = 3618
- _globals["_DATATYPE_STRUCT"]._serialized_start = 3620
- _globals["_DATATYPE_STRUCT"]._serialized_end = 3747
- _globals["_DATATYPE_ARRAY"]._serialized_start = 3750
- _globals["_DATATYPE_ARRAY"]._serialized_end = 3912
- _globals["_DATATYPE_MAP"]._serialized_start = 3915
- _globals["_DATATYPE_MAP"]._serialized_end = 4134
- _globals["_DATATYPE_VARIANT"]._serialized_start = 4136
- _globals["_DATATYPE_VARIANT"]._serialized_end = 4203
- _globals["_DATATYPE_UDT"]._serialized_start = 4206
- _globals["_DATATYPE_UDT"]._serialized_end = 4495
- _globals["_DATATYPE_UNPARSED"]._serialized_start = 4497
- _globals["_DATATYPE_UNPARSED"]._serialized_end = 4549
+ _globals["_DATATYPE"]._serialized_end = 4869
+ _globals["_DATATYPE_BOOLEAN"]._serialized_start = 1778
+ _globals["_DATATYPE_BOOLEAN"]._serialized_end = 1845
+ _globals["_DATATYPE_BYTE"]._serialized_start = 1847
+ _globals["_DATATYPE_BYTE"]._serialized_end = 1911
+ _globals["_DATATYPE_SHORT"]._serialized_start = 1913
+ _globals["_DATATYPE_SHORT"]._serialized_end = 1978
+ _globals["_DATATYPE_INTEGER"]._serialized_start = 1980
+ _globals["_DATATYPE_INTEGER"]._serialized_end = 2047
+ _globals["_DATATYPE_LONG"]._serialized_start = 2049
+ _globals["_DATATYPE_LONG"]._serialized_end = 2113
+ _globals["_DATATYPE_FLOAT"]._serialized_start = 2115
+ _globals["_DATATYPE_FLOAT"]._serialized_end = 2180
+ _globals["_DATATYPE_DOUBLE"]._serialized_start = 2182
+ _globals["_DATATYPE_DOUBLE"]._serialized_end = 2248
+ _globals["_DATATYPE_STRING"]._serialized_start = 2250
+ _globals["_DATATYPE_STRING"]._serialized_end = 2346
+ _globals["_DATATYPE_BINARY"]._serialized_start = 2348
+ _globals["_DATATYPE_BINARY"]._serialized_end = 2414
+ _globals["_DATATYPE_NULL"]._serialized_start = 2416
+ _globals["_DATATYPE_NULL"]._serialized_end = 2480
+ _globals["_DATATYPE_TIMESTAMP"]._serialized_start = 2482
+ _globals["_DATATYPE_TIMESTAMP"]._serialized_end = 2551
+ _globals["_DATATYPE_DATE"]._serialized_start = 2553
+ _globals["_DATATYPE_DATE"]._serialized_end = 2617
+ _globals["_DATATYPE_TIMESTAMPNTZ"]._serialized_start = 2619
+ _globals["_DATATYPE_TIMESTAMPNTZ"]._serialized_end = 2691
+ _globals["_DATATYPE_TIME"]._serialized_start = 2693
+ _globals["_DATATYPE_TIME"]._serialized_end = 2806
+ _globals["_DATATYPE_CALENDARINTERVAL"]._serialized_start = 2808
+ _globals["_DATATYPE_CALENDARINTERVAL"]._serialized_end = 2884
+ _globals["_DATATYPE_YEARMONTHINTERVAL"]._serialized_start = 2887
+ _globals["_DATATYPE_YEARMONTHINTERVAL"]._serialized_end = 3066
+ _globals["_DATATYPE_DAYTIMEINTERVAL"]._serialized_start = 3069
+ _globals["_DATATYPE_DAYTIMEINTERVAL"]._serialized_end = 3246
+ _globals["_DATATYPE_CHAR"]._serialized_start = 3248
+ _globals["_DATATYPE_CHAR"]._serialized_end = 3336
+ _globals["_DATATYPE_VARCHAR"]._serialized_start = 3338
+ _globals["_DATATYPE_VARCHAR"]._serialized_end = 3429
+ _globals["_DATATYPE_DECIMAL"]._serialized_start = 3432
+ _globals["_DATATYPE_DECIMAL"]._serialized_end = 3585
+ _globals["_DATATYPE_STRUCTFIELD"]._serialized_start = 3588
+ _globals["_DATATYPE_STRUCTFIELD"]._serialized_end = 3749
+ _globals["_DATATYPE_STRUCT"]._serialized_start = 3751
+ _globals["_DATATYPE_STRUCT"]._serialized_end = 3878
+ _globals["_DATATYPE_ARRAY"]._serialized_start = 3881
+ _globals["_DATATYPE_ARRAY"]._serialized_end = 4043
+ _globals["_DATATYPE_MAP"]._serialized_start = 4046
+ _globals["_DATATYPE_MAP"]._serialized_end = 4265
+ _globals["_DATATYPE_GEOMETRY"]._serialized_start = 4267
+ _globals["_DATATYPE_GEOMETRY"]._serialized_end = 4355
+ _globals["_DATATYPE_GEOGRAPHY"]._serialized_start = 4357
+ _globals["_DATATYPE_GEOGRAPHY"]._serialized_end = 4446
+ _globals["_DATATYPE_VARIANT"]._serialized_start = 4448
+ _globals["_DATATYPE_VARIANT"]._serialized_end = 4515
+ _globals["_DATATYPE_UDT"]._serialized_start = 4518
+ _globals["_DATATYPE_UDT"]._serialized_end = 4807
+ _globals["_DATATYPE_UNPARSED"]._serialized_start = 4809
+ _globals["_DATATYPE_UNPARSED"]._serialized_end = 4861
# @@protoc_insertion_point(module_scope)
diff --git a/python/pyspark/sql/connect/proto/types_pb2.pyi
b/python/pyspark/sql/connect/proto/types_pb2.pyi
index d46770c4f888..3f625890a809 100644
--- a/python/pyspark/sql/connect/proto/types_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/types_pb2.pyi
@@ -674,6 +674,46 @@ class DataType(google.protobuf.message.Message):
],
) -> None: ...
+ class Geometry(google.protobuf.message.Message):
+ DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+ SRID_FIELD_NUMBER: builtins.int
+ TYPE_VARIATION_REFERENCE_FIELD_NUMBER: builtins.int
+ srid: builtins.int
+ type_variation_reference: builtins.int
+ def __init__(
+ self,
+ *,
+ srid: builtins.int = ...,
+ type_variation_reference: builtins.int = ...,
+ ) -> None: ...
+ def ClearField(
+ self,
+ field_name: typing_extensions.Literal[
+ "srid", b"srid", "type_variation_reference",
b"type_variation_reference"
+ ],
+ ) -> None: ...
+
+ class Geography(google.protobuf.message.Message):
+ DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+ SRID_FIELD_NUMBER: builtins.int
+ TYPE_VARIATION_REFERENCE_FIELD_NUMBER: builtins.int
+ srid: builtins.int
+ type_variation_reference: builtins.int
+ def __init__(
+ self,
+ *,
+ srid: builtins.int = ...,
+ type_variation_reference: builtins.int = ...,
+ ) -> None: ...
+ def ClearField(
+ self,
+ field_name: typing_extensions.Literal[
+ "srid", b"srid", "type_variation_reference",
b"type_variation_reference"
+ ],
+ ) -> None: ...
+
class Variant(google.protobuf.message.Message):
DESCRIPTOR: google.protobuf.descriptor.Descriptor
@@ -821,6 +861,8 @@ class DataType(google.protobuf.message.Message):
MAP_FIELD_NUMBER: builtins.int
VARIANT_FIELD_NUMBER: builtins.int
UDT_FIELD_NUMBER: builtins.int
+ GEOMETRY_FIELD_NUMBER: builtins.int
+ GEOGRAPHY_FIELD_NUMBER: builtins.int
UNPARSED_FIELD_NUMBER: builtins.int
TIME_FIELD_NUMBER: builtins.int
@property
@@ -878,6 +920,11 @@ class DataType(google.protobuf.message.Message):
def udt(self) -> global___DataType.UDT:
"""UserDefinedType"""
@property
+ def geometry(self) -> global___DataType.Geometry:
+ """Geospatial types"""
+ @property
+ def geography(self) -> global___DataType.Geography: ...
+ @property
def unparsed(self) -> global___DataType.Unparsed:
"""UnparsedDataType"""
@property
@@ -909,6 +956,8 @@ class DataType(google.protobuf.message.Message):
map: global___DataType.Map | None = ...,
variant: global___DataType.Variant | None = ...,
udt: global___DataType.UDT | None = ...,
+ geometry: global___DataType.Geometry | None = ...,
+ geography: global___DataType.Geography | None = ...,
unparsed: global___DataType.Unparsed | None = ...,
time: global___DataType.Time | None = ...,
) -> None: ...
@@ -937,6 +986,10 @@ class DataType(google.protobuf.message.Message):
b"double",
"float",
b"float",
+ "geography",
+ b"geography",
+ "geometry",
+ b"geometry",
"integer",
b"integer",
"kind",
@@ -996,6 +1049,10 @@ class DataType(google.protobuf.message.Message):
b"double",
"float",
b"float",
+ "geography",
+ b"geography",
+ "geometry",
+ b"geometry",
"integer",
b"integer",
"kind",
@@ -1058,6 +1115,8 @@ class DataType(google.protobuf.message.Message):
"map",
"variant",
"udt",
+ "geometry",
+ "geography",
"unparsed",
"time",
]
diff --git a/sql/connect/common/src/main/protobuf/spark/connect/types.proto
b/sql/connect/common/src/main/protobuf/spark/connect/types.proto
index 1800e3885774..caaa2340f95d 100644
--- a/sql/connect/common/src/main/protobuf/spark/connect/types.proto
+++ b/sql/connect/common/src/main/protobuf/spark/connect/types.proto
@@ -67,15 +67,17 @@ message DataType {
// UserDefinedType
UDT udt = 23;
+ // Geospatial types
+ Geometry geometry = 26;
+
+ Geography geography = 27;
+
// UnparsedDataType
Unparsed unparsed = 24;
Time time = 28;
}
- // Reserved for geometry and geography types
- reserved 26, 27;
-
message Boolean {
uint32 type_variation_reference = 1;
}
@@ -192,6 +194,16 @@ message DataType {
uint32 type_variation_reference = 4;
}
+ message Geometry {
+ int32 srid = 1;
+ uint32 type_variation_reference = 2;
+ }
+
+ message Geography {
+ int32 srid = 1;
+ uint32 type_variation_reference = 2;
+ }
+
message Variant {
uint32 type_variation_reference = 1;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]