This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new ad83f71d78d2 [SPARK-50893][CONNECT] Mark UDT.DataType optional
ad83f71d78d2 is described below
commit ad83f71d78d25fd1b579b58f4c1605fb1399b22e
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Tue Jan 21 08:48:55 2025 +0900
[SPARK-50893][CONNECT] Mark UDT.DataType optional
Mark UDT.DataType optional
This field is actually not required for Scala/Java UDTs, e.g. `VectorUDT`.
No
existing protobuf breaking change test
No
Closes #49574 from zhengruifeng/connect_udt_sql_type.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
(cherry picked from commit efeb1e01ce42e4c626522ab9dbecc0240f9ce507)
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/connect/proto/types_pb2.py | 10 +++++-----
python/pyspark/sql/connect/proto/types_pb2.pyi | 14 +++++++++++++-
.../common/src/main/protobuf/spark/connect/types.proto | 6 +++++-
3 files changed, 23 insertions(+), 7 deletions(-)
diff --git a/python/pyspark/sql/connect/proto/types_pb2.py
b/python/pyspark/sql/connect/proto/types_pb2.py
index 55f98717a5b0..2e0ef7048466 100644
--- a/python/pyspark/sql/connect/proto/types_pb2.py
+++ b/python/pyspark/sql/connect/proto/types_pb2.py
@@ -35,7 +35,7 @@ _sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-
b"\n\x19spark/connect/types.proto\x12\rspark.connect\"\xe7!\n\x08\x44\x61taType\x12\x32\n\x04null\x18\x01
\x01(\x0b\x32\x1c.spark.connect.DataType.NULLH\x00R\x04null\x12\x38\n\x06\x62inary\x18\x02
\x01(\x0b\x32\x1e.spark.connect.DataType.BinaryH\x00R\x06\x62inary\x12;\n\x07\x62oolean\x18\x03
\x01(\x0b\x32\x1f.spark.connect.DataType.BooleanH\x00R\x07\x62oolean\x12\x32\n\x04\x62yte\x18\x04
\x01(\x0b\x32\x1c.spark.connect.DataType.ByteH\x00R\x04\x62yte\x12\x35\n\x05short\x18\x05
\x01(\x [...]
+
b"\n\x19spark/connect/types.proto\x12\rspark.connect\"\xf9!\n\x08\x44\x61taType\x12\x32\n\x04null\x18\x01
\x01(\x0b\x32\x1c.spark.connect.DataType.NULLH\x00R\x04null\x12\x38\n\x06\x62inary\x18\x02
\x01(\x0b\x32\x1e.spark.connect.DataType.BinaryH\x00R\x06\x62inary\x12;\n\x07\x62oolean\x18\x03
\x01(\x0b\x32\x1f.spark.connect.DataType.BooleanH\x00R\x07\x62oolean\x12\x32\n\x04\x62yte\x18\x04
\x01(\x0b\x32\x1c.spark.connect.DataType.ByteH\x00R\x04\x62yte\x12\x35\n\x05short\x18\x05
\x01(\x [...]
)
_globals = globals()
@@ -47,7 +47,7 @@ if not _descriptor._USE_C_DESCRIPTORS:
"DESCRIPTOR"
]._serialized_options =
b"\n\036org.apache.spark.connect.protoP\001Z\022internal/generated"
_globals["_DATATYPE"]._serialized_start = 45
- _globals["_DATATYPE"]._serialized_end = 4372
+ _globals["_DATATYPE"]._serialized_end = 4390
_globals["_DATATYPE_BOOLEAN"]._serialized_start = 1595
_globals["_DATATYPE_BOOLEAN"]._serialized_end = 1662
_globals["_DATATYPE_BYTE"]._serialized_start = 1664
@@ -97,7 +97,7 @@ if not _descriptor._USE_C_DESCRIPTORS:
_globals["_DATATYPE_VARIANT"]._serialized_start = 3969
_globals["_DATATYPE_VARIANT"]._serialized_end = 4036
_globals["_DATATYPE_UDT"]._serialized_start = 4039
- _globals["_DATATYPE_UDT"]._serialized_end = 4310
- _globals["_DATATYPE_UNPARSED"]._serialized_start = 4312
- _globals["_DATATYPE_UNPARSED"]._serialized_end = 4364
+ _globals["_DATATYPE_UDT"]._serialized_end = 4328
+ _globals["_DATATYPE_UNPARSED"]._serialized_start = 4330
+ _globals["_DATATYPE_UNPARSED"]._serialized_end = 4382
# @@protoc_insertion_point(module_scope)
diff --git a/python/pyspark/sql/connect/proto/types_pb2.pyi
b/python/pyspark/sql/connect/proto/types_pb2.pyi
index b37621104537..fcf35b8c1f19 100644
--- a/python/pyspark/sql/connect/proto/types_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/types_pb2.pyi
@@ -667,10 +667,14 @@ class DataType(google.protobuf.message.Message):
SQL_TYPE_FIELD_NUMBER: builtins.int
type: builtins.str
jvm_class: builtins.str
+ """Required for Scala/Java UDT"""
python_class: builtins.str
+ """Required for Python UDT"""
serialized_python_class: builtins.str
+ """Required for Python UDT"""
@property
- def sql_type(self) -> global___DataType: ...
+ def sql_type(self) -> global___DataType:
+ """Required for Python UDT"""
def __init__(
self,
*,
@@ -689,6 +693,8 @@ class DataType(google.protobuf.message.Message):
b"_python_class",
"_serialized_python_class",
b"_serialized_python_class",
+ "_sql_type",
+ b"_sql_type",
"jvm_class",
b"jvm_class",
"python_class",
@@ -708,6 +714,8 @@ class DataType(google.protobuf.message.Message):
b"_python_class",
"_serialized_python_class",
b"_serialized_python_class",
+ "_sql_type",
+ b"_sql_type",
"jvm_class",
b"jvm_class",
"python_class",
@@ -735,6 +743,10 @@ class DataType(google.protobuf.message.Message):
"_serialized_python_class", b"_serialized_python_class"
],
) -> typing_extensions.Literal["serialized_python_class"] | None: ...
+ @typing.overload
+ def WhichOneof(
+ self, oneof_group: typing_extensions.Literal["_sql_type",
b"_sql_type"]
+ ) -> typing_extensions.Literal["sql_type"] | None: ...
class Unparsed(google.protobuf.message.Message):
DESCRIPTOR: google.protobuf.descriptor.Descriptor
diff --git a/sql/connect/common/src/main/protobuf/spark/connect/types.proto
b/sql/connect/common/src/main/protobuf/spark/connect/types.proto
index 4f768f201575..e1a111e5d691 100644
--- a/sql/connect/common/src/main/protobuf/spark/connect/types.proto
+++ b/sql/connect/common/src/main/protobuf/spark/connect/types.proto
@@ -188,10 +188,14 @@ message DataType {
message UDT {
string type = 1;
+ // Required for Scala/Java UDT
optional string jvm_class = 2;
+ // Required for Python UDT
optional string python_class = 3;
+ // Required for Python UDT
optional string serialized_python_class = 4;
- DataType sql_type = 5;
+ // Required for Python UDT
+ optional DataType sql_type = 5;
}
message Unparsed {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]