This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
new 0db5bdecfa6c [SPARK-48639][CONNECT][PYTHON] Add Origin to
Relation.RelationCommon
0db5bdecfa6c is described below
commit 0db5bdecfa6cbfff1be7690bb783a858989776b9
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Mon Jun 24 19:25:56 2024 +0900
[SPARK-48639][CONNECT][PYTHON] Add Origin to Relation.RelationCommon
This PR proposes to add `Origin` (from
https://github.com/apache/spark/pull/46789) to `Relation.RelationCommon`.
This is needed to have a common protobuf message that keeps the source code info.
No. This is not used.
CI should validate protobuf definition, and existing tests should pass.
No.
Closes #47024 from HyukjinKwon/SPARK-48639.
Lead-authored-by: Hyukjin Kwon <[email protected]>
Co-authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
(cherry picked from commit 09cb5921b7d88e63e172aa6aea370319e8bbd2fa)
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../main/protobuf/spark/connect/relations.proto | 4 +-
python/pyspark/sql/connect/proto/relations_pb2.py | 264 ++++++++++-----------
python/pyspark/sql/connect/proto/relations_pb2.pyi | 9 +-
3 files changed, 135 insertions(+), 142 deletions(-)
diff --git
a/connector/connect/common/src/main/protobuf/spark/connect/relations.proto
b/connector/connect/common/src/main/protobuf/spark/connect/relations.proto
index f7f1315ede0f..8b384728983b 100644
--- a/connector/connect/common/src/main/protobuf/spark/connect/relations.proto
+++ b/connector/connect/common/src/main/protobuf/spark/connect/relations.proto
@@ -103,8 +103,8 @@ message Unknown {}
// Common metadata of all relations.
message RelationCommon {
- // (Required) Shared relation metadata.
- string source_info = 1;
+ // (Optional) Shared relation metadata.
+ reserved 1;
// (Optional) A per-client globally unique id for a given connect plan.
optional int64 plan_id = 2;
diff --git a/python/pyspark/sql/connect/proto/relations_pb2.py
b/python/pyspark/sql/connect/proto/relations_pb2.py
index 3f7e57949373..7dd494db8695 100644
--- a/python/pyspark/sql/connect/proto/relations_pb2.py
+++ b/python/pyspark/sql/connect/proto/relations_pb2.py
@@ -35,7 +35,7 @@ from pyspark.sql.connect.proto import catalog_pb2 as
spark_dot_connect_dot_catal
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-
b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto"\xe1\x18\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01
\x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02
\x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03
\x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66il [...]
+
b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto"\xe1\x18\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01
\x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02
\x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03
\x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66il [...]
)
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
@@ -61,135 +61,135 @@ if _descriptor._USE_C_DESCRIPTORS == False:
_UNKNOWN._serialized_start = 3336
_UNKNOWN._serialized_end = 3345
_RELATIONCOMMON._serialized_start = 3347
- _RELATIONCOMMON._serialized_end = 3438
- _SQL._serialized_start = 3441
- _SQL._serialized_end = 3672
- _SQL_ARGSENTRY._serialized_start = 3582
- _SQL_ARGSENTRY._serialized_end = 3672
- _READ._serialized_start = 3675
- _READ._serialized_end = 4338
- _READ_NAMEDTABLE._serialized_start = 3853
- _READ_NAMEDTABLE._serialized_end = 4045
- _READ_NAMEDTABLE_OPTIONSENTRY._serialized_start = 3987
- _READ_NAMEDTABLE_OPTIONSENTRY._serialized_end = 4045
- _READ_DATASOURCE._serialized_start = 4048
- _READ_DATASOURCE._serialized_end = 4325
- _READ_DATASOURCE_OPTIONSENTRY._serialized_start = 3987
- _READ_DATASOURCE_OPTIONSENTRY._serialized_end = 4045
- _PROJECT._serialized_start = 4340
- _PROJECT._serialized_end = 4457
- _FILTER._serialized_start = 4459
- _FILTER._serialized_end = 4571
- _JOIN._serialized_start = 4574
- _JOIN._serialized_end = 5235
- _JOIN_JOINDATATYPE._serialized_start = 4913
- _JOIN_JOINDATATYPE._serialized_end = 5005
- _JOIN_JOINTYPE._serialized_start = 5008
- _JOIN_JOINTYPE._serialized_end = 5216
- _SETOPERATION._serialized_start = 5238
- _SETOPERATION._serialized_end = 5717
- _SETOPERATION_SETOPTYPE._serialized_start = 5554
- _SETOPERATION_SETOPTYPE._serialized_end = 5668
- _LIMIT._serialized_start = 5719
- _LIMIT._serialized_end = 5795
- _OFFSET._serialized_start = 5797
- _OFFSET._serialized_end = 5876
- _TAIL._serialized_start = 5878
- _TAIL._serialized_end = 5953
- _AGGREGATE._serialized_start = 5956
- _AGGREGATE._serialized_end = 6538
- _AGGREGATE_PIVOT._serialized_start = 6295
- _AGGREGATE_PIVOT._serialized_end = 6406
- _AGGREGATE_GROUPTYPE._serialized_start = 6409
- _AGGREGATE_GROUPTYPE._serialized_end = 6538
- _SORT._serialized_start = 6541
- _SORT._serialized_end = 6701
- _DROP._serialized_start = 6704
- _DROP._serialized_end = 6845
- _DEDUPLICATE._serialized_start = 6848
- _DEDUPLICATE._serialized_end = 7088
- _LOCALRELATION._serialized_start = 7090
- _LOCALRELATION._serialized_end = 7179
- _CACHEDLOCALRELATION._serialized_start = 7181
- _CACHEDLOCALRELATION._serialized_end = 7253
- _CACHEDREMOTERELATION._serialized_start = 7255
- _CACHEDREMOTERELATION._serialized_end = 7310
- _SAMPLE._serialized_start = 7313
- _SAMPLE._serialized_end = 7586
- _RANGE._serialized_start = 7589
- _RANGE._serialized_end = 7734
- _SUBQUERYALIAS._serialized_start = 7736
- _SUBQUERYALIAS._serialized_end = 7850
- _REPARTITION._serialized_start = 7853
- _REPARTITION._serialized_end = 7995
- _SHOWSTRING._serialized_start = 7998
- _SHOWSTRING._serialized_end = 8140
- _HTMLSTRING._serialized_start = 8142
- _HTMLSTRING._serialized_end = 8256
- _STATSUMMARY._serialized_start = 8258
- _STATSUMMARY._serialized_end = 8350
- _STATDESCRIBE._serialized_start = 8352
- _STATDESCRIBE._serialized_end = 8433
- _STATCROSSTAB._serialized_start = 8435
- _STATCROSSTAB._serialized_end = 8536
- _STATCOV._serialized_start = 8538
- _STATCOV._serialized_end = 8634
- _STATCORR._serialized_start = 8637
- _STATCORR._serialized_end = 8774
- _STATAPPROXQUANTILE._serialized_start = 8777
- _STATAPPROXQUANTILE._serialized_end = 8941
- _STATFREQITEMS._serialized_start = 8943
- _STATFREQITEMS._serialized_end = 9068
- _STATSAMPLEBY._serialized_start = 9071
- _STATSAMPLEBY._serialized_end = 9380
- _STATSAMPLEBY_FRACTION._serialized_start = 9272
- _STATSAMPLEBY_FRACTION._serialized_end = 9371
- _NAFILL._serialized_start = 9383
- _NAFILL._serialized_end = 9517
- _NADROP._serialized_start = 9520
- _NADROP._serialized_end = 9654
- _NAREPLACE._serialized_start = 9657
- _NAREPLACE._serialized_end = 9953
- _NAREPLACE_REPLACEMENT._serialized_start = 9812
- _NAREPLACE_REPLACEMENT._serialized_end = 9953
- _TODF._serialized_start = 9955
- _TODF._serialized_end = 10043
- _WITHCOLUMNSRENAMED._serialized_start = 10046
- _WITHCOLUMNSRENAMED._serialized_end = 10285
- _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_start = 10218
- _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_end = 10285
- _WITHCOLUMNS._serialized_start = 10287
- _WITHCOLUMNS._serialized_end = 10406
- _WITHWATERMARK._serialized_start = 10409
- _WITHWATERMARK._serialized_end = 10543
- _HINT._serialized_start = 10546
- _HINT._serialized_end = 10678
- _UNPIVOT._serialized_start = 10681
- _UNPIVOT._serialized_end = 11008
- _UNPIVOT_VALUES._serialized_start = 10938
- _UNPIVOT_VALUES._serialized_end = 10997
- _TOSCHEMA._serialized_start = 11010
- _TOSCHEMA._serialized_end = 11116
- _REPARTITIONBYEXPRESSION._serialized_start = 11119
- _REPARTITIONBYEXPRESSION._serialized_end = 11322
- _MAPPARTITIONS._serialized_start = 11325
- _MAPPARTITIONS._serialized_end = 11506
- _GROUPMAP._serialized_start = 11509
- _GROUPMAP._serialized_end = 12144
- _COGROUPMAP._serialized_start = 12147
- _COGROUPMAP._serialized_end = 12673
- _APPLYINPANDASWITHSTATE._serialized_start = 12676
- _APPLYINPANDASWITHSTATE._serialized_end = 13033
- _COMMONINLINEUSERDEFINEDTABLEFUNCTION._serialized_start = 13036
- _COMMONINLINEUSERDEFINEDTABLEFUNCTION._serialized_end = 13280
- _PYTHONUDTF._serialized_start = 13283
- _PYTHONUDTF._serialized_end = 13460
- _COLLECTMETRICS._serialized_start = 13463
- _COLLECTMETRICS._serialized_end = 13599
- _PARSE._serialized_start = 13602
- _PARSE._serialized_end = 13990
- _PARSE_OPTIONSENTRY._serialized_start = 3987
- _PARSE_OPTIONSENTRY._serialized_end = 4045
- _PARSE_PARSEFORMAT._serialized_start = 13891
- _PARSE_PARSEFORMAT._serialized_end = 13979
+ _RELATIONCOMMON._serialized_end = 3411
+ _SQL._serialized_start = 3414
+ _SQL._serialized_end = 3645
+ _SQL_ARGSENTRY._serialized_start = 3555
+ _SQL_ARGSENTRY._serialized_end = 3645
+ _READ._serialized_start = 3648
+ _READ._serialized_end = 4311
+ _READ_NAMEDTABLE._serialized_start = 3826
+ _READ_NAMEDTABLE._serialized_end = 4018
+ _READ_NAMEDTABLE_OPTIONSENTRY._serialized_start = 3960
+ _READ_NAMEDTABLE_OPTIONSENTRY._serialized_end = 4018
+ _READ_DATASOURCE._serialized_start = 4021
+ _READ_DATASOURCE._serialized_end = 4298
+ _READ_DATASOURCE_OPTIONSENTRY._serialized_start = 3960
+ _READ_DATASOURCE_OPTIONSENTRY._serialized_end = 4018
+ _PROJECT._serialized_start = 4313
+ _PROJECT._serialized_end = 4430
+ _FILTER._serialized_start = 4432
+ _FILTER._serialized_end = 4544
+ _JOIN._serialized_start = 4547
+ _JOIN._serialized_end = 5208
+ _JOIN_JOINDATATYPE._serialized_start = 4886
+ _JOIN_JOINDATATYPE._serialized_end = 4978
+ _JOIN_JOINTYPE._serialized_start = 4981
+ _JOIN_JOINTYPE._serialized_end = 5189
+ _SETOPERATION._serialized_start = 5211
+ _SETOPERATION._serialized_end = 5690
+ _SETOPERATION_SETOPTYPE._serialized_start = 5527
+ _SETOPERATION_SETOPTYPE._serialized_end = 5641
+ _LIMIT._serialized_start = 5692
+ _LIMIT._serialized_end = 5768
+ _OFFSET._serialized_start = 5770
+ _OFFSET._serialized_end = 5849
+ _TAIL._serialized_start = 5851
+ _TAIL._serialized_end = 5926
+ _AGGREGATE._serialized_start = 5929
+ _AGGREGATE._serialized_end = 6511
+ _AGGREGATE_PIVOT._serialized_start = 6268
+ _AGGREGATE_PIVOT._serialized_end = 6379
+ _AGGREGATE_GROUPTYPE._serialized_start = 6382
+ _AGGREGATE_GROUPTYPE._serialized_end = 6511
+ _SORT._serialized_start = 6514
+ _SORT._serialized_end = 6674
+ _DROP._serialized_start = 6677
+ _DROP._serialized_end = 6818
+ _DEDUPLICATE._serialized_start = 6821
+ _DEDUPLICATE._serialized_end = 7061
+ _LOCALRELATION._serialized_start = 7063
+ _LOCALRELATION._serialized_end = 7152
+ _CACHEDLOCALRELATION._serialized_start = 7154
+ _CACHEDLOCALRELATION._serialized_end = 7226
+ _CACHEDREMOTERELATION._serialized_start = 7228
+ _CACHEDREMOTERELATION._serialized_end = 7283
+ _SAMPLE._serialized_start = 7286
+ _SAMPLE._serialized_end = 7559
+ _RANGE._serialized_start = 7562
+ _RANGE._serialized_end = 7707
+ _SUBQUERYALIAS._serialized_start = 7709
+ _SUBQUERYALIAS._serialized_end = 7823
+ _REPARTITION._serialized_start = 7826
+ _REPARTITION._serialized_end = 7968
+ _SHOWSTRING._serialized_start = 7971
+ _SHOWSTRING._serialized_end = 8113
+ _HTMLSTRING._serialized_start = 8115
+ _HTMLSTRING._serialized_end = 8229
+ _STATSUMMARY._serialized_start = 8231
+ _STATSUMMARY._serialized_end = 8323
+ _STATDESCRIBE._serialized_start = 8325
+ _STATDESCRIBE._serialized_end = 8406
+ _STATCROSSTAB._serialized_start = 8408
+ _STATCROSSTAB._serialized_end = 8509
+ _STATCOV._serialized_start = 8511
+ _STATCOV._serialized_end = 8607
+ _STATCORR._serialized_start = 8610
+ _STATCORR._serialized_end = 8747
+ _STATAPPROXQUANTILE._serialized_start = 8750
+ _STATAPPROXQUANTILE._serialized_end = 8914
+ _STATFREQITEMS._serialized_start = 8916
+ _STATFREQITEMS._serialized_end = 9041
+ _STATSAMPLEBY._serialized_start = 9044
+ _STATSAMPLEBY._serialized_end = 9353
+ _STATSAMPLEBY_FRACTION._serialized_start = 9245
+ _STATSAMPLEBY_FRACTION._serialized_end = 9344
+ _NAFILL._serialized_start = 9356
+ _NAFILL._serialized_end = 9490
+ _NADROP._serialized_start = 9493
+ _NADROP._serialized_end = 9627
+ _NAREPLACE._serialized_start = 9630
+ _NAREPLACE._serialized_end = 9926
+ _NAREPLACE_REPLACEMENT._serialized_start = 9785
+ _NAREPLACE_REPLACEMENT._serialized_end = 9926
+ _TODF._serialized_start = 9928
+ _TODF._serialized_end = 10016
+ _WITHCOLUMNSRENAMED._serialized_start = 10019
+ _WITHCOLUMNSRENAMED._serialized_end = 10258
+ _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_start = 10191
+ _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_end = 10258
+ _WITHCOLUMNS._serialized_start = 10260
+ _WITHCOLUMNS._serialized_end = 10379
+ _WITHWATERMARK._serialized_start = 10382
+ _WITHWATERMARK._serialized_end = 10516
+ _HINT._serialized_start = 10519
+ _HINT._serialized_end = 10651
+ _UNPIVOT._serialized_start = 10654
+ _UNPIVOT._serialized_end = 10981
+ _UNPIVOT_VALUES._serialized_start = 10911
+ _UNPIVOT_VALUES._serialized_end = 10970
+ _TOSCHEMA._serialized_start = 10983
+ _TOSCHEMA._serialized_end = 11089
+ _REPARTITIONBYEXPRESSION._serialized_start = 11092
+ _REPARTITIONBYEXPRESSION._serialized_end = 11295
+ _MAPPARTITIONS._serialized_start = 11298
+ _MAPPARTITIONS._serialized_end = 11479
+ _GROUPMAP._serialized_start = 11482
+ _GROUPMAP._serialized_end = 12117
+ _COGROUPMAP._serialized_start = 12120
+ _COGROUPMAP._serialized_end = 12646
+ _APPLYINPANDASWITHSTATE._serialized_start = 12649
+ _APPLYINPANDASWITHSTATE._serialized_end = 13006
+ _COMMONINLINEUSERDEFINEDTABLEFUNCTION._serialized_start = 13009
+ _COMMONINLINEUSERDEFINEDTABLEFUNCTION._serialized_end = 13253
+ _PYTHONUDTF._serialized_start = 13256
+ _PYTHONUDTF._serialized_end = 13433
+ _COLLECTMETRICS._serialized_start = 13436
+ _COLLECTMETRICS._serialized_end = 13572
+ _PARSE._serialized_start = 13575
+ _PARSE._serialized_end = 13963
+ _PARSE_OPTIONSENTRY._serialized_start = 3960
+ _PARSE_OPTIONSENTRY._serialized_end = 4018
+ _PARSE_PARSEFORMAT._serialized_start = 13864
+ _PARSE_PARSEFORMAT._serialized_end = 13952
# @@protoc_insertion_point(module_scope)
diff --git a/python/pyspark/sql/connect/proto/relations_pb2.pyi
b/python/pyspark/sql/connect/proto/relations_pb2.pyi
index 007b92ef5f42..1c0036afbc43 100644
--- a/python/pyspark/sql/connect/proto/relations_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/relations_pb2.pyi
@@ -579,26 +579,19 @@ class RelationCommon(google.protobuf.message.Message):
DESCRIPTOR: google.protobuf.descriptor.Descriptor
- SOURCE_INFO_FIELD_NUMBER: builtins.int
PLAN_ID_FIELD_NUMBER: builtins.int
- source_info: builtins.str
- """(Required) Shared relation metadata."""
plan_id: builtins.int
"""(Optional) A per-client globally unique id for a given connect plan."""
def __init__(
self,
*,
- source_info: builtins.str = ...,
plan_id: builtins.int | None = ...,
) -> None: ...
def HasField(
self, field_name: typing_extensions.Literal["_plan_id", b"_plan_id",
"plan_id", b"plan_id"]
) -> builtins.bool: ...
def ClearField(
- self,
- field_name: typing_extensions.Literal[
- "_plan_id", b"_plan_id", "plan_id", b"plan_id", "source_info",
b"source_info"
- ],
+ self, field_name: typing_extensions.Literal["_plan_id", b"_plan_id",
"plan_id", b"plan_id"]
) -> None: ...
def WhichOneof(
self, oneof_group: typing_extensions.Literal["_plan_id", b"_plan_id"]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]