HyukjinKwon commented on code in PR #38603:
URL: https://github.com/apache/spark/pull/38603#discussion_r1019792212
##########
python/pyspark/sql/protobuf/functions.py:
##########
@@ -95,22 +98,40 @@ def from_protobuf(
+------------------+
|{2, Alice, 109200}|
+------------------+
+ >>> data = [([(1668035962, 2020)])]
+ >>> ddl_schema = "value struct<seconds: LONG, nanos: INT>"
+ >>> df = spark.createDataFrame(data, ddl_schema)
+ >>> message_class_name = "org.sparkproject.spark-protobuf.protobuf.Timestamp"
+ >>> to_proto_df = df.select(to_protobuf(df.value, message_class_name).alias("value"))
+ >>> from_proto_df = to_proto_df.select(
+ ... from_protobuf(to_proto_df.value, message_class_name).alias("value"))
+ >>> from_proto_df.show(truncate=False)
+ +------------------+
+ |value |
+ +------------------+
+ |{1668035962, 2020}|
+ +------------------+
"""
sc = SparkContext._active_spark_context
assert sc is not None and sc._jvm is not None
try:
- jc = sc._jvm.org.apache.spark.sql.protobuf.functions.from_protobuf(
- _to_java_column(data), messageName, descFilePath, options or {}
- )
+ if descFilePath is not None:
+ jc = sc._jvm.org.apache.spark.sql.protobuf.functions.from_protobuf(
+ _to_java_column(data), messageName, descFilePath, options or {}
+ )
+ else:
+ jc = sc._jvm.org.apache.spark.sql.protobuf.functions.from_protobuf(
+ _to_java_column(data), messageName
+ )
except TypeError as e:
if str(e) == "'JavaPackage' object is not callable":
_print_missing_jar("Protobuf", "protobuf", "protobuf", sc.version)
raise
return Column(jc)
-def to_protobuf(data: "ColumnOrName", messageName: str, descFilePath: str) -> Column:
+def to_protobuf(data: "ColumnOrName", messageName: str, descFilePath = None) -> Column:
Review Comment:
```suggestion
def to_protobuf(data: "ColumnOrName", messageName: str, descFilePath: Optional[str] = None) -> Column:
```
##########
python/pyspark/sql/protobuf/functions.py:
##########
@@ -121,7 +142,10 @@ def to_protobuf(data: "ColumnOrName", messageName: str, descFilePath: str) -> Co
data : :class:`~pyspark.sql.Column` or str
the data column.
messageName: str
Review Comment:
```suggestion
messageName: str, optional
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]