(spark) branch master updated: [SPARK-49286][CONNECT][SQL] Move Avro/Protobuf functions to sql/api

hvanhovell Thu, 26 Sep 2024 11:33:14 -0700

This is an automated email from the ASF dual-hosted git repository.

hvanhovell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 7b20e5841a85 [SPARK-49286][CONNECT][SQL] Move Avro/Protobuf functions to sql/api
7b20e5841a85 is described below

commit 7b20e5841a856cd0d81821e330b3ec33098bb9be
Author: Herman van Hovell <[email protected]>
AuthorDate: Thu Sep 26 09:28:16 2024 -0400

    [SPARK-49286][CONNECT][SQL] Move Avro/Protobuf functions to sql/api
    
    ### What changes were proposed in this pull request?
    This PR moves avro and protobuf functions to sql/api.
    
    ### Why are the changes needed?
    We are creating a unified Scala SQL interface.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Existing tests.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #48258 from hvanhovell/SPARK-49286.
    
    Authored-by: Herman van Hovell <[email protected]>
    Signed-off-by: Herman van Hovell <[email protected]>
---
 .../org/apache/spark/sql/avro/functions.scala      |  93 ------
 .../org/apache/spark/sql/protobuf/functions.scala  | 324 ---------------------
 project/MimaExcludes.scala                         |   6 +
 .../org/apache/spark/sql/avro/functions.scala      |   8 +-
 .../org/apache/spark/sql/protobuf/functions.scala  |  82 +++---
 5 files changed, 50 insertions(+), 463 deletions(-)

diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/functions.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/functions.scala
deleted file mode 100755
index 828a609a10e9..000000000000
--- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/functions.scala
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.avro
-
-import scala.jdk.CollectionConverters._
-
-import org.apache.spark.annotation.Experimental
-import org.apache.spark.sql.Column
-import org.apache.spark.sql.internal.ExpressionUtils.{column, expression}
-
-
-// scalastyle:off: object.name
-object functions {
-// scalastyle:on: object.name
-
-  /**
-   * Converts a binary column of avro format into its corresponding catalyst 
value. The specified
-   * schema must match the read data, otherwise the behavior is undefined: it 
may fail or return
-   * arbitrary result.
-   *
-   * @param data the binary column.
-   * @param jsonFormatSchema the avro schema in JSON string format.
-   *
-   * @since 3.0.0
-   */
-  @Experimental
-  def from_avro(
-      data: Column,
-      jsonFormatSchema: String): Column = {
-    AvroDataToCatalyst(data, jsonFormatSchema, Map.empty)
-  }
-
-  /**
-   * Converts a binary column of Avro format into its corresponding catalyst 
value.
-   * The specified schema must match actual schema of the read data, otherwise 
the behavior
-   * is undefined: it may fail or return arbitrary result.
-   * To deserialize the data with a compatible and evolved schema, the 
expected Avro schema can be
-   * set via the option avroSchema.
-   *
-   * @param data the binary column.
-   * @param jsonFormatSchema the avro schema in JSON string format.
-   * @param options options to control how the Avro record is parsed.
-   *
-   * @since 3.0.0
-   */
-  @Experimental
-  def from_avro(
-      data: Column,
-      jsonFormatSchema: String,
-      options: java.util.Map[String, String]): Column = {
-    AvroDataToCatalyst(data, jsonFormatSchema, options.asScala.toMap)
-  }
-
-  /**
-   * Converts a column into binary of avro format.
-   *
-   * @param data the data column.
-   *
-   * @since 3.0.0
-   */
-  @Experimental
-  def to_avro(data: Column): Column = {
-    CatalystDataToAvro(data, None)
-  }
-
-  /**
-   * Converts a column into binary of avro format.
-   *
-   * @param data the data column.
-   * @param jsonFormatSchema user-specified output avro schema in JSON string 
format.
-   *
-   * @since 3.0.0
-   */
-  @Experimental
-  def to_avro(data: Column, jsonFormatSchema: String): Column = {
-    CatalystDataToAvro(data, Some(jsonFormatSchema))
-  }
-}
diff --git a/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/functions.scala b/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/functions.scala
deleted file mode 100644
index 3b0def8fc73f..000000000000
--- a/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/functions.scala
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.spark.sql.protobuf
-
-import scala.jdk.CollectionConverters._
-
-import org.apache.spark.annotation.Experimental
-import org.apache.spark.sql.Column
-import org.apache.spark.sql.functions.lit
-import org.apache.spark.sql.protobuf.utils.ProtobufUtils
-
-// scalastyle:off: object.name
-object functions {
-// scalastyle:on: object.name
-
-  /**
-   * Converts a binary column of Protobuf format into its corresponding 
catalyst value. The
-   * Protobuf definition is provided through Protobuf <i>descriptor file</i>.
-   *
-   * @param data
-   *   the binary column.
-   * @param messageName
-   *   the protobuf message name to look for in descriptor file.
-   * @param descFilePath
-   *   The Protobuf descriptor file. This file is usually created using 
`protoc` with
-   *   `--descriptor_set_out` and `--include_imports` options.
-   * @param options
-   * @since 3.4.0
-   */
-  @Experimental
-  def from_protobuf(
-      data: Column,
-      messageName: String,
-      descFilePath: String,
-      options: java.util.Map[String, String]): Column = {
-    val descriptorFileContent = 
ProtobufUtils.readDescriptorFileContent(descFilePath)
-    from_protobuf(data, messageName, descriptorFileContent, options)
-  }
-
-  /**
-   * Converts a binary column of Protobuf format into its corresponding 
catalyst value.The
-   * Protobuf definition is provided through Protobuf `FileDescriptorSet`.
-   *
-   * @param data
-   *   the binary column.
-   * @param messageName
-   *   the protobuf MessageName to look for in the descriptor set.
-   * @param binaryFileDescriptorSet
-   *   Serialized Protobuf descriptor (`FileDescriptorSet`). Typically 
contents of file created
-   *   using `protoc` with `--descriptor_set_out` and `--include_imports` 
options.
-   *   @param options
-   * @since 3.5.0
-   */
-  @Experimental
-  def from_protobuf(
-      data: Column,
-      messageName: String,
-      binaryFileDescriptorSet: Array[Byte],
-      options: java.util.Map[String, String]): Column = {
-    Column.fnWithOptions(
-      "from_protobuf",
-      options.asScala.iterator,
-      data,
-      lit(messageName),
-      lit(binaryFileDescriptorSet)
-    )
-  }
-
-  /**
-   * Converts a binary column of Protobuf format into its corresponding 
catalyst value. The
-   * Protobuf definition is provided through Protobuf <i>descriptor file</i>.
-   *
-   * @param data
-   *   the binary column.
-   * @param messageName
-   *   the protobuf MessageName to look for in descriptor file.
-   * @param descFilePath
-   *   The Protobuf descriptor file. This file is usually created using 
`protoc` with
-   *   `--descriptor_set_out` and `--include_imports` options.
-   * @since 3.4.0
-   */
-  @Experimental
-  def from_protobuf(data: Column, messageName: String, descFilePath: String): 
Column = {
-    val fileContent = ProtobufUtils.readDescriptorFileContent(descFilePath)
-    from_protobuf(data, messageName, fileContent)
-  }
-
-  /**
-   * Converts a binary column of Protobuf format into its corresponding 
catalyst value.The
-   * Protobuf definition is provided through Protobuf `FileDescriptorSet`.
-   *
-   * @param data
-   *   the binary column.
-   * @param messageName
-   *   the protobuf MessageName to look for in the descriptor set.
-   * @param binaryFileDescriptorSet
-   *   Serialized Protobuf descriptor (`FileDescriptorSet`). Typically 
contents of file created
-   *   using `protoc` with `--descriptor_set_out` and `--include_imports` 
options.
-   * @since 3.5.0
-   */
-  @Experimental
-  def from_protobuf(data: Column, messageName: String, 
binaryFileDescriptorSet: Array[Byte])
-  : Column = {
-    Column.fn(
-      "from_protobuf",
-      data,
-      lit(messageName),
-      lit(binaryFileDescriptorSet)
-    )
-  }
-
-  /**
-   * Converts a binary column of Protobuf format into its corresponding 
catalyst value.
-   * `messageClassName` points to Protobuf Java class. The jar containing Java 
class should be
-   * shaded. Specifically, `com.google.protobuf.*` should be shaded to
-   * `org.sparkproject.spark_protobuf.protobuf.*`.
-   * https://github.com/rangadi/shaded-protobuf-classes is useful to create 
shaded jar from
-   * Protobuf files.
-   *
-   * @param data
-   *   the binary column.
-   * @param messageClassName
-   *   The full name for Protobuf Java class. E.g. 
<code>com.example.protos.ExampleEvent</code>.
-   *   The jar with these classes needs to be shaded as described above.
-   * @since 3.4.0
-   */
-  @Experimental
-  def from_protobuf(data: Column, messageClassName: String): Column = {
-    Column.fn(
-      "from_protobuf",
-      data,
-      lit(messageClassName)
-    )
-  }
-
-  /**
-   * Converts a binary column of Protobuf format into its corresponding 
catalyst value.
-   * `messageClassName` points to Protobuf Java class. The jar containing Java 
class should be
-   * shaded. Specifically, `com.google.protobuf.*` should be shaded to
-   * `org.sparkproject.spark_protobuf.protobuf.*`.
-   * https://github.com/rangadi/shaded-protobuf-classes is useful to create 
shaded jar from
-   * Protobuf files.
-   *
-   * @param data
-   *   the binary column.
-   * @param messageClassName
-   *   The full name for Protobuf Java class. E.g. 
<code>com.example.protos.ExampleEvent</code>.
-   *   The jar with these classes needs to be shaded as described above.
-   * @param options
-   * @since 3.4.0
-   */
-  @Experimental
-  def from_protobuf(
-    data: Column,
-    messageClassName: String,
-    options: java.util.Map[String, String]): Column = {
-    Column.fnWithOptions(
-      "from_protobuf",
-      options.asScala.iterator,
-      data,
-      lit(messageClassName)
-    )
-  }
-
-  /**
-   * Converts a column into binary of protobuf format. The Protobuf definition 
is provided
-   * through Protobuf <i>descriptor file</i>.
-   *
-   * @param data
-   *   the data column.
-   * @param messageName
-   *   the protobuf MessageName to look for in descriptor file.
-   * @param descFilePath
-   *   The Protobuf descriptor file. This file is usually created using 
`protoc` with
-   *   `--descriptor_set_out` and `--include_imports` options.
-   * @since 3.4.0
-   */
-  @Experimental
-  def to_protobuf(data: Column, messageName: String, descFilePath: String): 
Column = {
-    to_protobuf(data, messageName, descFilePath, Map.empty[String, 
String].asJava)
-  }
-
-  /**
-   * Converts a column into binary of protobuf format.The Protobuf definition 
is provided
-   * through Protobuf `FileDescriptorSet`.
-   *
-   * @param data
-   *   the binary column.
-   * @param messageName
-   *   the protobuf MessageName to look for in the descriptor set.
-   * @param binaryFileDescriptorSet
-   *   Serialized Protobuf descriptor (`FileDescriptorSet`). Typically 
contents of file created
-   *   using `protoc` with `--descriptor_set_out` and `--include_imports` 
options.
-   *
-   * @since 3.5.0
-   */
-  @Experimental
-  def to_protobuf(data: Column, messageName: String, binaryFileDescriptorSet: 
Array[Byte])
-  : Column = {
-    Column.fn(
-      "to_protobuf",
-      data,
-      lit(messageName),
-      lit(binaryFileDescriptorSet)
-    )
-  }
-  /**
-   * Converts a column into binary of protobuf format. The Protobuf definition 
is provided
-   * through Protobuf <i>descriptor file</i>.
-   *
-   * @param data
-   *   the data column.
-   * @param messageName
-   *   the protobuf MessageName to look for in descriptor file.
-   * @param descFilePath
-   *   the protobuf descriptor file.
-   * @param options
-   * @since 3.4.0
-   */
-  @Experimental
-  def to_protobuf(
-    data: Column,
-    messageName: String,
-    descFilePath: String,
-    options: java.util.Map[String, String]): Column = {
-    val fileContent = ProtobufUtils.readDescriptorFileContent(descFilePath)
-    to_protobuf(data, messageName, fileContent, options)
-  }
-
-  /**
-   * Converts a column into binary of protobuf format.The Protobuf definition 
is provided
-   * through Protobuf `FileDescriptorSet`.
-   *
-   * @param data
-   *   the binary column.
-   * @param messageName
-   *   the protobuf MessageName to look for in the descriptor set.
-   * @param binaryFileDescriptorSet
-   *   Serialized Protobuf descriptor (`FileDescriptorSet`). Typically 
contents of file created
-   *   using `protoc` with `--descriptor_set_out` and `--include_imports` 
options.
-   * @param options
-   * @since 3.5.0
-   */
-  @Experimental
-  def to_protobuf(
-    data: Column,
-    messageName: String,
-    binaryFileDescriptorSet: Array[Byte],
-    options: java.util.Map[String, String]
-  ): Column = {
-    Column.fnWithOptions(
-      "to_protobuf",
-      options.asScala.iterator,
-      data,
-      lit(messageName),
-      lit(binaryFileDescriptorSet)
-    )
-  }
-
-  /**
-   * Converts a column into binary of protobuf format.
-   * `messageClassName` points to Protobuf Java class. The jar containing Java 
class should be
-   * shaded. Specifically, `com.google.protobuf.*` should be shaded to
-   * `org.sparkproject.spark_protobuf.protobuf.*`.
-   * https://github.com/rangadi/shaded-protobuf-classes is useful to create 
shaded jar from
-   * Protobuf files.
-   *
-   * @param data
-   *   the data column.
-   * @param messageClassName
-   *   The full name for Protobuf Java class. E.g. 
<code>com.example.protos.ExampleEvent</code>.
-   *   The jar with these classes needs to be shaded as described above.
-   * @since 3.4.0
-   */
-  @Experimental
-  def to_protobuf(data: Column, messageClassName: String): Column = {
-    Column.fn(
-      "to_protobuf",
-      data,
-      lit(messageClassName)
-    )
-  }
-
-  /**
-   * Converts a column into binary of protobuf format.
-   * `messageClassName` points to Protobuf Java class. The jar containing Java 
class should be
-   * shaded. Specifically, `com.google.protobuf.*` should be shaded to
-   * `org.sparkproject.spark_protobuf.protobuf.*`.
-   * https://github.com/rangadi/shaded-protobuf-classes is useful to create 
shaded jar from
-   * Protobuf files.
-   *
-   * @param data
-   *   the data column.
-   * @param messageClassName
-   *   The full name for Protobuf Java class. E.g. 
<code>com.example.protos.ExampleEvent</code>.
-   *   The jar with these classes needs to be shaded as described above.
-   * @param options
-   * @since 3.4.0
-   */
-  @Experimental
-  def to_protobuf(data: Column, messageClassName: String, options: 
java.util.Map[String, String])
-  : Column = {
-    Column.fnWithOptions(
-      "to_protobuf",
-      options.asScala.iterator,
-      data,
-      lit(messageClassName)
-    )
-  }
-}
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index 0bd0121e6e14..41f547a43b69 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -178,6 +178,12 @@ object MimaExcludes {
 
     // SPARK-49282: Shared SparkSessionBuilder
     
ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.sql.SparkSession$Builder"),
+
+    // SPARK-49286: Avro/Protobuf functions in sql/api
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.avro.functions"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.avro.functions$"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.protobuf.functions"),
+    ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.protobuf.functions$"),
   ) ++ loggingExcludes("org.apache.spark.sql.DataFrameReader") ++
     loggingExcludes("org.apache.spark.sql.streaming.DataStreamReader") ++
     loggingExcludes("org.apache.spark.sql.SparkSession#Builder")
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/avro/functions.scala b/sql/api/src/main/scala/org/apache/spark/sql/avro/functions.scala
similarity index 97%
rename from connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/avro/functions.scala
rename to sql/api/src/main/scala/org/apache/spark/sql/avro/functions.scala
index e80bccfee4c9..fffad557aca5 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/avro/functions.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/avro/functions.scala
@@ -37,7 +37,7 @@ object functions {
    * @param jsonFormatSchema
    *   the avro schema in JSON string format.
    *
-   * @since 3.5.0
+   * @since 3.0.0
    */
   @Experimental
   def from_avro(data: Column, jsonFormatSchema: String): Column = {
@@ -57,7 +57,7 @@ object functions {
    * @param options
    *   options to control how the Avro record is parsed.
    *
-   * @since 3.5.0
+   * @since 3.0.0
    */
   @Experimental
   def from_avro(
@@ -73,7 +73,7 @@ object functions {
    * @param data
    *   the data column.
    *
-   * @since 3.5.0
+   * @since 3.0.0
    */
   @Experimental
   def to_avro(data: Column): Column = {
@@ -88,7 +88,7 @@ object functions {
    * @param jsonFormatSchema
    *   user-specified output avro schema in JSON string format.
    *
-   * @since 3.5.0
+   * @since 3.0.0
    */
   @Experimental
   def to_avro(data: Column, jsonFormatSchema: String): Column = {
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/protobuf/functions.scala b/sql/api/src/main/scala/org/apache/spark/sql/protobuf/functions.scala
similarity index 90%
rename from connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/protobuf/functions.scala
rename to sql/api/src/main/scala/org/apache/spark/sql/protobuf/functions.scala
index 2c953fbd07b9..ea9e3c429d65 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/protobuf/functions.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/protobuf/functions.scala
@@ -18,7 +18,6 @@ package org.apache.spark.sql.protobuf
 
 import java.io.FileNotFoundException
 import java.nio.file.{Files, NoSuchFileException, Paths}
-import java.util.Collections
 
 import scala.jdk.CollectionConverters._
 import scala.util.control.NonFatal
@@ -30,7 +29,7 @@ import org.apache.spark.sql.functions.lit
 
 // scalastyle:off: object.name
 object functions {
-  // scalastyle:on: object.name
+// scalastyle:on: object.name
 
   /**
    * Converts a binary column of Protobuf format into its corresponding 
catalyst value. The
@@ -44,7 +43,7 @@ object functions {
    *   The Protobuf descriptor file. This file is usually created using 
`protoc` with
    *   `--descriptor_set_out` and `--include_imports` options.
    * @param options
-   * @since 3.5.0
+   * @since 3.4.0
    */
   @Experimental
   def from_protobuf(
@@ -52,8 +51,8 @@ object functions {
       messageName: String,
       descFilePath: String,
       options: java.util.Map[String, String]): Column = {
-    val binaryFileDescSet = readDescriptorFileContent(descFilePath)
-    from_protobuf(data, messageName, binaryFileDescSet, options)
+    val descriptorFileContent = readDescriptorFileContent(descFilePath)
+    from_protobuf(data, messageName, descriptorFileContent, options)
   }
 
   /**
@@ -95,31 +94,12 @@ object functions {
    * @param descFilePath
    *   The Protobuf descriptor file. This file is usually created using 
`protoc` with
    *   `--descriptor_set_out` and `--include_imports` options.
-   * @since 3.5.0
+   * @since 3.4.0
    */
   @Experimental
   def from_protobuf(data: Column, messageName: String, descFilePath: String): 
Column = {
-    from_protobuf(data, messageName, descFilePath, emptyOptions)
-  }
-
-  /**
-   * Converts a binary column of Protobuf format into its corresponding 
catalyst value.
-   * `messageClassName` points to Protobuf Java class. The jar containing Java 
class should be
-   * shaded. Specifically, `com.google.protobuf.*` should be shaded to
-   * `org.sparkproject.spark_protobuf.protobuf.*`.
-   * https://github.com/rangadi/shaded-protobuf-classes is useful to create 
shaded jar from
-   * Protobuf files.
-   *
-   * @param data
-   *   the binary column.
-   * @param messageClassName
-   *   The full name for Protobuf Java class. E.g. 
<code>com.example.protos.ExampleEvent</code>.
-   *   The jar with these classes needs to be shaded as described above.
-   * @since 3.5.0
-   */
-  @Experimental
-  def from_protobuf(data: Column, messageClassName: String): Column = {
-    Column.fn("from_protobuf", data, lit(messageClassName))
+    val fileContent = readDescriptorFileContent(descFilePath)
+    from_protobuf(data, messageName, fileContent)
   }
 
   /**
@@ -140,7 +120,27 @@ object functions {
       data: Column,
       messageName: String,
       binaryFileDescriptorSet: Array[Byte]): Column = {
-    from_protobuf(data, messageName, binaryFileDescriptorSet, emptyOptions)
+    Column.fn("from_protobuf", data, lit(messageName), 
lit(binaryFileDescriptorSet))
+  }
+
+  /**
+   * Converts a binary column of Protobuf format into its corresponding 
catalyst value.
+   * `messageClassName` points to Protobuf Java class. The jar containing Java 
class should be
+   * shaded. Specifically, `com.google.protobuf.*` should be shaded to
+   * `org.sparkproject.spark_protobuf.protobuf.*`.
+   * https://github.com/rangadi/shaded-protobuf-classes is useful to create 
shaded jar from
+   * Protobuf files.
+   *
+   * @param data
+   *   the binary column.
+   * @param messageClassName
+   *   The full name for Protobuf Java class. E.g. 
<code>com.example.protos.ExampleEvent</code>.
+   *   The jar with these classes needs to be shaded as described above.
+   * @since 3.4.0
+   */
+  @Experimental
+  def from_protobuf(data: Column, messageClassName: String): Column = {
+    Column.fn("from_protobuf", data, lit(messageClassName))
   }
 
   /**
@@ -157,7 +157,7 @@ object functions {
    *   The full name for Protobuf Java class. E.g. 
<code>com.example.protos.ExampleEvent</code>.
    *   The jar with these classes needs to be shaded as described above.
    * @param options
-   * @since 3.5.0
+   * @since 3.4.0
    */
   @Experimental
   def from_protobuf(
@@ -178,11 +178,11 @@ object functions {
    * @param descFilePath
    *   The Protobuf descriptor file. This file is usually created using 
`protoc` with
    *   `--descriptor_set_out` and `--include_imports` options.
-   * @since 3.5.0
+   * @since 3.4.0
    */
   @Experimental
   def to_protobuf(data: Column, messageName: String, descFilePath: String): 
Column = {
-    to_protobuf(data, messageName, descFilePath, emptyOptions)
+    to_protobuf(data, messageName, descFilePath, Map.empty[String, 
String].asJava)
   }
 
   /**
@@ -204,7 +204,7 @@ object functions {
       data: Column,
       messageName: String,
       binaryFileDescriptorSet: Array[Byte]): Column = {
-    to_protobuf(data, messageName, binaryFileDescriptorSet, emptyOptions)
+    Column.fn("to_protobuf", data, lit(messageName), 
lit(binaryFileDescriptorSet))
   }
 
   /**
@@ -216,10 +216,9 @@ object functions {
    * @param messageName
    *   the protobuf MessageName to look for in descriptor file.
    * @param descFilePath
-   *   The Protobuf descriptor file. This file is usually created using 
`protoc` with
-   *   `--descriptor_set_out` and `--include_imports` options.
+   *   the protobuf descriptor file.
    * @param options
-   * @since 3.5.0
+   * @since 3.4.0
    */
   @Experimental
   def to_protobuf(
@@ -227,8 +226,8 @@ object functions {
       messageName: String,
       descFilePath: String,
       options: java.util.Map[String, String]): Column = {
-    val binaryFileDescriptorSet = readDescriptorFileContent(descFilePath)
-    to_protobuf(data, messageName, binaryFileDescriptorSet, options)
+    val fileContent = readDescriptorFileContent(descFilePath)
+    to_protobuf(data, messageName, fileContent, options)
   }
 
   /**
@@ -271,7 +270,7 @@ object functions {
    * @param messageClassName
    *   The full name for Protobuf Java class. E.g. 
<code>com.example.protos.ExampleEvent</code>.
    *   The jar with these classes needs to be shaded as described above.
-   * @since 3.5.0
+   * @since 3.4.0
    */
   @Experimental
   def to_protobuf(data: Column, messageClassName: String): Column = {
@@ -291,7 +290,7 @@ object functions {
    *   The full name for Protobuf Java class. E.g. 
<code>com.example.protos.ExampleEvent</code>.
    *   The jar with these classes needs to be shaded as described above.
    * @param options
-   * @since 3.5.0
+   * @since 3.4.0
    */
   @Experimental
   def to_protobuf(
@@ -301,8 +300,6 @@ object functions {
     Column.fnWithOptions("to_protobuf", options.asScala.iterator, data, 
lit(messageClassName))
   }
 
-  private def emptyOptions: java.util.Map[String, String] = 
Collections.emptyMap[String, String]()
-
   // This method is copied from 
org.apache.spark.sql.protobuf.util.ProtobufUtils
   private def readDescriptorFileContent(filePath: String): Array[Byte] = {
     try {
@@ -312,7 +309,8 @@ object functions {
         throw CompilationErrors.cannotFindDescriptorFileError(filePath, ex)
       case ex: NoSuchFileException =>
         throw CompilationErrors.cannotFindDescriptorFileError(filePath, ex)
-      case NonFatal(ex) => throw CompilationErrors.descriptorParseError(ex)
+      case NonFatal(ex) =>
+        throw CompilationErrors.descriptorParseError(ex)
     }
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(spark) branch master updated: [SPARK-49286][CONNECT][SQL] Move Avro/Protobuf functions to sql/api

Reply via email to