rangadi commented on code in PR #41192:
URL: https://github.com/apache/spark/pull/41192#discussion_r1195774848


##########
connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/CatalystDataToProtobuf.scala:
##########
@@ -26,14 +26,14 @@ import org.apache.spark.sql.types.{BinaryType, DataType}
 private[protobuf] case class CatalystDataToProtobuf(
     child: Expression,
     messageName: String,
-    descFilePath: Option[String] = None,
+    fileDescriptorSetBytesOpt: Option[Array[Byte]] = None,

Review Comment:
   File path is not carried anymore. 



##########
core/src/main/resources/error/error-classes.json:
##########
@@ -125,7 +120,7 @@
   },
   "CANNOT_PARSE_PROTOBUF_DESCRIPTOR" : {
     "message" : [
-      "Error parsing file <descFilePath> descriptor byte[] into Descriptor 
object."
+      "Error parsing descriptor bytes into Protobuf FileDescriptorSet."

Review Comment:
   We don't have the file name for this error. 
   Failure to read from the file results in 
`QueryCompilationErrors.cannotFindDescriptorFileError()`



##########
connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/functions.scala:
##########
@@ -58,14 +84,33 @@ object functions {
    * @param messageName
    *   the protobuf MessageName to look for in descriptor file.
    * @param descFilePath
-   *   the protobuf descriptor file.
+   *   The Protobuf descriptor file. This file is usually created using 
`protoc` with
+   *   `--descriptor_set_out` and `--include_imports` options.
    * @since 3.4.0
    */
   @Experimental
   def from_protobuf(data: Column, messageName: String, descFilePath: String): 
Column = {
-    new Column(ProtobufDataToCatalyst(data.expr, messageName, descFilePath = 
Some(descFilePath)))
-    // TODO: Add an option for user to provide descriptor file content as a 
buffer. This
-    //       gives flexibility in how the content is fetched.
+    val fileContent = ProtobufUtils.readDescriptorFileContent(descFilePath)
+    new Column(ProtobufDataToCatalyst(data.expr, messageName, 
Some(fileContent)))
+  }
+
+  /**
+   * Converts a binary column of Protobuf format into its corresponding 
catalyst value.The
+   * Protobuf definition is provided through Protobuf `FileDescriptorSet`.
+   *
+   * @param data
+   *   the binary column.
+   * @param messageName
+   *   the protobuf MessageName to look for in the descriptor set.
+   * @param fileDescriptorSetBytes
+   *   Serialized Protobuf descriptor (`FileDescriptorSet`). Typically 
contents of file created
+   *   using `protoc` with `--descriptor_set_out` and `--include_imports` 
options.
+   * @since 3.5.0
+   */
+  @Experimental
+  def from_protobuf(data: Column, messageName: String, fileDescriptorSetBytes: 
Array[Byte])

Review Comment:
   This is an example new API that takes byte array as the argument. 
   I will send a separate PR for Python API.



##########
connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/utils/ProtobufUtils.scala:
##########
@@ -223,28 +226,28 @@ private[sql] object ProtobufUtils extends Logging {
     }
   }
 
-  private def parseFileDescriptorSet(descFilePath: String): 
List[Descriptors.FileDescriptor] = {
-    var fileDescriptorSet: DescriptorProtos.FileDescriptorSet = null
+  def readDescriptorFileContent(filePath: String): Array[Byte] = {

Review Comment:
   This util method is used in many places (including tests) to read the 
contents of the file. 



##########
core/src/main/resources/error/error-classes.json:
##########
@@ -47,11 +47,6 @@
     ],
     "sqlState" : "42846"
   },
-  "CANNOT_CONSTRUCT_PROTOBUF_DESCRIPTOR" : {

Review Comment:
   This error was never required. It was masking another structured error. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to