mposdev21 commented on code in PR #38286:
URL: https://github.com/apache/spark/pull/38286#discussion_r997366952


##########
connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufCatalystDataConversionSuite.scala:
##########
@@ -99,26 +115,32 @@ class ProtobufCatalystDataConversionSuite
     StructType(StructField("bytes_type", BinaryType, nullable = true) :: Nil),
     StructType(StructField("string_type", StringType, nullable = true) :: Nil))
 
-  private val catalystTypesToProtoMessages: Map[DataType, String] = Map(
-    IntegerType -> "IntegerMsg",
-    DoubleType -> "DoubleMsg",
-    FloatType -> "FloatMsg",
-    BinaryType -> "BytesMsg",
-    StringType -> "StringMsg")
+  private val catalystTypesToProtoMessages: Map[DataType, (String, Any)] = Map(
+    IntegerType -> ("IntegerMsg", 0),
+    DoubleType -> ("DoubleMsg", 0.0d),
+    FloatType -> ("FloatMsg", 0.0f),
+    BinaryType -> ("BytesMsg", ByteString.empty().toByteArray),
+    StringType -> ("StringMsg", ""))
 
   testingTypes.foreach { dt =>
     val seed = 1 + scala.util.Random.nextInt((1024 - 1) + 1)
-    val filePath = testFile("protobuf/catalyst_types.desc").replace("file:/", 
"/")
     test(s"single $dt with seed $seed") {
+
+      val (messageName, defaultValue) = 
catalystTypesToProtoMessages(dt.fields(0).dataType)
+
       val rand = new scala.util.Random(seed)
-      val data = RandomDataGenerator.forType(dt, rand = rand).get.apply()
+      val generator = RandomDataGenerator.forType(dt, rand = rand).get
+      var data = generator()
+      while (data.asInstanceOf[Row].get(0) == defaultValue) // Do not use 
default values, since

Review Comment:
   @rangadi Were the tests failing randomly? I have not seen them fail in the many 
times I have run these tests. Maybe it is related to this recent change:
           val value = if (field.isRepeated || field.hasDefaultValue || 
record.hasField(field)) {
             record.getField(field)
           } else null
   But judging from your comment, that does not seem to make sense. Could you clarify?



##########
connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufFunctionsSuite.scala:
##########
@@ -56,44 +91,45 @@ class ProtobufFunctionsSuite extends QueryTest with 
SharedSparkSession with Seri
         
lit(1202.00).cast(org.apache.spark.sql.types.FloatType).as("float_value"),
         lit(true).as("bool_value"),
         lit("0".getBytes).as("bytes_value")).as("SimpleMessage"))
-    val protoStructDF = df.select(
-      functions.to_protobuf($"SimpleMessage", testFileDesc, 
"SimpleMessage").as("proto"))
-    val actualDf = protoStructDF.select(
-      functions.from_protobuf($"proto", testFileDesc, 
"SimpleMessage").as("proto.*"))
-    checkAnswer(actualDf, df)
+
+    checkWithFileAndClassName("SimpleMessage") {
+      case (name, descFilePathOpt) =>
+        val protoStructDF = df.select(
+          to_protobuf_wrapper($"SimpleMessage", name, 
descFilePathOpt).as("proto"))
+        val actualDf = protoStructDF.select(
+          from_protobuf_wrapper($"proto", name, descFilePathOpt).as("proto.*"))
+        checkAnswer(actualDf, df)
+    }
   }
 
   test("roundtrip in from_protobuf and to_protobuf - Repeated") {
-    val descriptor = ProtobufUtils.buildDescriptor(testFileDesc, 
"SimpleMessageRepeated")
 
-    val dynamicMessage = DynamicMessage
-      .newBuilder(descriptor)
-      .setField(descriptor.findFieldByName("key"), "key")
-      .setField(descriptor.findFieldByName("value"), "value")
-      .addRepeatedField(descriptor.findFieldByName("rbool_value"), false)
-      .addRepeatedField(descriptor.findFieldByName("rbool_value"), true)
-      .addRepeatedField(descriptor.findFieldByName("rdouble_value"), 
1092092.654d)
-      .addRepeatedField(descriptor.findFieldByName("rdouble_value"), 
1092093.654d)
-      .addRepeatedField(descriptor.findFieldByName("rfloat_value"), 10903.0f)
-      .addRepeatedField(descriptor.findFieldByName("rfloat_value"), 10902.0f)
-      .addRepeatedField(
-        descriptor.findFieldByName("rnested_enum"),
-        
descriptor.findEnumTypeByName("NestedEnum").findValueByName("ESTED_NOTHING"))
-      .addRepeatedField(
-        descriptor.findFieldByName("rnested_enum"),
-        
descriptor.findEnumTypeByName("NestedEnum").findValueByName("NESTED_FIRST"))
+    val protoMessage = SimpleMessageRepeated
+      .newBuilder()
+      .setKey("key")
+      .setValue("value")
+      .addRboolValue(false)
+      .addRboolValue(true)
+      .addRdoubleValue(1092092.654d)
+      .addRdoubleValue(1092093.654d)
+      .addRfloatValue(10903.0f)
+      .addRfloatValue(10902.0f)
+      .addRnestedEnum(NestedEnum.ESTED_NOTHING)
+      .addRnestedEnum(NestedEnum.NESTED_FIRST)
       .build()
 
-    val df = Seq(dynamicMessage.toByteArray).toDF("value")
-    val fromProtoDF = df.select(
-      functions.from_protobuf($"value", testFileDesc, 
"SimpleMessageRepeated").as("value_from"))
-    val toProtoDF = fromProtoDF.select(
-      functions.to_protobuf($"value_from", testFileDesc, 
"SimpleMessageRepeated").as("value_to"))
-    val toFromProtoDF = toProtoDF.select(
-      functions
-        .from_protobuf($"value_to", testFileDesc, "SimpleMessageRepeated")
-        .as("value_to_from"))
-    checkAnswer(fromProtoDF.select($"value_from.*"), 
toFromProtoDF.select($"value_to_from.*"))
+    val df = Seq(protoMessage.toByteArray).toDF("value")
+
+    checkWithFileAndClassName("SimpleMessageRepeated") {
+      case (name, descFilePathOpt) =>
+        val fromProtoDF = df.select(
+          from_protobuf_wrapper($"value", name, 
descFilePathOpt).as("value_from"))
+      val toProtoDF = fromProtoDF.select(
+        to_protobuf_wrapper($"value_from", name, 
descFilePathOpt).as("value_to"))
+      val toFromProtoDF = toProtoDF.select(
+        from_protobuf_wrapper($"value_to", name, 
descFilePathOpt).as("value_to_from"))
+      checkAnswer(fromProtoDF.select($"value_from.*"), 
toFromProtoDF.select($"value_to_from.*"))

Review Comment:
   Lines 127-131 are not indented properly.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to