hvanhovell commented on code in PR #50023:
URL: https://github.com/apache/spark/pull/50023#discussion_r1970795330
##########
sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala:
##########
@@ -2841,6 +2860,265 @@ class DatasetSuite extends QueryTest
checkDataset(Seq(seqMutableSet).toDS(), seqMutableSet)
checkDataset(Seq(mapMutableSet).toDS(), mapMutableSet)
}
+
+ // below tests are related to SPARK-49960 and TransformingEncoder usage
+ test("Incorrect derived nullability with TransformingEncoder - non nullable") {
+ val sparkI = spark
+ type T = Tuple2[Seq[Seq[Int]], Seq[Int]]
+ val data: Seq[T] = Seq( ( Seq( Seq(1, 2, 3) ), Seq(1, 2, 3) ) )
+ // for reference only
+ val sparkDataTypeOG = {
+ import sparkI.implicits._
+ val ds = spark.createDataset[Tuple2[Seq[Seq[Int]], Seq[Int]]](data)
+ ds.schema
+ }
+
+ val provider = () =>
+ new Codec[Seq[Int], Seq[Int]]{
+ override def encode(in: Seq[Int]): Seq[Int] = in
+ override def decode(out: Seq[Int]): Seq[Int] = out
+ }
+
+ val transformingSeq =
+ TransformingEncoder[Seq[Int], Seq[Int]](
+ implicitly[ClassTag[Seq[Int]]],
+ IterableEncoder[Seq[Int], Int](implicitly[ClassTag[Seq[Int]]],
+ PrimitiveIntEncoder, false, false),
+ provider
+ )
+
+ val enc =
+ ProductEncoder(
+ implicitly[ClassTag[T]],
+ Seq(
+ EncoderField("_1",
+ IterableEncoder[Seq[Seq[Int]],
Seq[Int]](implicitly[ClassTag[Seq[Seq[Int]]]],
+ transformingSeq, false, false), false, Metadata.empty),
+ EncoderField( "_2", transformingSeq, false, Metadata.empty)
+ ),
+ None
+ )
+ val sparkViaAgnostic = {
+ val ds = spark.createDataset(data)(enc)
+ ds.schema
+ }
+ // the nullability without TransformingEncoder nullability (SerializerBuilderHelper)
+ // is incorrect (_2 is inferred as nullable)
+ assert(enc.dataType === sparkViaAgnostic)
+ }
+
+ def provider[A]: () => Codec[V[A], A] = () =>
+ new Codec[V[A], A]{
+ override def encode(in: V[A]): A = in.v
+ override def decode(out: A): V[A] = V(out)
+ }
+
+ def transforming[A](underlying: AgnosticEncoder[A]):
TransformingEncoder[V[A], A] =
+ TransformingEncoder[V[A], A](
+ implicitly[ClassTag[V[A]]],
+ underlying,
+ provider
+ )
+
+ val V_INT = StructType(Seq(StructField("v", IntegerType, nullable = false)))
+
+ // "value" usage for single field, a wrapping nullable type is required
+ val OPTION_OF_V_INT = StructType(Seq(StructField("value",
+ V_INT, nullable = true)))
+
+ // product encoder for a non-nullable V
+ val V_OF_INT =
+ ProductEncoder(
+ classTag[V[Int]],
+ Seq(EncoderField("v", PrimitiveIntEncoder, nullable = false,
Metadata.empty)),
+ None
+ )
+
+ test("""Encoder derivation with nested TransformingEncoder of
OptionEncoder""".stripMargin) {
+ val sparkI = spark
+ type T = V[V[Option[V[Int]]]]
+ val data: Seq[T] = Seq(V(V(None)), V(V(Some(V(1)))))
+ // for reference - datatype will introduce nested classes as expected
+ val sparkDataTypeOG = {
+ import sparkI.implicits._
+ val ds = spark.createDataset[V[V[Option[V[Int]]]]](data)
+ ds.schema
+ }
+
+ /* attempt to behave as if value class semantics except the last product,
+ using a final transforming instead of a product serializes */
+ val enc =
+ transforming(
+ transforming(
+ OptionEncoder(
+ // works
+ // transforming(PrimitiveIntEncoder)
Review Comment:
Bug?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]