Github user xuanyuanking commented on a diff in the pull request: https://github.com/apache/spark/pull/22878#discussion_r229775793 --- Diff: external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala --- @@ -1374,4 +1377,185 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils { |} """.stripMargin) } + + test("generic record converts to row and back") { + val nested = + SchemaBuilder.record("simple_record").fields() + .name("nested1").`type`("int").withDefault(0) + .name("nested2").`type`("string").withDefault("string").endRecord() + val mapDefault = new java.util.HashMap[String, String]() + mapDefault.put("a", "A") + val schema = SchemaBuilder.record("record").fields() + .name("boolean").`type`("boolean").withDefault(false) + .name("int").`type`("int").withDefault(0) + .name("long").`type`("long").withDefault(0L) + .name("float").`type`("float").withDefault(0.0F) + .name("double").`type`("double").withDefault(0.0) + .name("string").`type`("string").withDefault("string") + .name("bytes").`type`("bytes").withDefault(java.nio.ByteBuffer.wrap("bytes".getBytes)) + .name("nested").`type`(nested).withDefault(new GenericRecordBuilder(nested).build) + .name("enum").`type`( + SchemaBuilder.enumeration("simple_enums") + .symbols("SPADES", "HEARTS", "CLUBS", "DIAMONDS")).withDefault("SPADES") + .name("int_array").`type`( + SchemaBuilder.array().items().`type`("int")).withDefault(java.util.Arrays.asList(1, 2, 3)) + .name("string_array").`type`( + SchemaBuilder.array().items().`type`("string")) + .withDefault(java.util.Arrays.asList("a", "b", "c")) + .name("record_array").`type`( + SchemaBuilder.array.items.`type`(nested)) + .withDefault(java.util.Arrays.asList( + new GenericRecordBuilder(nested).build, + new GenericRecordBuilder(nested).build)) + .name("enum_array").`type`( + SchemaBuilder.array.items.`type`( + SchemaBuilder.enumeration("simple_enums") + .symbols("SPADES", "HEARTS", "CLUBS", "DIAMONDS"))) + 
.withDefault(java.util.Arrays.asList("SPADES", "HEARTS", "SPADES")) + .name("fixed_array").`type`( + SchemaBuilder.array.items().`type`( + SchemaBuilder.fixed("simple_fixed").size(3))) + .withDefault(java.util.Arrays.asList("foo", "bar", "baz")) + .name("fixed").`type`(SchemaBuilder.fixed("simple_fixed").size(16)) + .withDefault("string_length_16") + .name("map").`type`( + SchemaBuilder.map().values().`type`("string")) + .withDefault(mapDefault) + .endRecord() + val encoder = AvroEncoder.of[GenericData.Record](schema) + val expressionEncoder = encoder.asInstanceOf[ExpressionEncoder[GenericData.Record]] + val record = new GenericRecordBuilder(schema).build + val row = expressionEncoder.toRow(record) + val recordFromRow = expressionEncoder.resolveAndBind().fromRow(row) + assert(record.toString == recordFromRow.toString) --- End diff -- To avoid confusing reviewers, here is a note on why this assertion compares strings. After adding the [map type](https://github.com/apache/spark/pull/22878/files#diff-9364b0610f92b3cc35a4bc43a80751bfR1421) to this test case, `record.get(15).equals(recordFromRow.get(15))` is false. This is because the keys/values in the map of `record` are `Utf8`, while they are `CharSequence` in `recordFromRow`, so calling `map.equals` directly returns false. The test therefore checks the result by comparing the string representations. Avro GenericData.compare(): https://github.com/apache/avro/blob/8d2a2ce10db3fdef107f834a0fe0c9297b043a94/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java#L965
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org