This is an automated email from the ASF dual-hosted git repository.
gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new cc8bdf1 PARQUET-952: Avro union with single type fails with 'is not a
group' (#459)
cc8bdf1 is described below
commit cc8bdf1d13639d12d02170d40cc4890180bbabc5
Author: nandorKollar <[email protected]>
AuthorDate: Mon Jun 18 09:47:25 2018 +0200
PARQUET-952: Avro union with single type fails with 'is not a group' (#459)
---
.../apache/parquet/avro/AvroSchemaConverter.java | 22 ++++++++++-----
.../org/apache/parquet/avro/TestReadWrite.java | 31 ++++++++++++++++++++++
2 files changed, 46 insertions(+), 7 deletions(-)
diff --git
a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
index 70b6525..1bb12b9 100644
---
a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
+++
b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
@@ -195,8 +195,11 @@ public class AvroSchemaConverter {
private Type convertUnion(String fieldName, Schema schema, Type.Repetition
repetition) {
List<Schema> nonNullSchemas = new
ArrayList<Schema>(schema.getTypes().size());
+ // Was a null schema found in the union? Required for the edge case where the
union contains only a single non-null type and no null.
+ boolean foundNullSchema = false;
for (Schema childSchema : schema.getTypes()) {
if (childSchema.getType().equals(Schema.Type.NULL)) {
+ foundNullSchema = true;
if (Type.Repetition.REQUIRED == repetition) {
repetition = Type.Repetition.OPTIONAL;
}
@@ -211,16 +214,21 @@ public class AvroSchemaConverter {
throw new UnsupportedOperationException("Cannot convert Avro union of
only nulls");
case 1:
- return convertField(fieldName, nonNullSchemas.get(0), repetition);
+ return foundNullSchema ? convertField(fieldName,
nonNullSchemas.get(0), repetition) :
+ convertUnionToGroupType(fieldName, repetition, nonNullSchemas);
default: // complex union type
- List<Type> unionTypes = new ArrayList<Type>(nonNullSchemas.size());
- int index = 0;
- for (Schema childSchema : nonNullSchemas) {
- unionTypes.add( convertField("member" + index++, childSchema,
Type.Repetition.OPTIONAL));
- }
- return new GroupType(repetition, fieldName, unionTypes);
+ return convertUnionToGroupType(fieldName, repetition, nonNullSchemas);
+ }
+ }
+
+ private Type convertUnionToGroupType(String fieldName, Type.Repetition
repetition, List<Schema> nonNullSchemas) {
+ List<Type> unionTypes = new ArrayList<Type>(nonNullSchemas.size());
+ int index = 0;
+ for (Schema childSchema : nonNullSchemas) {
+ unionTypes.add( convertField("member" + index++, childSchema,
Type.Repetition.OPTIONAL));
}
+ return new GroupType(repetition, fieldName, unionTypes);
}
private Type convertField(Schema.Field field) {
diff --git
a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java
b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java
index 4fa71ea..2335e36 100644
--- a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java
+++ b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java
@@ -647,6 +647,37 @@ public class TestReadWrite {
}
+ @Test
+ public void testUnionWithSingleNonNullType() throws Exception {
+ Schema avroSchema = Schema.createRecord("SingleStringUnionRecord", null,
null, false);
+ avroSchema.setFields(
+ Collections.singletonList(new Schema.Field("value",
+ Schema.createUnion(Schema.create(Schema.Type.STRING)), null, null)));
+
+ File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
+ tmp.deleteOnExit();
+ tmp.delete();
+ Path file = new Path(tmp.getPath());
+
+ // Parquet writer
+ ParquetWriter parquetWriter =
AvroParquetWriter.builder(file).withSchema(avroSchema)
+ .withConf(new Configuration())
+ .build();
+
+ GenericRecord record = new GenericRecordBuilder(avroSchema)
+ .set("value", "theValue")
+ .build();
+
+ parquetWriter.write(record);
+ parquetWriter.close();
+
+ AvroParquetReader<GenericRecord> reader = new
AvroParquetReader<GenericRecord>(testConf, file);
+ GenericRecord nextRecord = reader.read();
+
+ assertNotNull(nextRecord);
+ assertEquals(str("theValue"), nextRecord.get("value"));
+ }
+
/**
* Return a String or Utf8 depending on whether compatibility is on
*/
--
To stop receiving notification emails like this one, please contact
[email protected].