This is an automated email from the ASF dual-hosted git repository.
gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new 84c954d PARQUET-1827: UUID type currently not supported by parquet-mr
(#778)
84c954d is described below
commit 84c954d8a4feef2d9bdad7a236a7268ef71a1c25
Author: Gabor Szadovszky <[email protected]>
AuthorDate: Thu Jun 4 08:56:31 2020 +0200
PARQUET-1827: UUID type currently not supported by parquet-mr (#778)
---
parquet-avro/README.md | 44 ++++
.../org/apache/parquet/avro/AvroConverters.java | 16 ++
.../apache/parquet/avro/AvroRecordConverter.java | 15 +-
.../apache/parquet/avro/AvroSchemaConverter.java | 34 ++-
.../org/apache/parquet/avro/AvroWriteSupport.java | 33 ++-
.../java/org/apache/parquet/avro/AvroTestUtil.java | 21 +-
.../parquet/avro/TestAvroSchemaConverter.java | 32 +++
.../parquet/avro/TestGenericLogicalTypes.java | 49 +++-
.../parquet/avro/TestReflectLogicalTypes.java | 289 ++++++++++++++++++++-
parquet-column/pom.xml | 7 +
.../parquet/schema/LogicalTypeAnnotation.java | 44 ++++
.../parquet/schema/PrimitiveStringifier.java | 27 ++
.../org/apache/parquet/schema/PrimitiveType.java | 7 +
.../main/java/org/apache/parquet/schema/Types.java | 45 ++--
.../parquet/schema/TestPrimitiveStringifier.java | 39 ++-
.../schema/TestTypeBuildersWithLogicalTypes.java | 14 +
.../format/converter/ParquetMetadataConverter.java | 15 ++
.../converter/TestParquetMetadataConverter.java | 3 +
.../parquet/statistics/TestColumnIndexes.java | 4 +
19 files changed, 687 insertions(+), 51 deletions(-)
diff --git a/parquet-avro/README.md b/parquet-avro/README.md
new file mode 100644
index 0000000..8338165
--- /dev/null
+++ b/parquet-avro/README.md
@@ -0,0 +1,44 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied. See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ -->
+
+Apache Avro integration
+======
+
+**TODO**: Add description and examples how to use parquet-avro
+
+## Available options via Hadoop Configuration
+
+### Configuration for reading
+
+| Name | Type | Description
|
+|-----------------------------------------|-----------|----------------------------------------------------------------------|
+| `parquet.avro.data.supplier` | `Class` | The implementation of
the interface org.apache.parquet.avro.AvroDataSupplier. Available
implementations in the library: GenericDataSupplier, ReflectDataSupplier,
SpecificDataSupplier.<br/>The default value is
`org.apache.parquet.avro.SpecificDataSupplier` |
+| `parquet.avro.read.schema` | `String` | The Avro schema to be
used for reading. It shall be compatible with the file schema. The file schema
will be used directly if not set. |
+| `parquet.avro.projection` | `String` | The Avro schema to be
used for projection. |
+| `parquet.avro.compatible` | `boolean` | Flag for compatibility
mode. `true` for materializing Avro `IndexedRecord` objects, `false` for
materializing the related objects for either generic, specific, or reflect
records.<br/>The default value is `true`. |
+
+### Configuration for writing
+
+| Name | Type | Description
|
+|-----------------------------------------|-----------|----------------------------------------------------------------------|
+| `parquet.avro.write.data.supplier` | `Class` | The implementation of
the interface org.apache.parquet.avro.AvroDataSupplier. Available
implementations in the library: GenericDataSupplier, ReflectDataSupplier,
SpecificDataSupplier.<br/>The default value is
`org.apache.parquet.avro.SpecificDataSupplier` |
+| `parquet.avro.schema` | `String` | The Avro schema to be
used for generating the Parquet schema of the file. |
+| `parquet.avro.write-old-list-structure` | `boolean` | Flag whether to write
list structures in the old way (2 levels) or the new one (3 levels). When
writing at 2 levels no null values are available at the element level.<br/>The
default value is `true`. |
+| `parquet.avro.add-list-element-records` | `boolean` | Flag whether to assume
that any repeated element in the schema is a list element.<br/>The default
value is `true`. |
+| `parquet.avro.write-parquet-uuid` | `boolean` | Flag whether to write
the [Parquet UUID logical
type](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#uuid)
in case an [Avro UUID
type](https://avro.apache.org/docs/current/spec.html#UUID) is present.<br/>The
default value is `false`. |
diff --git
a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroConverters.java
b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroConverters.java
index b9c07cf..c372258 100644
--- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroConverters.java
+++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroConverters.java
@@ -29,6 +29,8 @@ import org.apache.parquet.io.ParquetDecodingException;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.GroupConverter;
import org.apache.parquet.io.api.PrimitiveConverter;
+import org.apache.parquet.schema.PrimitiveStringifier;
+import org.apache.parquet.schema.PrimitiveType;
public class AvroConverters {
@@ -314,4 +316,18 @@ public class AvroConverters {
return model.createFixed(null /* reuse */, binary.getBytes(), schema);
}
}
+
+ static final class FieldUUIDConverter extends BinaryConverter<String> {
+ private final PrimitiveStringifier stringifier;
+
+ public FieldUUIDConverter(ParentValueContainer parent, PrimitiveType type)
{
+ super(parent);
+ stringifier = type.stringifier();
+ }
+
+ @Override
+ public String convert(Binary binary) {
+ return stringifier.stringify(binary);
+ }
+ }
}
diff --git
a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java
b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java
index 45504b6..ea5b907 100644
---
a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java
+++
b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroRecordConverter.java
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -33,20 +33,17 @@ import java.lang.reflect.Modifier;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import java.util.LinkedHashMap;
import org.apache.avro.AvroTypeException;
import org.apache.avro.Conversion;
import org.apache.avro.LogicalType;
+import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
-import org.apache.avro.SchemaCompatibility;
import org.apache.avro.generic.GenericData;
import org.apache.avro.reflect.AvroIgnore;
import org.apache.avro.reflect.AvroName;
-import org.apache.avro.reflect.AvroSchema;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.reflect.Stringable;
import org.apache.avro.specific.SpecificData;
@@ -54,7 +51,6 @@ import org.apache.avro.util.ClassUtils;
import org.apache.parquet.Preconditions;
import org.apache.parquet.avro.AvroConverters.FieldStringConverter;
import org.apache.parquet.avro.AvroConverters.FieldStringableConverter;
-import org.apache.parquet.filter2.predicate.SchemaCompatibilityValidator;
import org.apache.parquet.io.InvalidRecordException;
import org.apache.parquet.io.api.Converter;
import org.apache.parquet.io.api.GroupConverter;
@@ -280,6 +276,9 @@ class AvroRecordConverter<T> extends
AvroConverters.AvroGroupConverter {
}
return new AvroConverters.FieldByteBufferConverter(parent);
case STRING:
+ if (logicalType != null &&
logicalType.getName().equals(LogicalTypes.uuid().getName())) {
+ return new AvroConverters.FieldUUIDConverter(parent,
type.asPrimitiveType());
+ }
return newStringConverter(schema, model, parent);
case RECORD:
return new AvroRecordConverter(parent, type.asGroupType(), schema,
model);
diff --git
a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
index 0cece97..c1bb3c5 100644
---
a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
+++
b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
@@ -31,6 +31,7 @@ import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Types;
+import
org.apache.parquet.schema.LogicalTypeAnnotation.UUIDLogicalTypeAnnotation;
import java.util.ArrayList;
import java.util.Arrays;
@@ -44,6 +45,8 @@ import static java.util.Optional.of;
import static org.apache.avro.JsonProperties.NULL_VALUE;
import static
org.apache.parquet.avro.AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE;
import static
org.apache.parquet.avro.AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE_DEFAULT;
+import static org.apache.parquet.avro.AvroWriteSupport.WRITE_PARQUET_UUID;
+import static
org.apache.parquet.avro.AvroWriteSupport.WRITE_PARQUET_UUID_DEFAULT;
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS;
import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
import static org.apache.parquet.schema.LogicalTypeAnnotation.dateType;
@@ -52,6 +55,7 @@ import static
org.apache.parquet.schema.LogicalTypeAnnotation.enumType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.uuidType;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.*;
import static org.apache.parquet.schema.Type.Repetition.REPEATED;
@@ -69,10 +73,10 @@ public class AvroSchemaConverter {
private final boolean assumeRepeatedIsListElement;
private final boolean writeOldListStructure;
+ private final boolean writeParquetUUID;
public AvroSchemaConverter() {
- this.assumeRepeatedIsListElement = ADD_LIST_ELEMENT_RECORDS_DEFAULT;
- this.writeOldListStructure = WRITE_OLD_LIST_STRUCTURE_DEFAULT;
+ this(ADD_LIST_ELEMENT_RECORDS_DEFAULT);
}
/**
@@ -84,6 +88,7 @@ public class AvroSchemaConverter {
AvroSchemaConverter(boolean assumeRepeatedIsListElement) {
this.assumeRepeatedIsListElement = assumeRepeatedIsListElement;
this.writeOldListStructure = WRITE_OLD_LIST_STRUCTURE_DEFAULT;
+ this.writeParquetUUID = WRITE_PARQUET_UUID_DEFAULT;
}
public AvroSchemaConverter(Configuration conf) {
@@ -91,6 +96,7 @@ public class AvroSchemaConverter {
ADD_LIST_ELEMENT_RECORDS, ADD_LIST_ELEMENT_RECORDS_DEFAULT);
this.writeOldListStructure = conf.getBoolean(
WRITE_OLD_LIST_STRUCTURE, WRITE_OLD_LIST_STRUCTURE_DEFAULT);
+ this.writeParquetUUID = conf.getBoolean(WRITE_PARQUET_UUID,
WRITE_PARQUET_UUID_DEFAULT);
}
/**
@@ -145,6 +151,7 @@ public class AvroSchemaConverter {
private Type convertField(String fieldName, Schema schema, Type.Repetition
repetition) {
Types.PrimitiveBuilder<PrimitiveType> builder;
Schema.Type type = schema.getType();
+ LogicalType logicalType = schema.getLogicalType();
if (type.equals(Schema.Type.BOOLEAN)) {
builder = Types.primitive(BOOLEAN, repetition);
} else if (type.equals(Schema.Type.INT)) {
@@ -158,7 +165,12 @@ public class AvroSchemaConverter {
} else if (type.equals(Schema.Type.BYTES)) {
builder = Types.primitive(BINARY, repetition);
} else if (type.equals(Schema.Type.STRING)) {
- builder = Types.primitive(BINARY, repetition).as(stringType());
+ if (logicalType != null &&
logicalType.getName().equals(LogicalTypes.uuid().getName()) &&
writeParquetUUID) {
+ builder = Types.primitive(FIXED_LEN_BYTE_ARRAY, repetition)
+ .length(LogicalTypeAnnotation.UUIDLogicalTypeAnnotation.BYTES);
+ } else {
+ builder = Types.primitive(BINARY, repetition).as(stringType());
+ }
} else if (type.equals(Schema.Type.RECORD)) {
return new GroupType(repetition, fieldName,
convertFields(schema.getFields()));
} else if (type.equals(Schema.Type.ENUM)) {
@@ -186,7 +198,6 @@ public class AvroSchemaConverter {
// schema translation can only be done for known logical types because this
// creates an equivalence
- LogicalType logicalType = schema.getLogicalType();
if (logicalType != null) {
if (logicalType instanceof LogicalTypes.Decimal) {
LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType;
@@ -306,8 +317,12 @@ public class AvroSchemaConverter {
}
@Override
public Schema convertFIXED_LEN_BYTE_ARRAY(PrimitiveTypeName
primitiveTypeName) {
- int size = parquetType.asPrimitiveType().getTypeLength();
- return Schema.createFixed(parquetType.getName(), null, null,
size);
+ if (annotation instanceof
LogicalTypeAnnotation.UUIDLogicalTypeAnnotation) {
+ return Schema.create(Schema.Type.STRING);
+ } else {
+ int size = parquetType.asPrimitiveType().getTypeLength();
+ return Schema.createFixed(parquetType.getName(), null, null,
size);
+ }
}
@Override
public Schema convertBINARY(PrimitiveTypeName primitiveTypeName) {
@@ -419,6 +434,8 @@ public class AvroSchemaConverter {
return timestampType(true, MILLIS);
} else if (logicalType instanceof LogicalTypes.TimestampMicros) {
return timestampType(true, MICROS);
+ } else if (logicalType.getName().equals(LogicalTypes.uuid().getName()) &&
writeParquetUUID) {
+ return uuidType();
}
return null;
}
@@ -461,6 +478,11 @@ public class AvroSchemaConverter {
}
return empty();
}
+
+ @Override
+ public Optional<LogicalType> visit(UUIDLogicalTypeAnnotation
uuidLogicalType) {
+ return of(LogicalTypes.uuid());
+ }
}).orElse(null);
}
diff --git
a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java
b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java
index 333a13d..4406587 100644
--- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java
+++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -24,6 +24,8 @@ import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.UUID;
+
import org.apache.avro.Conversion;
import org.apache.avro.LogicalType;
import org.apache.avro.Schema;
@@ -36,6 +38,7 @@ import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.schema.GroupType;
+import
org.apache.parquet.schema.LogicalTypeAnnotation.UUIDLogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.Type;
import org.apache.hadoop.util.ReflectionUtils;
@@ -61,6 +64,8 @@ public class AvroWriteSupport<T> extends WriteSupport<T> {
public static final String WRITE_OLD_LIST_STRUCTURE =
"parquet.avro.write-old-list-structure";
static final boolean WRITE_OLD_LIST_STRUCTURE_DEFAULT = true;
+ public static final String WRITE_PARQUET_UUID =
"parquet.avro.write-parquet-uuid";
+ static final boolean WRITE_PARQUET_UUID_DEFAULT = false;
private static final String MAP_REPEATED_NAME = "key_value";
private static final String MAP_KEY_NAME = "key";
@@ -229,7 +234,7 @@ public class AvroWriteSupport<T> extends WriteSupport<T> {
recordConsumer.endGroup();
}
- private void writeUnion(GroupType parquetSchema, Schema avroSchema,
+ private void writeUnion(GroupType parquetSchema, Schema avroSchema,
Object value) {
recordConsumer.startGroup();
@@ -344,7 +349,11 @@ public class AvroWriteSupport<T> extends WriteSupport<T> {
}
break;
case STRING:
- recordConsumer.addBinary(fromAvroString(value));
+ if (type.asPrimitiveType().getLogicalTypeAnnotation() instanceof
UUIDLogicalTypeAnnotation) {
+ recordConsumer.addBinary(fromUUIDString(value));
+ } else {
+ recordConsumer.addBinary(fromAvroString(value));
+ }
break;
case RECORD:
writeRecord(type.asGroupType(), avroSchema, value);
@@ -364,6 +373,20 @@ public class AvroWriteSupport<T> extends WriteSupport<T> {
}
}
+ private Binary fromUUIDString(Object value) {
+ byte[] data = new byte[UUIDLogicalTypeAnnotation.BYTES];
+ UUID uuid = UUID.fromString(value.toString());
+ writeLong(data, 0, uuid.getMostSignificantBits());
+ writeLong(data, Long.BYTES, uuid.getLeastSignificantBits());
+ return Binary.fromConstantByteArray(data);
+ }
+
+ private void writeLong(byte[] array, int offset, long value) {
+ for (int i = 0; i < Long.BYTES; ++i) {
+ array[i + offset] = (byte) (value >>> ((Long.BYTES - i - 1) *
Byte.SIZE));
+ }
+ }
+
private Binary fromAvroString(Object value) {
if (value instanceof Utf8) {
Utf8 utf8 = (Utf8) value;
diff --git
a/parquet-avro/src/test/java/org/apache/parquet/avro/AvroTestUtil.java
b/parquet-avro/src/test/java/org/apache/parquet/avro/AvroTestUtil.java
index 717e36e..8a9e424 100644
--- a/parquet-avro/src/test/java/org/apache/parquet/avro/AvroTestUtil.java
+++ b/parquet-avro/src/test/java/org/apache/parquet/avro/AvroTestUtil.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.ParquetWriter;
+import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.junit.Assert;
import org.junit.rules.TemporaryFolder;
@@ -82,15 +83,17 @@ public class AvroTestUtil {
}
public static <D> List<D> read(GenericData model, Schema schema, File file)
throws IOException {
+ return read(new Configuration(false), model, schema, file);
+ }
+
+ public static <D> List<D> read(Configuration conf, GenericData model, Schema
schema, File file) throws IOException {
List<D> data = new ArrayList<D>();
- Configuration conf = new Configuration(false);
AvroReadSupport.setRequestedProjection(conf, schema);
AvroReadSupport.setAvroReadSchema(conf, schema);
try (ParquetReader<D> fileReader = AvroParquetReader
- .<D>builder(new Path(file.toString()))
+ .<D>builder(HadoopInputFile.fromPath(new Path(file.toString()), conf))
.withDataModel(model) // reflect disables compatibility
- .withConf(conf)
.build()) {
D datum;
while ((datum = fileReader.read()) != null) {
@@ -103,6 +106,12 @@ public class AvroTestUtil {
@SuppressWarnings("unchecked")
public static <D> File write(TemporaryFolder temp, GenericData model, Schema
schema, D... data) throws IOException {
+ return write(temp, new Configuration(false), model, schema, data);
+ }
+
+ @SuppressWarnings("unchecked")
+ public static <D> File write(TemporaryFolder temp, Configuration conf,
GenericData model, Schema schema, D... data)
+ throws IOException {
File file = temp.newFile();
Assert.assertTrue(file.delete());
@@ -118,4 +127,10 @@ public class AvroTestUtil {
return file;
}
+
+ public static Configuration conf(String name, boolean value) {
+ Configuration conf = new Configuration(false);
+ conf.setBoolean(name, value);
+ return conf;
+ }
}
diff --git
a/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java
b/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java
index f5b348b..786477b 100644
---
a/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java
+++
b/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java
@@ -37,6 +37,9 @@ import java.util.Collections;
import static org.apache.avro.Schema.Type.INT;
import static org.apache.avro.Schema.Type.LONG;
+import static org.apache.avro.Schema.Type.STRING;
+import static
org.apache.avro.SchemaCompatibility.checkReaderWriterCompatibility;
+import static
org.apache.avro.SchemaCompatibility.SchemaCompatibilityType.COMPATIBLE;
import static org.apache.parquet.avro.AvroTestUtil.field;
import static org.apache.parquet.avro.AvroTestUtil.optionalField;
import static org.apache.parquet.avro.AvroTestUtil.primitive;
@@ -766,6 +769,35 @@ public class TestAvroSchemaConverter {
testParquetToAvroConversion(NEW_BEHAVIOR, schema, parquetSchema);
}
+ @Test
+ public void testUUIDType() throws Exception {
+ Schema fromAvro = Schema.createRecord("myrecord", null, null, false,
+ Arrays.asList(new Schema.Field("uuid",
LogicalTypes.uuid().addToSchema(Schema.create(STRING)), null, null)));
+ String parquet = "message myrecord {\n" +
+ " required binary uuid (STRING);\n" +
+ "}\n";
+ Schema toAvro = Schema.createRecord("myrecord", null, null, false,
+ Arrays.asList(new Schema.Field("uuid", Schema.create(STRING), null,
null)));
+
+ testAvroToParquetConversion(fromAvro, parquet);
+ testParquetToAvroConversion(toAvro, parquet);
+
+ assertEquals(COMPATIBLE, checkReaderWriterCompatibility(fromAvro,
toAvro).getType());
+ }
+
+ @Test
+ public void testUUIDTypeWithParquetUUID() throws Exception {
+ Schema uuid = LogicalTypes.uuid().addToSchema(Schema.create(STRING));
+ Schema expected = Schema.createRecord("myrecord", null, null, false,
+ Arrays.asList(new Schema.Field("uuid", uuid, null, null)));
+
+
testRoundTripConversion(AvroTestUtil.conf(AvroWriteSupport.WRITE_PARQUET_UUID,
true),
+ expected,
+ "message myrecord {\n" +
+ " required fixed_len_byte_array(16) uuid (UUID);\n" +
+ "}\n");
+ }
+
public static Schema optional(Schema original) {
return Schema.createUnion(Lists.newArrayList(
Schema.create(Schema.Type.NULL),
diff --git
a/parquet-avro/src/test/java/org/apache/parquet/avro/TestGenericLogicalTypes.java
b/parquet-avro/src/test/java/org/apache/parquet/avro/TestGenericLogicalTypes.java
index eb2ef08..4ae4ed6 100644
---
a/parquet-avro/src/test/java/org/apache/parquet/avro/TestGenericLogicalTypes.java
+++
b/parquet-avro/src/test/java/org/apache/parquet/avro/TestGenericLogicalTypes.java
@@ -26,6 +26,7 @@ import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.util.Utf8;
+import org.apache.hadoop.conf.Configuration;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Rule;
@@ -41,6 +42,7 @@ import java.util.List;
import java.util.UUID;
import static org.apache.avro.Schema.Type.STRING;
+import static org.apache.parquet.avro.AvroTestUtil.conf;
import static org.apache.parquet.avro.AvroTestUtil.field;
import static org.apache.parquet.avro.AvroTestUtil.instance;
import static org.apache.parquet.avro.AvroTestUtil.optionalField;
@@ -60,7 +62,6 @@ public class TestGenericLogicalTypes {
public static final BigDecimal D1 = new BigDecimal("-34.34");
public static final BigDecimal D2 = new BigDecimal("117230.00");
-
@BeforeClass
public static void addDecimalAndUUID() {
GENERIC.addLogicalTypeConversion(new Conversions.DecimalConversion());
@@ -93,6 +94,25 @@ public class TestGenericLogicalTypes {
}
@Test
+ public void testReadUUIDWithParquetUUID() throws IOException {
+ Schema uuidSchema = record("R",
+ field("uuid", LogicalTypes.uuid().addToSchema(Schema.create(STRING))));
+ GenericRecord u1 = instance(uuidSchema, "uuid", UUID.randomUUID());
+ GenericRecord u2 = instance(uuidSchema, "uuid", UUID.randomUUID());
+ File test = write(conf(AvroWriteSupport.WRITE_PARQUET_UUID, true),
uuidSchema, u1, u2);
+
+ Assert.assertEquals("Should read UUID objects",
+ Arrays.asList(u1, u2), read(GENERIC, uuidSchema, test));
+
+ GenericRecord s1 = instance(uuidSchema, "uuid", u1.get("uuid").toString());
+ GenericRecord s2 = instance(uuidSchema, "uuid", u2.get("uuid").toString());
+
+ Assert.assertEquals("Should read UUID as Strings",
+ Arrays.asList(s1, s2), read(GenericData.get(), uuidSchema, test));
+
+ }
+
+ @Test
public void testWriteUUIDReadStringSchema() throws IOException {
Schema uuidSchema = record("R",
field("uuid", LogicalTypes.uuid().addToSchema(Schema.create(STRING))));
@@ -130,13 +150,13 @@ public class TestGenericLogicalTypes {
Schema nullableUuidSchema = record("R",
optionalField("uuid",
LogicalTypes.uuid().addToSchema(Schema.create(STRING))));
GenericRecord u1 = instance(nullableUuidSchema, "uuid", UUID.randomUUID());
- GenericRecord u2 = instance(nullableUuidSchema, "uuid", UUID.randomUUID());
+ GenericRecord u2 = instance(nullableUuidSchema, "uuid", null);
Schema stringUuidSchema = Schema.create(STRING);
stringUuidSchema.addProp(GenericData.STRING_PROP, "String");
Schema nullableStringSchema = record("R", optionalField("uuid",
stringUuidSchema));
GenericRecord s1 = instance(nullableStringSchema, "uuid",
u1.get("uuid").toString());
- GenericRecord s2 = instance(nullableStringSchema, "uuid",
u2.get("uuid").toString());
+ GenericRecord s2 = instance(nullableStringSchema, "uuid", null);
File test = write(GENERIC, nullableUuidSchema, u1, u2);
Assert.assertEquals("Should read UUIDs as Strings",
@@ -144,6 +164,21 @@ public class TestGenericLogicalTypes {
}
@Test
+ public void testWriteNullableUUIDWithParuqetUUID() throws IOException {
+ Schema nullableUuidSchema = record("R",
+ optionalField("uuid",
LogicalTypes.uuid().addToSchema(Schema.create(STRING))));
+ GenericRecord u1 = instance(nullableUuidSchema, "uuid", UUID.randomUUID());
+ GenericRecord u2 = instance(nullableUuidSchema, "uuid", null);
+
+ GenericRecord s1 = instance(nullableUuidSchema, "uuid",
u1.get("uuid").toString());
+ GenericRecord s2 = instance(nullableUuidSchema, "uuid", null);
+
+ File test = write(GENERIC, nullableUuidSchema, u1, u2);
+ Assert.assertEquals("Should read UUIDs as Strings",
+ Arrays.asList(s1, s2), read(GenericData.get(), nullableUuidSchema,
test));
+ }
+
+ @Test
public void testReadDecimalFixed() throws IOException {
Schema fixedSchema = Schema.createFixed("aFixed", null, null, 4);
Schema fixedRecord = record("R", field("dec", fixedSchema));
@@ -246,8 +281,16 @@ public class TestGenericLogicalTypes {
return write(GenericData.get(), schema, data);
}
+ private <D> File write(Configuration conf, Schema schema, D... data) throws
IOException {
+ return write(conf, GenericData.get(), schema, data);
+ }
+
private <D> File write(GenericData model, Schema schema, D... data) throws
IOException {
return AvroTestUtil.write(temp, model, schema, data);
}
+ private <D> File write(Configuration conf, GenericData model, Schema schema,
D... data) throws IOException {
+ return AvroTestUtil.write(temp, conf, model, schema, data);
+ }
+
}
diff --git
a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReflectLogicalTypes.java
b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReflectLogicalTypes.java
index 142191f..31661c0 100644
---
a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReflectLogicalTypes.java
+++
b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReflectLogicalTypes.java
@@ -30,6 +30,7 @@ import org.apache.avro.generic.IndexedRecord;
import org.apache.avro.reflect.AvroSchema;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.specific.SpecificData;
+import org.apache.hadoop.conf.Configuration;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Rule;
@@ -41,8 +42,10 @@ import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import java.util.Objects;
import java.util.UUID;
+import static org.apache.parquet.avro.AvroTestUtil.conf;
import static org.apache.parquet.avro.AvroTestUtil.read;
/**
@@ -358,6 +361,43 @@ public class TestReflectLogicalTypes {
}
@Test
+ public void testReadUUIDWithParquetUUID() throws IOException {
+ Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName())
+ .fields().requiredString("uuid").endRecord();
+ LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema());
+
+ UUID u1 = UUID.randomUUID();
+ UUID u2 = UUID.randomUUID();
+
+ RecordWithStringUUID r1 = new RecordWithStringUUID();
+ r1.uuid = u1.toString();
+ RecordWithStringUUID r2 = new RecordWithStringUUID();
+ r2.uuid = u2.toString();
+
+ List<RecordWithUUID> expected = Arrays.asList(
+ new RecordWithUUID(), new RecordWithUUID());
+ expected.get(0).uuid = u1;
+ expected.get(1).uuid = u2;
+
+ File test = write(
+ AvroTestUtil.conf(AvroWriteSupport.WRITE_PARQUET_UUID, true),
+ uuidSchema, r1, r2);
+
+ Assert.assertEquals("Should convert Strings to UUIDs",
+ expected, read(REFLECT, uuidSchema, test));
+
+ // verify that the field's type overrides the logical type
+ Schema uuidStringSchema = SchemaBuilder
+ .record(RecordWithStringUUID.class.getName())
+ .fields().requiredString("uuid").endRecord();
+
LogicalTypes.uuid().addToSchema(uuidStringSchema.getField("uuid").schema());
+
+ Assert.assertEquals("Should not convert to UUID if accessor is String",
+ Arrays.asList(r1, r2),
+ read(REFLECT, uuidStringSchema, test));
+ }
+
+ @Test
public void testWriteUUID() throws IOException {
Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName())
.fields().requiredString("uuid").endRecord();
@@ -394,6 +434,41 @@ public class TestReflectLogicalTypes {
}
@Test
+ public void testWriteUUIDWithParuetUUID() throws IOException {
+ Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName())
+ .fields().requiredString("uuid").endRecord();
+ LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema());
+
+ UUID u1 = UUID.randomUUID();
+ UUID u2 = UUID.randomUUID();
+
+ RecordWithUUID r1 = new RecordWithUUID();
+ r1.uuid = u1;
+ RecordWithUUID r2 = new RecordWithUUID();
+ r2.uuid = u2;
+
+ List<RecordWithStringUUID> expected = Arrays.asList(
+ new RecordWithStringUUID(), new RecordWithStringUUID());
+ expected.get(0).uuid = u1.toString();
+ expected.get(1).uuid = u2.toString();
+
+ File test = write(
+ AvroTestUtil.conf(AvroWriteSupport.WRITE_PARQUET_UUID, true),
+ REFLECT, uuidSchema, r1, r2);
+
+ Assert.assertEquals("Should read UUID objects",
+ Arrays.asList(r1, r2),
+ read(REFLECT, uuidSchema, test));
+
+ Schema uuidStringSchema =
SchemaBuilder.record(RecordWithStringUUID.class.getName())
+ .fields().requiredString("uuid").endRecord();
+
LogicalTypes.uuid().addToSchema(uuidStringSchema.getField("uuid").schema());
+ Assert.assertEquals("Should read uuid as Strings",
+ expected,
+ read(ReflectData.get(), uuidStringSchema, test));
+ }
+
+ @Test
public void testWriteNullableUUID() throws IOException {
Schema nullableUuidSchema =
SchemaBuilder.record(RecordWithUUID.class.getName())
.fields().optionalString("uuid").endRecord();
@@ -426,6 +501,45 @@ public class TestReflectLogicalTypes {
}
@Test
+ public void testWriteNullableUUIDWithParquetUUID() throws IOException {
+ Schema nullableUuidSchema =
SchemaBuilder.record(RecordWithUUID.class.getName())
+ .fields().optionalString("uuid").endRecord();
+ LogicalTypes.uuid().addToSchema(
+ nullableUuidSchema.getField("uuid").schema().getTypes().get(1));
+
+ UUID u1 = UUID.randomUUID();
+ UUID u2 = null;
+
+ RecordWithUUID r1 = new RecordWithUUID();
+ r1.uuid = u1;
+ RecordWithUUID r2 = new RecordWithUUID();
+ r2.uuid = u2;
+
+ List<RecordWithStringUUID> expected = Arrays.asList(
+ new RecordWithStringUUID(), new RecordWithStringUUID());
+ expected.get(0).uuid = u1.toString();
+ expected.get(1).uuid = null;
+
+ File test = write(
+ AvroTestUtil.conf(AvroWriteSupport.WRITE_PARQUET_UUID, true),
+ REFLECT, nullableUuidSchema, r1, r2);
+
+ Assert.assertEquals("Should read uuid as UUID objects",
+ Arrays.asList(r1, r2),
+ read(REFLECT, nullableUuidSchema, test));
+
+ Schema nullableUuidStringSchema = SchemaBuilder
+ .record(RecordWithStringUUID.class.getName())
+ .fields().optionalString("uuid").endRecord();
+ LogicalTypes.uuid().addToSchema(
+ nullableUuidStringSchema.getField("uuid").schema().getTypes().get(1));
+
+ Assert.assertEquals("Should read uuid as String without UUID conversion",
+ expected,
+ read(REFLECT, nullableUuidStringSchema, test));
+ }
+
+ @Test
public void testWriteUUIDMissingLogicalType() throws IOException {
Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName())
.fields().requiredString("uuid").endRecord();
@@ -499,6 +613,42 @@ public class TestReflectLogicalTypes {
}
@Test
+ public void testReadUUIDGenericRecordWithParquetUUID() throws IOException {
+ Schema uuidSchema = SchemaBuilder.record("RecordWithUUID")
+ .fields().requiredString("uuid").endRecord();
+ LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema());
+
+ UUID u1 = UUID.randomUUID();
+ UUID u2 = UUID.randomUUID();
+
+ RecordWithStringUUID r1 = new RecordWithStringUUID();
+ r1.uuid = u1.toString();
+ RecordWithStringUUID r2 = new RecordWithStringUUID();
+ r2.uuid = u2.toString();
+
+ List<GenericData.Record> expected = Arrays.asList(
+ new GenericData.Record(uuidSchema), new
GenericData.Record(uuidSchema));
+ expected.get(0).put("uuid", u1);
+ expected.get(1).put("uuid", u2);
+
+ File test = write(
+ AvroTestUtil.conf(AvroWriteSupport.WRITE_PARQUET_UUID, true),
+ uuidSchema, r1, r2);
+
+ Assert.assertEquals("Should convert Strings to UUIDs",
+ expected, read(REFLECT, uuidSchema, test));
+
+ Schema uuidStringSchema = SchemaBuilder
+ .record(RecordWithStringUUID.class.getName())
+ .fields().requiredString("uuid").endRecord();
+
LogicalTypes.uuid().addToSchema(uuidStringSchema.getField("uuid").schema());
+
+ Assert.assertEquals("Should not convert to UUID if accessor is String",
+ Arrays.asList(r1, r2),
+ read(REFLECT, uuidStringSchema, test));
+ }
+
+ @Test
public void testReadUUIDArray() throws IOException {
Schema uuidArraySchema =
SchemaBuilder.record(RecordWithUUIDArray.class.getName())
.fields()
@@ -524,6 +674,31 @@ public class TestReflectLogicalTypes {
}
@Test
+ public void testReadUUIDArrayWithParquetUUID() throws IOException {
+ Schema uuidArraySchema =
SchemaBuilder.record(RecordWithUUIDArray.class.getName())
+ .fields()
+ .name("uuids").type().array().items().stringType().noDefault()
+ .endRecord();
+ LogicalTypes.uuid().addToSchema(
+ uuidArraySchema.getField("uuids").schema().getElementType());
+
+ UUID u1 = UUID.randomUUID();
+ UUID u2 = UUID.randomUUID();
+
+ GenericRecord r = new GenericData.Record(uuidArraySchema);
+ r.put("uuids", Arrays.asList(u1.toString(), u2.toString()));
+
+ RecordWithUUIDArray expected = new RecordWithUUIDArray();
+ expected.uuids = new UUID[] {u1, u2};
+
+ File test = write(AvroTestUtil.conf(AvroWriteSupport.WRITE_PARQUET_UUID,
true), uuidArraySchema, r);
+
+ Assert.assertEquals("Should convert Strings to UUIDs",
+ expected,
+ read(REFLECT, uuidArraySchema, test).get(0));
+ }
+
+ @Test
public void testWriteUUIDArray() throws IOException {
Schema uuidArraySchema =
SchemaBuilder.record(RecordWithUUIDArray.class.getName())
.fields()
@@ -559,6 +734,43 @@ public class TestReflectLogicalTypes {
}
@Test
+ public void testWriteUUIDArrayWithParquetUUID() throws IOException {
+ Schema uuidArraySchema =
SchemaBuilder.record(RecordWithUUIDArray.class.getName())
+ .fields()
+ .name("uuids").type().array().items().stringType().noDefault()
+ .endRecord();
+ LogicalTypes.uuid().addToSchema(
+ uuidArraySchema.getField("uuids").schema().getElementType());
+
+ Schema stringArraySchema = SchemaBuilder.record("RecordWithUUIDArray")
+ .fields()
+ .name("uuids").type().array().items().stringType().noDefault()
+ .endRecord();
+ LogicalTypes.uuid().addToSchema(
+ stringArraySchema.getField("uuids").schema().getElementType());
+ stringArraySchema.getField("uuids").schema()
+ .addProp(SpecificData.CLASS_PROP, List.class.getName());
+
+ UUID u1 = UUID.randomUUID();
+ UUID u2 = UUID.randomUUID();
+
+ GenericRecord expected = new GenericData.Record(stringArraySchema);
+ List<String> uuids = new ArrayList<String>();
+ uuids.add(u1.toString());
+ uuids.add(u2.toString());
+ expected.put("uuids", uuids);
+
+ RecordWithUUIDArray r = new RecordWithUUIDArray();
+ r.uuids = new UUID[] {u1, u2};
+
+ File test = write(AvroTestUtil.conf(AvroWriteSupport.WRITE_PARQUET_UUID,
true), REFLECT, uuidArraySchema, r);
+
+ Assert.assertEquals("Should read UUIDs as Strings",
+ expected,
+ read(ReflectData.get(), stringArraySchema, test).get(0));
+ }
+
+ @Test
public void testReadUUIDList() throws IOException {
Schema uuidListSchema =
SchemaBuilder.record(RecordWithUUIDList.class.getName())
.fields()
@@ -585,6 +797,32 @@ public class TestReflectLogicalTypes {
}
@Test
+ public void testReadUUIDListWithParquetUUID() throws IOException {
+ Schema uuidListSchema =
SchemaBuilder.record(RecordWithUUIDList.class.getName())
+ .fields()
+ .name("uuids").type().array().items().stringType().noDefault()
+ .endRecord();
+ uuidListSchema.getField("uuids").schema().addProp(
+ SpecificData.CLASS_PROP, List.class.getName());
+ LogicalTypes.uuid().addToSchema(
+ uuidListSchema.getField("uuids").schema().getElementType());
+
+ UUID u1 = UUID.randomUUID();
+ UUID u2 = UUID.randomUUID();
+
+ GenericRecord r = new GenericData.Record(uuidListSchema);
+ r.put("uuids", Arrays.asList(u1.toString(), u2.toString()));
+
+ RecordWithUUIDList expected = new RecordWithUUIDList();
+ expected.uuids = Arrays.asList(u1, u2);
+
+ File test = write(AvroTestUtil.conf(AvroWriteSupport.WRITE_PARQUET_UUID,
true), uuidListSchema, r);
+
+ Assert.assertEquals("Should convert Strings to UUIDs",
+ expected, read(REFLECT, uuidListSchema, test).get(0));
+ }
+
+ @Test
public void testWriteUUIDList() throws IOException {
Schema uuidListSchema =
SchemaBuilder.record(RecordWithUUIDList.class.getName())
.fields()
@@ -618,14 +856,61 @@ public class TestReflectLogicalTypes {
read(REFLECT, stringArraySchema, test).get(0));
}
+ @Test
+ public void testWriteUUIDListWithParquetUUID() throws IOException {
+ Schema uuidListSchema =
SchemaBuilder.record(RecordWithUUIDList.class.getName())
+ .fields()
+ .name("uuids").type().array().items().stringType().noDefault()
+ .endRecord();
+ uuidListSchema.getField("uuids").schema().addProp(
+ SpecificData.CLASS_PROP, List.class.getName());
+ LogicalTypes.uuid().addToSchema(
+ uuidListSchema.getField("uuids").schema().getElementType());
+
+ Schema reflectSchema = SchemaBuilder.record("RecordWithUUIDArray")
+ .fields()
+ .name("uuids").type().array().items().stringType().noDefault()
+ .endRecord();
+ reflectSchema.getField("uuids").schema()
+ .addProp(SpecificData.CLASS_PROP, List.class.getName());
+ LogicalTypes.uuid().addToSchema(
+ reflectSchema.getField("uuids").schema().getElementType());
+
+ UUID u1 = UUID.randomUUID();
+ UUID u2 = UUID.randomUUID();
+
+ GenericRecord expected = new GenericData.Record(reflectSchema);
+ expected.put("uuids", Arrays.asList(u1, u2));
+
+ RecordWithUUIDList r = new RecordWithUUIDList();
+ r.uuids = Arrays.asList(u1, u2);
+
+ File test = write(AvroTestUtil.conf(AvroWriteSupport.WRITE_PARQUET_UUID,
true), REFLECT, uuidListSchema, r);
+
+ Assert.assertEquals("Should read UUID objects",
+ expected,
+ read(REFLECT, reflectSchema, test).get(0));
+ }
+
+ @SuppressWarnings("unchecked")
private <D> File write(Schema schema, D... data) throws IOException {
return write(ReflectData.get(), schema, data);
}
@SuppressWarnings("unchecked")
+ private <D> File write(Configuration conf, Schema schema, D... data) throws
IOException {
+ return write(conf, ReflectData.get(), schema, data);
+ }
+
+ @SuppressWarnings("unchecked")
private <D> File write(GenericData model, Schema schema, D... data) throws
IOException {
return AvroTestUtil.write(temp, model, schema, data);
}
+
+ @SuppressWarnings("unchecked")
+ private <D> File write(Configuration conf, GenericData model, Schema schema,
D... data) throws IOException {
+ return AvroTestUtil.write(temp, conf, model, schema, data);
+ }
}
class RecordWithUUID {
@@ -645,7 +930,7 @@ class RecordWithUUID {
return false;
}
RecordWithUUID that = (RecordWithUUID) obj;
- return this.uuid.equals(that.uuid);
+ return Objects.equals(this.uuid, that.uuid);
}
}
@@ -666,7 +951,7 @@ class RecordWithStringUUID {
return false;
}
RecordWithStringUUID that = (RecordWithStringUUID) obj;
- return this.uuid.equals(that.uuid);
+ return Objects.equals(this.uuid, that.uuid);
}
}
diff --git a/parquet-column/pom.xml b/parquet-column/pom.xml
index 9f7479a6..44fc1a2 100644
--- a/parquet-column/pom.xml
+++ b/parquet-column/pom.xml
@@ -43,6 +43,13 @@
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
+ <artifactId>parquet-common</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.parquet</groupId>
<artifactId>parquet-encoding</artifactId>
<version>${project.version}</version>
</dependency>
diff --git
a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
index 5f61ed6..495b6dc 100644
---
a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
+++
b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
@@ -130,6 +130,12 @@ public abstract class LogicalTypeAnnotation {
return bsonType();
}
},
+ UUID {
+ @Override
+ protected LogicalTypeAnnotation fromString(List<String> params) {
+ return uuidType();
+ }
+ },
INTERVAL {
@Override
protected LogicalTypeAnnotation fromString(List<String> params) {
@@ -286,6 +292,10 @@ public abstract class LogicalTypeAnnotation {
return BsonLogicalTypeAnnotation.INSTANCE;
}
+ public static UUIDLogicalTypeAnnotation uuidType() {
+ return UUIDLogicalTypeAnnotation.INSTANCE;
+ }
+
public static class StringLogicalTypeAnnotation extends
LogicalTypeAnnotation {
private static final StringLogicalTypeAnnotation INSTANCE = new
StringLogicalTypeAnnotation();
@@ -861,6 +871,36 @@ public abstract class LogicalTypeAnnotation {
}
}
+ public static class UUIDLogicalTypeAnnotation extends LogicalTypeAnnotation {
+ private static final UUIDLogicalTypeAnnotation INSTANCE = new
UUIDLogicalTypeAnnotation();
+ public static final int BYTES = 16;
+
+ private UUIDLogicalTypeAnnotation() {
+ }
+
+ @Override
+ @InterfaceAudience.Private
+ public OriginalType toOriginalType() {
+ // No OriginalType for UUID
+ return null;
+ }
+
+ @Override
+ public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T>
logicalTypeAnnotationVisitor) {
+ return logicalTypeAnnotationVisitor.visit(this);
+ }
+
+ @Override
+ LogicalTypeToken getType() {
+ return LogicalTypeToken.UUID;
+ }
+
+ @Override
+ PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+ return PrimitiveStringifier.UUID_STRINGIFIER;
+ }
+ }
+
// This logical type annotation is implemented to support backward
compatibility with ConvertedType.
// The new logical type representation in parquet-format doesn't have any
interval type,
// thus this annotation is mapped to UNKNOWN.
@@ -1009,6 +1049,10 @@ public abstract class LogicalTypeAnnotation {
return empty();
}
+ default Optional<T> visit(UUIDLogicalTypeAnnotation uuidLogicalType) {
+ return empty();
+ }
+
default Optional<T> visit(IntervalLogicalTypeAnnotation
intervalLogicalType) {
return empty();
}
diff --git
a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
index 4705ad9..29c6235 100644
---
a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
+++
b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
@@ -32,6 +32,7 @@ import java.nio.ByteOrder;
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
+import java.util.UUID;
import java.util.concurrent.TimeUnit;
import javax.naming.OperationNotSupportedException;
@@ -421,4 +422,30 @@ public abstract class PrimitiveStringifier {
}
};
}
+
+ static final PrimitiveStringifier UUID_STRINGIFIER = new
PrimitiveStringifier("UUID_STRINGIFIER") {
+ private final char[] digit = "0123456789abcdef".toCharArray();
+ @Override
+ public String stringify(Binary value) {
+ byte[] bytes = value.getBytesUnsafe();
+ StringBuilder builder = new StringBuilder(36);
+ appendHex(bytes, 0, 4, builder);
+ builder.append('-');
+ appendHex(bytes, 4, 2, builder);
+ builder.append('-');
+ appendHex(bytes, 6, 2, builder);
+ builder.append('-');
+ appendHex(bytes, 8, 2, builder);
+ builder.append('-');
+ appendHex(bytes, 10, 6, builder);
+ return builder.toString();
+ }
+
+ private void appendHex(byte[] array, int offset, int length, StringBuilder
builder) {
+ for (int i = offset, n = offset + length; i < n; ++i) {
+ int value = array[i] & 0xff;
+ builder.append(digit[value >>> 4]).append(digit[value & 0x0f]);
+ }
+ }
+ };
}
diff --git
a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
index 21502d8..026f418 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
@@ -32,6 +32,8 @@ import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.PrimitiveConverter;
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.schema.ColumnOrder.ColumnOrderName;
+import
org.apache.parquet.schema.LogicalTypeAnnotation.LogicalTypeAnnotationVisitor;
+import
org.apache.parquet.schema.LogicalTypeAnnotation.UUIDLogicalTypeAnnotation;
import static java.util.Optional.empty;
import static java.util.Optional.of;
@@ -383,6 +385,11 @@ public final class PrimitiveType extends Type {
public Optional<PrimitiveComparator>
visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) {
return
of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR);
}
+
+ @Override
+ public Optional<PrimitiveComparator> visit(UUIDLogicalTypeAnnotation
uuidLogicalType) {
+ return
of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR);
+ }
}).orElseThrow(() -> new ShouldNeverHappenException(
"No comparator logic implemented for FIXED_LEN_BYTE_ARRAY logical
type: " + logicalType));
}
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
index c62010a..05db655 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
@@ -445,20 +445,22 @@ public class Types {
logicalTypeAnnotation.accept(new
LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Boolean>() {
@Override
public Optional<Boolean>
visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
- checkBinaryPrimitiveType(stringLogicalType);
- return Optional.of(true);
+ return checkBinaryPrimitiveType(stringLogicalType);
}
@Override
public Optional<Boolean>
visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation jsonLogicalType) {
- checkBinaryPrimitiveType(jsonLogicalType);
- return Optional.of(true);
+ return checkBinaryPrimitiveType(jsonLogicalType);
}
@Override
public Optional<Boolean>
visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) {
- checkBinaryPrimitiveType(bsonLogicalType);
- return Optional.of(true);
+ return checkBinaryPrimitiveType(bsonLogicalType);
+ }
+
+ @Override
+ public Optional<Boolean>
visit(LogicalTypeAnnotation.UUIDLogicalTypeAnnotation uuidLogicalType) {
+ return
checkFixedPrimitiveType(LogicalTypeAnnotation.UUIDLogicalTypeAnnotation.BYTES,
uuidLogicalType);
}
@Override
@@ -495,8 +497,7 @@ public class Types {
@Override
public Optional<Boolean>
visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) {
- checkInt32PrimitiveType(dateLogicalType);
- return Optional.of(true);
+ return checkInt32PrimitiveType(dateLogicalType);
}
@Override
@@ -536,41 +537,43 @@ public class Types {
@Override
public Optional<Boolean>
visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation
timestampLogicalType) {
- checkInt64PrimitiveType(timestampLogicalType);
- return Optional.of(true);
+ return checkInt64PrimitiveType(timestampLogicalType);
}
@Override
public Optional<Boolean>
visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) {
- Preconditions.checkState(
- (primitiveType == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) &&
- (length == 12),
- "INTERVAL can only annotate FIXED_LEN_BYTE_ARRAY(12)");
- return Optional.of(true);
+ return checkFixedPrimitiveType(12, intervalLogicalType);
}
@Override
public Optional<Boolean>
visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumLogicalType) {
+ return checkBinaryPrimitiveType(enumLogicalType);
+ }
+
+ private Optional<Boolean> checkFixedPrimitiveType(int l,
LogicalTypeAnnotation logicalTypeAnnotation) {
Preconditions.checkState(
- primitiveType == PrimitiveTypeName.BINARY,
- "ENUM can only annotate binary fields");
+ primitiveType == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY &&
length == l,
+ logicalTypeAnnotation.toString() + " can only annotate
FIXED_LEN_BYTE_ARRAY(" + l + ')');
return Optional.of(true);
}
- private void checkBinaryPrimitiveType(LogicalTypeAnnotation
logicalTypeAnnotation) {
+ private Optional<Boolean>
checkBinaryPrimitiveType(LogicalTypeAnnotation logicalTypeAnnotation) {
Preconditions.checkState(
primitiveType == PrimitiveTypeName.BINARY,
- logicalTypeAnnotation.toString() + " can only annotate binary
fields");
+ logicalTypeAnnotation.toString() + " can only annotate BINARY");
+ return Optional.of(true);
}
- private void checkInt32PrimitiveType(LogicalTypeAnnotation
logicalTypeAnnotation) {
+ private Optional<Boolean>
checkInt32PrimitiveType(LogicalTypeAnnotation logicalTypeAnnotation) {
Preconditions.checkState(primitiveType == PrimitiveTypeName.INT32,
logicalTypeAnnotation.toString() + " can only annotate INT32");
+ return Optional.of(true);
}
- private void checkInt64PrimitiveType(LogicalTypeAnnotation
logicalTypeAnnotation) {
+ private Optional<Boolean>
checkInt64PrimitiveType(LogicalTypeAnnotation logicalTypeAnnotation) {
Preconditions.checkState(primitiveType == PrimitiveTypeName.INT64,
logicalTypeAnnotation.toString() + " can only annotate INT64");
+ return Optional.of(true);
}
}).orElseThrow(() -> new IllegalStateException(logicalTypeAnnotation +
" can not be applied to a primitive type"));
}
diff --git
a/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java
b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java
index b5de4f8..ea8fcd4 100644
---
a/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java
+++
b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java
@@ -52,6 +52,7 @@ import java.util.Set;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
+import org.apache.parquet.TestUtils;
import org.apache.parquet.io.api.Binary;
import org.junit.Test;
@@ -84,8 +85,7 @@ public class TestPrimitiveStringifier {
assertEquals("null", stringifier.stringify(null));
assertEquals("0x", stringifier.stringify(Binary.EMPTY));
- assertEquals("0x0123456789ABCDEF",
stringifier.stringify(Binary.fromConstantByteArray(
- new byte[] { 0x01, 0x23, 0x45, 0x67, (byte) 0x89, (byte) 0xAB, (byte)
0xCD, (byte) 0xEF })));
+ assertEquals("0x0123456789ABCDEF", stringifier.stringify(toBinary(0x01,
0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF)));
}
@Test
@@ -309,6 +309,40 @@ public class TestPrimitiveStringifier {
checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE,
Binary.class);
}
+ @Test
+ public void testUUIDStringifier() {
+ PrimitiveStringifier stringifier = PrimitiveStringifier.UUID_STRINGIFIER;
+
+ assertEquals("00112233-4455-6677-8899-aabbccddeeff", stringifier.stringify(
+ toBinary(0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99,
0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff)));
+ assertEquals("00000000-0000-0000-0000-000000000000", stringifier.stringify(
+ toBinary(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00)));
+ assertEquals("ffffffff-ffff-ffff-ffff-ffffffffffff", stringifier.stringify(
+ toBinary(0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff)));
+ assertEquals("0eb1497c-19b6-42bc-b028-b4b612bed141", stringifier.stringify(
+ toBinary(0x0e, 0xb1, 0x49, 0x7c, 0x19, 0xb6, 0x42, 0xbc, 0xb0, 0x28,
0xb4, 0xb6, 0x12, 0xbe, 0xd1, 0x41)));
+
+ // Check that the stringifier does not care about the length, it always
takes the first 16 bytes
+ assertEquals("87a09cca-3b1e-4a0a-9c77-591924c3b57b", stringifier.stringify(
+ toBinary(0x87, 0xa0, 0x9c, 0xca, 0x3b, 0x1e, 0x4a, 0x0a, 0x9c, 0x77,
0x59, 0x19, 0x24, 0xc3, 0xb5, 0x7b, 0x00,
+ 0x00, 0x00)));
+
+ // As there is no validation implemented, if the 16 bytes is not
available, the array will be over-indexed
+ TestUtils.assertThrows("Expected exception for over-indexing",
ArrayIndexOutOfBoundsException.class,
+ () -> stringifier.stringify(
+ toBinary(0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88,
0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee)));
+
+ checkThrowingUnsupportedException(stringifier, Binary.class);
+ }
+
+ private Binary toBinary(int...bytes) {
+ byte[] array = new byte[bytes.length];
+ for (int i = 0; i < array.length; ++i) {
+ array[i] = (byte) bytes[i];
+ }
+ return Binary.fromConstantByteArray(array);
+ }
+
private void checkThrowingUnsupportedException(PrimitiveStringifier
stringifier, Class<?>... excludes) {
Set<Class<?>> set = new HashSet<>(asList(excludes));
if (!set.contains(Integer.TYPE)) {
@@ -354,5 +388,4 @@ public class TestPrimitiveStringifier {
}
}
}
-
}
diff --git
a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java
b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java
index fe13e60..c631867 100644
---
a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java
+++
b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java
@@ -36,6 +36,7 @@ import static
org.apache.parquet.schema.LogicalTypeAnnotation.jsonType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.uuidType;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
import static
org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
@@ -45,6 +46,7 @@ import static
org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96;
import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
+import static org.junit.Assert.assertEquals;
public class TestTypeBuildersWithLogicalTypes {
@Test
@@ -389,6 +391,18 @@ public class TestTypeBuildersWithLogicalTypes {
.precision(5).named("aDecimal");
}
+ @Test
+ public void testUUIDLogicalType() {
+ assertEquals(
+ "required fixed_len_byte_array(16) uuid_field (UUID)",
+
Types.required(FIXED_LEN_BYTE_ARRAY).length(16).as(uuidType()).named("uuid_field").toString());
+
+ assertThrows("Should fail with invalid length",
IllegalStateException.class,
+ () ->
Types.required(FIXED_LEN_BYTE_ARRAY).length(10).as(uuidType()).named("uuid_field").toString());
+ assertThrows("Should fail with invalid type", IllegalStateException.class,
+ () ->
Types.required(BINARY).as(uuidType()).named("uuid_field").toString());
+ }
+
/**
* A convenience method to avoid a large number of @Test(expected=...) tests
* @param message A String message to describe this assertion
diff --git
a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index 0c6c2df..e2a93eb 100644
---
a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++
b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -95,6 +95,7 @@ import org.apache.parquet.format.SchemaElement;
import org.apache.parquet.format.Statistics;
import org.apache.parquet.format.Type;
import org.apache.parquet.format.TypeDefinedOrder;
+import org.apache.parquet.format.UUIDType;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
@@ -107,6 +108,8 @@ import
org.apache.parquet.internal.hadoop.metadata.IndexReference;
import org.apache.parquet.io.ParquetDecodingException;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.schema.ColumnOrder.ColumnOrderName;
+import
org.apache.parquet.schema.LogicalTypeAnnotation.LogicalTypeAnnotationVisitor;
+import
org.apache.parquet.schema.LogicalTypeAnnotation.UUIDLogicalTypeAnnotation;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
@@ -453,6 +456,11 @@ public class ParquetMetadataConverter {
}
@Override
+ public Optional<LogicalType> visit(UUIDLogicalTypeAnnotation
uuidLogicalType) {
+ return of(LogicalType.UUID(new UUIDType()));
+ }
+
+ @Override
public Optional<LogicalType>
visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) {
return of(LogicalType.UNKNOWN(new NullType()));
}
@@ -838,6 +846,11 @@ public class ParquetMetadataConverter {
}
@Override
+ public Optional<SortOrder> visit(UUIDLogicalTypeAnnotation
uuidLogicalType) {
+ return of(SortOrder.UNSIGNED);
+ }
+
+ @Override
public Optional<SortOrder>
visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation jsonLogicalType) {
return of(SortOrder.UNSIGNED);
}
@@ -1013,6 +1026,8 @@ public class ParquetMetadataConverter {
case TIMESTAMP:
TimestampType timestamp = type.getTIMESTAMP();
return LogicalTypeAnnotation.timestampType(timestamp.isAdjustedToUTC,
convertTimeUnit(timestamp.unit));
+ case UUID:
+ return LogicalTypeAnnotation.uuidType();
default:
throw new RuntimeException("Unknown logical type " + type);
}
diff --git
a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index e9051ea..c8f631d 100644
---
a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++
b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -34,6 +34,7 @@ import static
org.apache.parquet.schema.LogicalTypeAnnotation.mapType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.uuidType;
import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -358,6 +359,8 @@ public class TestParquetMetadataConverter {
assertEquals(ConvertedType.JSON,
parquetMetadataConverter.convertToConvertedType(jsonType()));
assertEquals(ConvertedType.BSON,
parquetMetadataConverter.convertToConvertedType(bsonType()));
+ assertNull(parquetMetadataConverter.convertToConvertedType(uuidType()));
+
assertEquals(ConvertedType.LIST,
parquetMetadataConverter.convertToConvertedType(listType()));
assertEquals(ConvertedType.MAP,
parquetMetadataConverter.convertToConvertedType(mapType()));
assertEquals(ConvertedType.MAP_KEY_VALUE,
parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance()));
diff --git
a/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestColumnIndexes.java
b/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestColumnIndexes.java
index aac8e43..b13f36f 100644
---
a/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestColumnIndexes.java
+++
b/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestColumnIndexes.java
@@ -28,6 +28,7 @@ import static
org.apache.parquet.schema.LogicalTypeAnnotation.jsonType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.uuidType;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
import static
org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
@@ -59,6 +60,7 @@ import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
@@ -116,6 +118,7 @@ public class TestColumnIndexes {
Types.optional(INT64).as(timestampType(true,
TimeUnit.MILLIS)).named("timestamp-millis"),
Types.optional(INT64).as(timestampType(false,
TimeUnit.NANOS)).named("timestamp-nanos"),
Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(OriginalType.INTERVAL).named("interval"),
+
Types.optional(FIXED_LEN_BYTE_ARRAY).length(16).as(uuidType()).named("uuid"),
Types.optional(BINARY).as(stringType()).named("always-null"));
private static List<Supplier<?>> buildGenerators(int recordCount, Random
random) {
@@ -169,6 +172,7 @@ public class TestColumnIndexes {
sortedOrRandom(new RandomValues.LongGenerator(random.nextLong()),
random, recordCount, fieldIndex++),
sortedOrRandom(new RandomValues.LongGenerator(random.nextLong()),
random, recordCount, fieldIndex++),
sortedOrRandom(new RandomValues.FixedGenerator(random.nextLong(), 12),
random, recordCount, fieldIndex++),
+ sortedOrRandom(new RandomValues.FixedGenerator(random.nextLong(), 16),
random, recordCount, fieldIndex++),
null);
}