This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new af977ad PARQUET-1128: [Java] Upgrade the Apache Arrow version to
0.8.0 for SchemaConverter
af977ad is described below
commit af977adc43a071a09652fea4ce3deba2d5b8d171
Author: Masayuki Takahashi <[email protected]>
AuthorDate: Sat Apr 21 14:58:35 2018 +0100
PARQUET-1128: [Java] Upgrade the Apache Arrow version to 0.8.0 for
SchemaConverter
When I converted a Parquet (1.9.1-SNAPSHOT) schema to Arrow (0.4.0) with
SchemaConverter, the following exception was raised.
```
java.lang.NoClassDefFoundError:
org/apache/arrow/vector/types/pojo/ArrowType$Struct_
at
net.wrap_trap.parquet_arrow.ParquetToArrowConverter.convertToArrow(ParquetToArrowConverter.java:67)
at
net.wrap_trap.parquet_arrow.ParquetToArrowConverter.convertToArrow(ParquetToArrowConverter.java:40)
at
net.wrap_trap.parquet_arrow.ParquetToArrowConverterTest.parquetToArrowConverterTest(ParquetToArrowConverterTest.java:27)
```
The reason is that SchemaConverter still depends on Apache Arrow 0.1.0.
I upgraded the Apache Arrow version to 0.8.0 (latest) for SchemaConverter.
Author: Masayuki Takahashi <[email protected]>
Closes #443 from masayuki038/PARQUET-1128 and squashes the following
commits:
8ba47813 [Masayuki Takahashi] PARQUET-1128: [Java] Upgrade the Apache Arrow
version to 0.8.0 for SchemaConverter
b80d793a [Masayuki Takahashi] PARQUET-1128: [Java] Upgrade the Apache Arrow
version to 0.8.0 for SchemaConverter
---
parquet-arrow/pom.xml | 2 +-
.../parquet/arrow/schema/SchemaConverter.java | 227 +++++++-------
.../parquet/arrow/schema/TestSchemaConverter.java | 344 +++++++++++----------
3 files changed, 299 insertions(+), 274 deletions(-)
diff --git a/parquet-arrow/pom.xml b/parquet-arrow/pom.xml
index de31e16..232167e 100644
--- a/parquet-arrow/pom.xml
+++ b/parquet-arrow/pom.xml
@@ -33,7 +33,7 @@
<url>https://parquet.apache.org</url>
<properties>
- <arrow.version>0.1.0</arrow.version>
+ <arrow.version>0.8.0</arrow.version>
</properties>
<dependencies>
diff --git
a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
index cf4ec0d..1d69c45 100644
---
a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
+++
b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
@@ -47,8 +47,8 @@ import static
org.apache.parquet.schema.Type.Repetition.REQUIRED;
import java.util.ArrayList;
import java.util.List;
-import org.apache.arrow.flatbuf.Precision;
-import org.apache.arrow.flatbuf.TimeUnit;
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor;
import org.apache.arrow.vector.types.pojo.ArrowType.Binary;
@@ -59,7 +59,7 @@ import
org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
import org.apache.arrow.vector.types.pojo.ArrowType.Int;
import org.apache.arrow.vector.types.pojo.ArrowType.Interval;
import org.apache.arrow.vector.types.pojo.ArrowType.Null;
-import org.apache.arrow.vector.types.pojo.ArrowType.Struct_;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
import org.apache.arrow.vector.types.pojo.ArrowType.Time;
import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
import org.apache.arrow.vector.types.pojo.ArrowType.Union;
@@ -141,13 +141,22 @@ public class SchemaConverter {
}
@Override
- public TypeMapping visit(Struct_ type) {
+ public TypeMapping visit(Struct type) {
List<TypeMapping> parquetTypes = fromArrow(children);
return new StructTypeMapping(field, addToBuilder(parquetTypes,
Types.buildGroup(OPTIONAL)).named(fieldName), parquetTypes);
}
@Override
public TypeMapping
visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
+ return createListTypeMapping();
+ }
+
+ @Override
+ public TypeMapping
visit(org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList type) {
+ return createListTypeMapping();
+ }
+
+ private ListTypeMapping createListTypeMapping() {
if (children.size() != 1) {
throw new IllegalArgumentException("list fields must have exactly
one child: " + field);
}
@@ -167,31 +176,31 @@ public class SchemaConverter {
public TypeMapping visit(Int type) {
boolean signed = type.getIsSigned();
switch (type.getBitWidth()) {
- case 8:
- return primitive(INT32, signed ? INT_8 : UINT_8);
- case 16:
- return primitive(INT32, signed ? INT_16 : UINT_16);
- case 32:
- return primitive(INT32, signed ? INT_32 : UINT_32);
- case 64:
- return primitive(INT64, signed ? INT_64 : UINT_64);
- default:
- throw new IllegalArgumentException("Illegal int type: " + field);
+ case 8:
+ return primitive(INT32, signed ? INT_8 : UINT_8);
+ case 16:
+ return primitive(INT32, signed ? INT_16 : UINT_16);
+ case 32:
+ return primitive(INT32, signed ? INT_32 : UINT_32);
+ case 64:
+ return primitive(INT64, signed ? INT_64 : UINT_64);
+ default:
+ throw new IllegalArgumentException("Illegal int type: " + field);
}
}
@Override
public TypeMapping visit(FloatingPoint type) {
switch (type.getPrecision()) {
- case Precision.HALF:
- // TODO(PARQUET-757): original type HalfFloat
- return primitive(FLOAT);
- case Precision.SINGLE:
- return primitive(FLOAT);
- case Precision.DOUBLE:
- return primitive(DOUBLE);
- default:
- throw new IllegalArgumentException("Illegal float type: " + field);
+ case HALF:
+ // TODO(PARQUET-757): original type HalfFloat
+ return primitive(FLOAT);
+ case SINGLE:
+ return primitive(FLOAT);
+ case DOUBLE:
+ return primitive(DOUBLE);
+ default:
+ throw new IllegalArgumentException("Illegal float type: " + field);
}
}
@@ -336,7 +345,7 @@ public class SchemaConverter {
OriginalType ot = type.getOriginalType();
if (ot == null) {
List<TypeMapping> typeMappings = fromParquet(type.getFields());
- Field arrowField = new Field(name, type.isRepetition(OPTIONAL), new
Struct_(), fields(typeMappings));
+ Field arrowField = new Field(name, type.isRepetition(OPTIONAL), new
Struct(), fields(typeMappings));
return new StructTypeMapping(arrowField, type, typeMappings);
} else {
switch (ot) {
@@ -366,12 +375,12 @@ public class SchemaConverter {
@Override
public TypeMapping convertFLOAT(PrimitiveTypeName primitiveTypeName)
throws RuntimeException {
- return field(new ArrowType.FloatingPoint(Precision.SINGLE));
+ return field(new
ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
}
@Override
public TypeMapping convertDOUBLE(PrimitiveTypeName primitiveTypeName)
throws RuntimeException {
- return field(new ArrowType.FloatingPoint(Precision.DOUBLE));
+ return field(new
ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
}
@Override
@@ -381,41 +390,41 @@ public class SchemaConverter {
return integer(32, true);
}
switch (ot) {
- case INT_8:
- return integer(8, true);
- case INT_16:
- return integer(16, true);
- case INT_32:
- return integer(32, true);
- case UINT_8:
- return integer(8, false);
- case UINT_16:
- return integer(16, false);
- case UINT_32:
- return integer(32, false);
- case DECIMAL:
- return decimal(type.getDecimalMetadata());
- case DATE:
- return field(new ArrowType.Date());
- case TIMESTAMP_MICROS:
- return field(new ArrowType.Timestamp(TimeUnit.MICROSECOND));
- case TIMESTAMP_MILLIS:
- return field(new ArrowType.Timestamp(TimeUnit.MILLISECOND));
- case TIME_MILLIS:
- return field(new ArrowType.Time());
- default:
- case TIME_MICROS:
- case INT_64:
- case UINT_64:
- case UTF8:
- case ENUM:
- case BSON:
- case INTERVAL:
- case JSON:
- case LIST:
- case MAP:
- case MAP_KEY_VALUE:
- throw new IllegalArgumentException("illegal type " + type);
+ case INT_8:
+ return integer(8, true);
+ case INT_16:
+ return integer(16, true);
+ case INT_32:
+ return integer(32, true);
+ case UINT_8:
+ return integer(8, false);
+ case UINT_16:
+ return integer(16, false);
+ case UINT_32:
+ return integer(32, false);
+ case DECIMAL:
+ return decimal(type.getDecimalMetadata());
+ case DATE:
+ return field(new ArrowType.Date(DateUnit.DAY));
+ case TIMESTAMP_MICROS:
+ return field(new
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND, "UTC"));
+ case TIMESTAMP_MILLIS:
+ return field(new
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC"));
+ case TIME_MILLIS:
+ return field(new
ArrowType.Time(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, 32));
+ default:
+ case TIME_MICROS:
+ case INT_64:
+ case UINT_64:
+ case UTF8:
+ case ENUM:
+ case BSON:
+ case INTERVAL:
+ case JSON:
+ case LIST:
+ case MAP:
+ case MAP_KEY_VALUE:
+ throw new IllegalArgumentException("illegal type " + type);
}
}
@@ -426,43 +435,42 @@ public class SchemaConverter {
return integer(64, true);
}
switch (ot) {
- case INT_8:
- return integer(8, true);
- case INT_16:
- return integer(16, true);
- case INT_32:
- return integer(32, true);
- case INT_64:
- return integer(64, true);
- case UINT_8:
- return integer(8, false);
- case UINT_16:
- return integer(16, false);
- case UINT_32:
- return integer(32, false);
- case UINT_64:
- return integer(64, false);
- case DECIMAL:
- return decimal(type.getDecimalMetadata());
- case DATE:
- return field(new ArrowType.Date());
- case TIMESTAMP_MICROS:
- return field(new ArrowType.Timestamp(TimeUnit.MICROSECOND));
- case TIMESTAMP_MILLIS:
- return field(new ArrowType.Timestamp(TimeUnit.MILLISECOND));
- case TIME_MILLIS:
- return field(new ArrowType.Time());
- default:
- case TIME_MICROS:
- case UTF8:
- case ENUM:
- case BSON:
- case INTERVAL:
- case JSON:
- case LIST:
- case MAP:
- case MAP_KEY_VALUE:
- throw new IllegalArgumentException("illegal type " + type);
+ case INT_8:
+ return integer(8, true);
+ case INT_16:
+ return integer(16, true);
+ case INT_32:
+ return integer(32, true);
+ case INT_64:
+ return integer(64, true);
+ case UINT_8:
+ return integer(8, false);
+ case UINT_16:
+ return integer(16, false);
+ case UINT_32:
+ return integer(32, false);
+ case UINT_64:
+ return integer(64, false);
+ case DECIMAL:
+ return decimal(type.getDecimalMetadata());
+ case DATE:
+ return field(new ArrowType.Date(DateUnit.DAY));
+ case TIMESTAMP_MICROS:
+ return field(new
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND, "UTC"));
+ case TIMESTAMP_MILLIS:
+ return field(new
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC"));
+ default:
+ case TIME_MICROS:
+ case UTF8:
+ case ENUM:
+ case BSON:
+ case INTERVAL:
+ case JSON:
+ case LIST:
+ case MAP:
+ case MAP_KEY_VALUE:
+ case TIME_MILLIS:
+ throw new IllegalArgumentException("illegal type " + type);
}
}
@@ -489,12 +497,12 @@ public class SchemaConverter {
return field(new ArrowType.Binary());
}
switch (ot) {
- case UTF8:
- return field(new ArrowType.Utf8());
- case DECIMAL:
- return decimal(type.getDecimalMetadata());
- default:
- throw new IllegalArgumentException("illegal type " + type);
+ case UTF8:
+ return field(new ArrowType.Utf8());
+ case DECIMAL:
+ return decimal(type.getDecimalMetadata());
+ default:
+ throw new IllegalArgumentException("illegal type " + type);
}
}
@@ -545,7 +553,7 @@ public class SchemaConverter {
}
@Override
- public TypeMapping visit(Struct_ type) {
+ public TypeMapping visit(Struct type) {
if (parquetField.isPrimitive()) {
throw new IllegalArgumentException("Parquet type not a group: " +
parquetField);
}
@@ -555,6 +563,15 @@ public class SchemaConverter {
@Override
public TypeMapping
visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
+ return createListTypeMapping(type);
+ }
+
+ @Override
+ public TypeMapping
visit(org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList type) {
+ return createListTypeMapping(type);
+ }
+
+ private TypeMapping createListTypeMapping(ArrowType.ComplexType type) {
if (arrowField.getChildren().size() != 1) {
throw new IllegalArgumentException("Invalid list type: " + type);
}
diff --git
a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
index ec2b807..654f773 100644
---
a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
+++
b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
@@ -43,11 +43,11 @@ import static
org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
import java.io.IOException;
import java.util.List;
+import org.apache.arrow.vector.types.IntervalUnit;
-import org.apache.arrow.flatbuf.IntervalUnit;
-import org.apache.arrow.flatbuf.Precision;
-import org.apache.arrow.flatbuf.TimeUnit;
-import org.apache.arrow.flatbuf.UnionMode;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;
@@ -79,159 +79,167 @@ public class TestSchemaConverter {
}
private final Schema complexArrowSchema = new Schema(asList(
- field("a", false, new ArrowType.Int(8, true)),
- field("b", new ArrowType.Struct_(),
- field("c", new ArrowType.Int(16, true)),
- field("d", new ArrowType.Utf8())),
- field("e", new ArrowType.List(), field(null, new ArrowType.Date())),
- field("f", new ArrowType.FloatingPoint(Precision.SINGLE)),
- field("g", new ArrowType.Timestamp(TimeUnit.MILLISECOND)),
- field("h", new ArrowType.Interval(IntervalUnit.DAY_TIME))
- ));
+ field("a", false, new ArrowType.Int(8, true)),
+ field("b", new ArrowType.Struct(),
+ field("c", new ArrowType.Int(16, true)),
+ field("d", new ArrowType.Utf8())),
+ field("e", new ArrowType.List(), field(null, new
ArrowType.Date(DateUnit.DAY))),
+ field("f", new ArrowType.FixedSizeList(1), field(null, new
ArrowType.Date(DateUnit.DAY))),
+ field("g", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)),
+ field("h", new
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC")),
+ field("i", new ArrowType.Interval(IntervalUnit.DAY_TIME))
+ ));
private final MessageType complexParquetSchema = Types.buildMessage()
- .addField(Types.optional(INT32).as(INT_8).named("a"))
- .addField(Types.optionalGroup()
- .addField(Types.optional(INT32).as(INT_16).named("c"))
- .addField(Types.optional(BINARY).as(UTF8).named("d"))
- .named("b"))
- .addField(Types.optionalList().
- setElementType(Types.optional(INT32).as(DATE).named("element"))
- .named("e"))
- .addField(Types.optional(FLOAT).named("f"))
- .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("g"))
-
.addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("h"))
- .named("root");
+ .addField(Types.optional(INT32).as(INT_8).named("a"))
+ .addField(Types.optionalGroup()
+ .addField(Types.optional(INT32).as(INT_16).named("c"))
+ .addField(Types.optional(BINARY).as(UTF8).named("d"))
+ .named("b"))
+ .addField(Types.optionalList().
+ setElementType(Types.optional(INT32).as(DATE).named("element"))
+ .named("e"))
+ .addField(Types.optionalList().
+ setElementType(Types.optional(INT32).as(DATE).named("element"))
+ .named("f"))
+ .addField(Types.optional(FLOAT).named("g"))
+ .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("h"))
+
.addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("i"))
+ .named("root");
private final Schema allTypesArrowSchema = new Schema(asList(
- field("a", false, new ArrowType.Null()),
- field("b", new ArrowType.Struct_(), field("ba", new ArrowType.Null())),
- field("c", new ArrowType.List(), field("ca", new ArrowType.Null())),
- field("d", new ArrowType.Union(UnionMode.Sparse, new int[] {1, 2, 3}),
field("da", new ArrowType.Null())),
- field("e", new ArrowType.Int(8, true)),
- field("e1", new ArrowType.Int(16, true)),
- field("e2", new ArrowType.Int(32, true)),
- field("e3", new ArrowType.Int(64, true)),
- field("e4", new ArrowType.Int(8, false)),
- field("e5", new ArrowType.Int(16, false)),
- field("e6", new ArrowType.Int(32, false)),
- field("e7", new ArrowType.Int(64, false)),
- field("f", new ArrowType.FloatingPoint(Precision.SINGLE)),
- field("f1", new ArrowType.FloatingPoint(Precision.DOUBLE)),
- field("g", new ArrowType.Utf8()),
- field("h", new ArrowType.Binary()),
- field("i", new ArrowType.Bool()),
- field("j", new ArrowType.Decimal(5, 5)),
- field("j1", new ArrowType.Decimal(15, 5)),
- field("j2", new ArrowType.Decimal(25, 5)),
- field("k", new ArrowType.Date()),
- field("l", new ArrowType.Time()),
- field("m", new ArrowType.Timestamp(TimeUnit.MILLISECOND)),
- field("n", new ArrowType.Interval(IntervalUnit.DAY_TIME)),
- field("n1", new ArrowType.Interval(IntervalUnit.YEAR_MONTH))
- ));
+ field("a", false, new ArrowType.Null()),
+ field("b", new ArrowType.Struct(), field("ba", new ArrowType.Null())),
+ field("c", new ArrowType.List(), field("ca", new ArrowType.Null())),
+ field("d", new ArrowType.FixedSizeList(1), field("da", new
ArrowType.Null())),
+ field("e", new ArrowType.Union(UnionMode.Sparse, new int[] {1, 2, 3}),
field("ea", new ArrowType.Null())),
+ field("f", new ArrowType.Int(8, true)),
+ field("f1", new ArrowType.Int(16, true)),
+ field("f2", new ArrowType.Int(32, true)),
+ field("f3", new ArrowType.Int(64, true)),
+ field("f4", new ArrowType.Int(8, false)),
+ field("f5", new ArrowType.Int(16, false)),
+ field("f6", new ArrowType.Int(32, false)),
+ field("f7", new ArrowType.Int(64, false)),
+ field("g", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)),
+ field("g1", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
+ field("h", new ArrowType.Utf8()),
+ field("i", new ArrowType.Binary()),
+ field("j", new ArrowType.Bool()),
+ field("k", new ArrowType.Decimal(5, 5)),
+ field("k1", new ArrowType.Decimal(15, 5)),
+ field("k2", new ArrowType.Decimal(25, 5)),
+ field("l", new ArrowType.Date(DateUnit.DAY)),
+ field("m", new
ArrowType.Time(org.apache.arrow.vector.types.TimeUnit.SECOND, 32)),
+ field("n", new
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC")),
+ field("o", new ArrowType.Interval(IntervalUnit.DAY_TIME)),
+ field("o1", new ArrowType.Interval(IntervalUnit.YEAR_MONTH))
+ ));
private final MessageType allTypesParquetSchema = Types.buildMessage()
- .addField(Types.optional(BINARY).named("a"))
- .addField(Types.optionalGroup()
- .addField(Types.optional(BINARY).named("ba"))
- .named("b"))
- .addField(Types.optionalList().
- setElementType(Types.optional(BINARY).named("element"))
- .named("c"))
- .addField(Types.optionalGroup()
- .addField(Types.optional(BINARY).named("da"))
- .named("d"))
- .addField(Types.optional(INT32).as(INT_8).named("e"))
- .addField(Types.optional(INT32).as(INT_16).named("e1"))
- .addField(Types.optional(INT32).as(INT_32).named("e2"))
- .addField(Types.optional(INT64).as(INT_64).named("e3"))
- .addField(Types.optional(INT32).as(UINT_8).named("e4"))
- .addField(Types.optional(INT32).as(UINT_16).named("e5"))
- .addField(Types.optional(INT32).as(UINT_32).named("e6"))
- .addField(Types.optional(INT64).as(UINT_64).named("e7"))
- .addField(Types.optional(FLOAT).named("f"))
- .addField(Types.optional(DOUBLE).named("f1"))
- .addField(Types.optional(BINARY).as(UTF8).named("g"))
- .addField(Types.optional(BINARY).named("h"))
- .addField(Types.optional(BOOLEAN).named("i"))
-
.addField(Types.optional(INT32).as(DECIMAL).precision(5).scale(5).named("j"))
-
.addField(Types.optional(INT64).as(DECIMAL).precision(15).scale(5).named("j1"))
-
.addField(Types.optional(BINARY).as(DECIMAL).precision(25).scale(5).named("j2"))
- .addField(Types.optional(INT32).as(DATE).named("k"))
- .addField(Types.optional(INT32).as(TIME_MILLIS).named("l"))
- .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("m"))
-
.addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("n"))
-
.addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("n1"))
- .named("root");
+ .addField(Types.optional(BINARY).named("a"))
+ .addField(Types.optionalGroup()
+ .addField(Types.optional(BINARY).named("ba"))
+ .named("b"))
+ .addField(Types.optionalList().
+ setElementType(Types.optional(BINARY).named("element"))
+ .named("c"))
+ .addField(Types.optionalList().
+ setElementType(Types.optional(BINARY).named("element"))
+ .named("d"))
+ .addField(Types.optionalGroup()
+ .addField(Types.optional(BINARY).named("ea"))
+ .named("e"))
+ .addField(Types.optional(INT32).as(INT_8).named("f"))
+ .addField(Types.optional(INT32).as(INT_16).named("f1"))
+ .addField(Types.optional(INT32).as(INT_32).named("f2"))
+ .addField(Types.optional(INT64).as(INT_64).named("f3"))
+ .addField(Types.optional(INT32).as(UINT_8).named("f4"))
+ .addField(Types.optional(INT32).as(UINT_16).named("f5"))
+ .addField(Types.optional(INT32).as(UINT_32).named("f6"))
+ .addField(Types.optional(INT64).as(UINT_64).named("f7"))
+ .addField(Types.optional(FLOAT).named("g"))
+ .addField(Types.optional(DOUBLE).named("g1"))
+ .addField(Types.optional(BINARY).as(UTF8).named("h"))
+ .addField(Types.optional(BINARY).named("i"))
+ .addField(Types.optional(BOOLEAN).named("j"))
+
.addField(Types.optional(INT32).as(DECIMAL).precision(5).scale(5).named("k"))
+
.addField(Types.optional(INT64).as(DECIMAL).precision(15).scale(5).named("k1"))
+
.addField(Types.optional(BINARY).as(DECIMAL).precision(25).scale(5).named("k2"))
+ .addField(Types.optional(INT32).as(DATE).named("l"))
+ .addField(Types.optional(INT32).as(TIME_MILLIS).named("m"))
+ .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("n"))
+
.addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("o"))
+
.addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("o1"))
+ .named("root");
private final Schema supportedTypesArrowSchema = new Schema(asList(
- field("b", new ArrowType.Struct_(), field("ba", new ArrowType.Binary())),
- field("c", new ArrowType.List(), field(null, new ArrowType.Binary())),
- field("e", new ArrowType.Int(8, true)),
- field("e1", new ArrowType.Int(16, true)),
- field("e2", new ArrowType.Int(32, true)),
- field("e3", new ArrowType.Int(64, true)),
- field("e4", new ArrowType.Int(8, false)),
- field("e5", new ArrowType.Int(16, false)),
- field("e6", new ArrowType.Int(32, false)),
- field("e7", new ArrowType.Int(64, false)),
- field("f", new ArrowType.FloatingPoint(Precision.SINGLE)),
- field("f1", new ArrowType.FloatingPoint(Precision.DOUBLE)),
- field("g", new ArrowType.Utf8()),
- field("h", new ArrowType.Binary()),
- field("i", new ArrowType.Bool()),
- field("j", new ArrowType.Decimal(5, 5)),
- field("j1", new ArrowType.Decimal(15, 5)),
- field("j2", new ArrowType.Decimal(25, 5)),
- field("k", new ArrowType.Date()),
- field("l", new ArrowType.Time()),
- field("m", new ArrowType.Timestamp(TimeUnit.MILLISECOND))
- ));
+ field("b", new ArrowType.Struct(), field("ba", new ArrowType.Binary())),
+ field("c", new ArrowType.List(), field(null, new ArrowType.Binary())),
+ field("e", new ArrowType.Int(8, true)),
+ field("e1", new ArrowType.Int(16, true)),
+ field("e2", new ArrowType.Int(32, true)),
+ field("e3", new ArrowType.Int(64, true)),
+ field("e4", new ArrowType.Int(8, false)),
+ field("e5", new ArrowType.Int(16, false)),
+ field("e6", new ArrowType.Int(32, false)),
+ field("e7", new ArrowType.Int(64, false)),
+ field("f", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)),
+ field("f1", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
+ field("g", new ArrowType.Utf8()),
+ field("h", new ArrowType.Binary()),
+ field("i", new ArrowType.Bool()),
+ field("j", new ArrowType.Decimal(5, 5)),
+ field("j1", new ArrowType.Decimal(15, 5)),
+ field("j2", new ArrowType.Decimal(25, 5)),
+ field("k", new ArrowType.Date(DateUnit.DAY)),
+ field("l", new
ArrowType.Time(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, 32)),
+ field("m", new
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC"))
+ ));
private final MessageType supportedTypesParquetSchema = Types.buildMessage()
- .addField(Types.optionalGroup()
- .addField(Types.optional(BINARY).named("ba"))
- .named("b"))
- .addField(Types.optionalList().
- setElementType(Types.optional(BINARY).named("element"))
- .named("c"))
- .addField(Types.optional(INT32).as(INT_8).named("e"))
- .addField(Types.optional(INT32).as(INT_16).named("e1"))
- .addField(Types.optional(INT32).as(INT_32).named("e2"))
- .addField(Types.optional(INT64).as(INT_64).named("e3"))
- .addField(Types.optional(INT32).as(UINT_8).named("e4"))
- .addField(Types.optional(INT32).as(UINT_16).named("e5"))
- .addField(Types.optional(INT32).as(UINT_32).named("e6"))
- .addField(Types.optional(INT64).as(UINT_64).named("e7"))
- .addField(Types.optional(FLOAT).named("f"))
- .addField(Types.optional(DOUBLE).named("f1"))
- .addField(Types.optional(BINARY).as(UTF8).named("g"))
- .addField(Types.optional(BINARY).named("h"))
- .addField(Types.optional(BOOLEAN).named("i"))
-
.addField(Types.optional(INT32).as(DECIMAL).precision(5).scale(5).named("j"))
-
.addField(Types.optional(INT64).as(DECIMAL).precision(15).scale(5).named("j1"))
-
.addField(Types.optional(BINARY).as(DECIMAL).precision(25).scale(5).named("j2"))
- .addField(Types.optional(INT32).as(DATE).named("k"))
- .addField(Types.optional(INT32).as(TIME_MILLIS).named("l"))
- .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("m"))
- .named("root");
+ .addField(Types.optionalGroup()
+ .addField(Types.optional(BINARY).named("ba"))
+ .named("b"))
+ .addField(Types.optionalList().
+ setElementType(Types.optional(BINARY).named("element"))
+ .named("c"))
+ .addField(Types.optional(INT32).as(INT_8).named("e"))
+ .addField(Types.optional(INT32).as(INT_16).named("e1"))
+ .addField(Types.optional(INT32).as(INT_32).named("e2"))
+ .addField(Types.optional(INT64).as(INT_64).named("e3"))
+ .addField(Types.optional(INT32).as(UINT_8).named("e4"))
+ .addField(Types.optional(INT32).as(UINT_16).named("e5"))
+ .addField(Types.optional(INT32).as(UINT_32).named("e6"))
+ .addField(Types.optional(INT64).as(UINT_64).named("e7"))
+ .addField(Types.optional(FLOAT).named("f"))
+ .addField(Types.optional(DOUBLE).named("f1"))
+ .addField(Types.optional(BINARY).as(UTF8).named("g"))
+ .addField(Types.optional(BINARY).named("h"))
+ .addField(Types.optional(BOOLEAN).named("i"))
+
.addField(Types.optional(INT32).as(DECIMAL).precision(5).scale(5).named("j"))
+
.addField(Types.optional(INT64).as(DECIMAL).precision(15).scale(5).named("j1"))
+
.addField(Types.optional(BINARY).as(DECIMAL).precision(25).scale(5).named("j2"))
+ .addField(Types.optional(INT32).as(DATE).named("k"))
+ .addField(Types.optional(INT32).as(TIME_MILLIS).named("l"))
+ .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("m"))
+ .named("root");
private final Schema paperArrowSchema = new Schema(asList(
- field("DocId", false, new ArrowType.Int(64, true)),
- field("Links", new ArrowType.Struct_(),
- field("Backward", false, new ArrowType.List(), field(null, false,
new ArrowType.Int(64, true))),
- field("Forward", false, new ArrowType.List(), field(null, false, new
ArrowType.Int(64, true)))
- ),
- field("Name", false, new ArrowType.List(),
- field(null, false, new ArrowType.Struct_(),
- field("Language", false, new ArrowType.List(),
- field(null, false, new ArrowType.Struct_(),
- field("Code", false, new ArrowType.Binary()),
- field("Country", new ArrowType.Binary())
- )
- ),
- field("Url", new ArrowType.Binary())
+ field("DocId", false, new ArrowType.Int(64, true)),
+ field("Links", new ArrowType.Struct(),
+ field("Backward", false, new ArrowType.List(), field(null, false, new
ArrowType.Int(64, true))),
+ field("Forward", false, new ArrowType.List(), field(null, false, new
ArrowType.Int(64, true)))
+ ),
+ field("Name", false, new ArrowType.List(),
+ field(null, false, new ArrowType.Struct(),
+ field("Language", false, new ArrowType.List(),
+ field(null, false, new ArrowType.Struct(),
+ field("Code", false, new ArrowType.Binary()),
+ field("Country", new ArrowType.Binary())
)
+ ),
+ field("Url", new ArrowType.Binary())
)
+ )
));
private SchemaConverter converter = new SchemaConverter();
@@ -286,7 +294,7 @@ public class TestSchemaConverter {
@Test
public void testAllMap() throws IOException {
SchemaMapping map = converter.map(allTypesArrowSchema,
allTypesParquetSchema);
- Assert.assertEquals("p, s<p>, l<p>, u<p>, p, p, p, p, p, p, p, p, p, p, p,
p, p, p, p, p, p, p, p, p, p", toSummaryString(map));
+ Assert.assertEquals("p, s<p>, l<p>, l<p>, u<p>, p, p, p, p, p, p, p, p, p,
p, p, p, p, p, p, p, p, p, p, p, p", toSummaryString(map));
}
private String toSummaryString(SchemaMapping map) {
@@ -301,32 +309,32 @@ public class TestSchemaConverter {
sb.append(", ");
}
sb.append(
- typeMapping.accept(new TypeMappingVisitor<String>() {
- @Override
- public String visit(PrimitiveTypeMapping primitiveTypeMapping) {
- return "p";
- }
+ typeMapping.accept(new TypeMappingVisitor<String>() {
+ @Override
+ public String visit(PrimitiveTypeMapping primitiveTypeMapping) {
+ return "p";
+ }
- @Override
- public String visit(StructTypeMapping structTypeMapping) {
- return "s";
- }
+ @Override
+ public String visit(StructTypeMapping structTypeMapping) {
+ return "s";
+ }
- @Override
- public String visit(UnionTypeMapping unionTypeMapping) {
- return "u";
- }
+ @Override
+ public String visit(UnionTypeMapping unionTypeMapping) {
+ return "u";
+ }
- @Override
- public String visit(ListTypeMapping listTypeMapping) {
- return "l";
- }
+ @Override
+ public String visit(ListTypeMapping listTypeMapping) {
+ return "l";
+ }
- @Override
- public String visit(RepeatedTypeMapping repeatedTypeMapping) {
- return "r";
- }
- })
+ @Override
+ public String visit(RepeatedTypeMapping repeatedTypeMapping) {
+ return "r";
+ }
+ })
);
if (typeMapping.getChildren() != null &&
!typeMapping.getChildren().isEmpty()) {
sb.append("<").append(toSummaryString(typeMapping.getChildren())).append(">");
--
To stop receiving notification emails like this one, please contact
[email protected].