[
https://issues.apache.org/jira/browse/PARQUET-1128?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16446810#comment-16446810
]
ASF GitHub Bot commented on PARQUET-1128:
-----------------------------------------
xhochy closed pull request #443: PARQUET-1128: [Java] Upgrade the Apache Arrow
version to 0.8.0 for SchemaConverter
URL: https://github.com/apache/parquet-mr/pull/443
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/parquet-arrow/pom.xml b/parquet-arrow/pom.xml
index 96981f8c1..2e969b18f 100644
--- a/parquet-arrow/pom.xml
+++ b/parquet-arrow/pom.xml
@@ -33,7 +33,7 @@
<url>https://parquet.apache.org</url>
<properties>
- <arrow.version>0.1.0</arrow.version>
+ <arrow.version>0.8.0</arrow.version>
</properties>
<dependencies>
diff --git
a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
index 773f7c8c0..7d5c28fbc 100644
---
a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
+++
b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
@@ -47,8 +47,8 @@
import java.util.ArrayList;
import java.util.List;
-import org.apache.arrow.flatbuf.Precision;
-import org.apache.arrow.flatbuf.TimeUnit;
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor;
import org.apache.arrow.vector.types.pojo.ArrowType.Binary;
@@ -59,7 +59,7 @@
import org.apache.arrow.vector.types.pojo.ArrowType.Int;
import org.apache.arrow.vector.types.pojo.ArrowType.Interval;
import org.apache.arrow.vector.types.pojo.ArrowType.Null;
-import org.apache.arrow.vector.types.pojo.ArrowType.Struct_;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
import org.apache.arrow.vector.types.pojo.ArrowType.Time;
import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
import org.apache.arrow.vector.types.pojo.ArrowType.Union;
@@ -141,13 +141,22 @@ public TypeMapping visit(Null type) {
}
@Override
- public TypeMapping visit(Struct_ type) {
+ public TypeMapping visit(Struct type) {
List<TypeMapping> parquetTypes = fromArrow(children);
return new StructTypeMapping(field, addToBuilder(parquetTypes,
Types.buildGroup(OPTIONAL)).named(fieldName), parquetTypes);
}
@Override
public TypeMapping
visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
+ return createListTypeMapping();
+ }
+
+ @Override
+ public TypeMapping
visit(org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList type) {
+ return createListTypeMapping();
+ }
+
+ private ListTypeMapping createListTypeMapping() {
if (children.size() != 1) {
throw new IllegalArgumentException("list fields must have exactly
one child: " + field);
}
@@ -167,31 +176,31 @@ public TypeMapping visit(Union type) {
public TypeMapping visit(Int type) {
boolean signed = type.getIsSigned();
switch (type.getBitWidth()) {
- case 8:
- return primitive(INT32, signed ? INT_8 : UINT_8);
- case 16:
- return primitive(INT32, signed ? INT_16 : UINT_16);
- case 32:
- return primitive(INT32, signed ? INT_32 : UINT_32);
- case 64:
- return primitive(INT64, signed ? INT_64 : UINT_64);
- default:
- throw new IllegalArgumentException("Illegal int type: " + field);
+ case 8:
+ return primitive(INT32, signed ? INT_8 : UINT_8);
+ case 16:
+ return primitive(INT32, signed ? INT_16 : UINT_16);
+ case 32:
+ return primitive(INT32, signed ? INT_32 : UINT_32);
+ case 64:
+ return primitive(INT64, signed ? INT_64 : UINT_64);
+ default:
+ throw new IllegalArgumentException("Illegal int type: " + field);
}
}
@Override
public TypeMapping visit(FloatingPoint type) {
switch (type.getPrecision()) {
- case Precision.HALF:
- // TODO(PARQUET-757): original type HalfFloat
- return primitive(FLOAT);
- case Precision.SINGLE:
- return primitive(FLOAT);
- case Precision.DOUBLE:
- return primitive(DOUBLE);
- default:
- throw new IllegalArgumentException("Illegal float type: " + field);
+ case HALF:
+ // TODO(PARQUET-757): original type HalfFloat
+ return primitive(FLOAT);
+ case SINGLE:
+ return primitive(FLOAT);
+ case DOUBLE:
+ return primitive(DOUBLE);
+ default:
+ throw new IllegalArgumentException("Illegal float type: " + field);
}
}
@@ -336,7 +345,7 @@ private TypeMapping fromParquetGroup(GroupType type, String
name) {
OriginalType ot = type.getOriginalType();
if (ot == null) {
List<TypeMapping> typeMappings = fromParquet(type.getFields());
- Field arrowField = new Field(name, type.isRepetition(OPTIONAL), new
Struct_(), fields(typeMappings));
+ Field arrowField = new Field(name, type.isRepetition(OPTIONAL), new
Struct(), fields(typeMappings));
return new StructTypeMapping(arrowField, type, typeMappings);
} else {
switch (ot) {
@@ -366,12 +375,12 @@ private TypeMapping field(ArrowType arrowType) {
@Override
public TypeMapping convertFLOAT(PrimitiveTypeName primitiveTypeName)
throws RuntimeException {
- return field(new ArrowType.FloatingPoint(Precision.SINGLE));
+ return field(new
ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
}
@Override
public TypeMapping convertDOUBLE(PrimitiveTypeName primitiveTypeName)
throws RuntimeException {
- return field(new ArrowType.FloatingPoint(Precision.DOUBLE));
+ return field(new
ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
}
@Override
@@ -381,41 +390,41 @@ public TypeMapping convertINT32(PrimitiveTypeName
primitiveTypeName) throws Runt
return integer(32, true);
}
switch (ot) {
- case INT_8:
- return integer(8, true);
- case INT_16:
- return integer(16, true);
- case INT_32:
- return integer(32, true);
- case UINT_8:
- return integer(8, false);
- case UINT_16:
- return integer(16, false);
- case UINT_32:
- return integer(32, false);
- case DECIMAL:
- return decimal(type.getDecimalMetadata());
- case DATE:
- return field(new ArrowType.Date());
- case TIMESTAMP_MICROS:
- return field(new ArrowType.Timestamp(TimeUnit.MICROSECOND));
- case TIMESTAMP_MILLIS:
- return field(new ArrowType.Timestamp(TimeUnit.MILLISECOND));
- case TIME_MILLIS:
- return field(new ArrowType.Time());
- default:
- case TIME_MICROS:
- case INT_64:
- case UINT_64:
- case UTF8:
- case ENUM:
- case BSON:
- case INTERVAL:
- case JSON:
- case LIST:
- case MAP:
- case MAP_KEY_VALUE:
- throw new IllegalArgumentException("illegal type " + type);
+ case INT_8:
+ return integer(8, true);
+ case INT_16:
+ return integer(16, true);
+ case INT_32:
+ return integer(32, true);
+ case UINT_8:
+ return integer(8, false);
+ case UINT_16:
+ return integer(16, false);
+ case UINT_32:
+ return integer(32, false);
+ case DECIMAL:
+ return decimal(type.getDecimalMetadata());
+ case DATE:
+ return field(new ArrowType.Date(DateUnit.DAY));
+ case TIMESTAMP_MICROS:
+ return field(new
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND, "UTC"));
+ case TIMESTAMP_MILLIS:
+ return field(new
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC"));
+ case TIME_MILLIS:
+ return field(new
ArrowType.Time(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, 32));
+ default:
+ case TIME_MICROS:
+ case INT_64:
+ case UINT_64:
+ case UTF8:
+ case ENUM:
+ case BSON:
+ case INTERVAL:
+ case JSON:
+ case LIST:
+ case MAP:
+ case MAP_KEY_VALUE:
+ throw new IllegalArgumentException("illegal type " + type);
}
}
@@ -426,43 +435,42 @@ public TypeMapping convertINT64(PrimitiveTypeName
primitiveTypeName) throws Runt
return integer(64, true);
}
switch (ot) {
- case INT_8:
- return integer(8, true);
- case INT_16:
- return integer(16, true);
- case INT_32:
- return integer(32, true);
- case INT_64:
- return integer(64, true);
- case UINT_8:
- return integer(8, false);
- case UINT_16:
- return integer(16, false);
- case UINT_32:
- return integer(32, false);
- case UINT_64:
- return integer(64, false);
- case DECIMAL:
- return decimal(type.getDecimalMetadata());
- case DATE:
- return field(new ArrowType.Date());
- case TIMESTAMP_MICROS:
- return field(new ArrowType.Timestamp(TimeUnit.MICROSECOND));
- case TIMESTAMP_MILLIS:
- return field(new ArrowType.Timestamp(TimeUnit.MILLISECOND));
- case TIME_MILLIS:
- return field(new ArrowType.Time());
- default:
- case TIME_MICROS:
- case UTF8:
- case ENUM:
- case BSON:
- case INTERVAL:
- case JSON:
- case LIST:
- case MAP:
- case MAP_KEY_VALUE:
- throw new IllegalArgumentException("illegal type " + type);
+ case INT_8:
+ return integer(8, true);
+ case INT_16:
+ return integer(16, true);
+ case INT_32:
+ return integer(32, true);
+ case INT_64:
+ return integer(64, true);
+ case UINT_8:
+ return integer(8, false);
+ case UINT_16:
+ return integer(16, false);
+ case UINT_32:
+ return integer(32, false);
+ case UINT_64:
+ return integer(64, false);
+ case DECIMAL:
+ return decimal(type.getDecimalMetadata());
+ case DATE:
+ return field(new ArrowType.Date(DateUnit.DAY));
+ case TIMESTAMP_MICROS:
+ return field(new
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND, "UTC"));
+ case TIMESTAMP_MILLIS:
+ return field(new
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC"));
+ default:
+ case TIME_MICROS:
+ case UTF8:
+ case ENUM:
+ case BSON:
+ case INTERVAL:
+ case JSON:
+ case LIST:
+ case MAP:
+ case MAP_KEY_VALUE:
+ case TIME_MILLIS:
+ throw new IllegalArgumentException("illegal type " + type);
}
}
@@ -489,12 +497,12 @@ public TypeMapping convertBINARY(PrimitiveTypeName
primitiveTypeName) throws Run
return field(new ArrowType.Binary());
}
switch (ot) {
- case UTF8:
- return field(new ArrowType.Utf8());
- case DECIMAL:
- return decimal(type.getDecimalMetadata());
- default:
- throw new IllegalArgumentException("illegal type " + type);
+ case UTF8:
+ return field(new ArrowType.Utf8());
+ case DECIMAL:
+ return decimal(type.getDecimalMetadata());
+ default:
+ throw new IllegalArgumentException("illegal type " + type);
}
}
@@ -545,7 +553,7 @@ public TypeMapping visit(Null type) {
}
@Override
- public TypeMapping visit(Struct_ type) {
+ public TypeMapping visit(Struct type) {
if (parquetField.isPrimitive()) {
throw new IllegalArgumentException("Parquet type not a group: " +
parquetField);
}
@@ -555,6 +563,15 @@ public TypeMapping visit(Struct_ type) {
@Override
public TypeMapping
visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
+ return createListTypeMapping(type);
+ }
+
+ @Override
+ public TypeMapping
visit(org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList type) {
+ return createListTypeMapping(type);
+ }
+
+ private TypeMapping createListTypeMapping(ArrowType.ComplexType type) {
if (arrowField.getChildren().size() != 1) {
throw new IllegalArgumentException("Invalid list type: " + type);
}
diff --git
a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
index ec2b8074c..654f773f9 100644
---
a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
+++
b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
@@ -43,11 +43,11 @@
import java.io.IOException;
import java.util.List;
+import org.apache.arrow.vector.types.IntervalUnit;
-import org.apache.arrow.flatbuf.IntervalUnit;
-import org.apache.arrow.flatbuf.Precision;
-import org.apache.arrow.flatbuf.TimeUnit;
-import org.apache.arrow.flatbuf.UnionMode;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;
@@ -79,159 +79,167 @@ private static Field field(String name, ArrowType type,
Field... children) {
}
private final Schema complexArrowSchema = new Schema(asList(
- field("a", false, new ArrowType.Int(8, true)),
- field("b", new ArrowType.Struct_(),
- field("c", new ArrowType.Int(16, true)),
- field("d", new ArrowType.Utf8())),
- field("e", new ArrowType.List(), field(null, new ArrowType.Date())),
- field("f", new ArrowType.FloatingPoint(Precision.SINGLE)),
- field("g", new ArrowType.Timestamp(TimeUnit.MILLISECOND)),
- field("h", new ArrowType.Interval(IntervalUnit.DAY_TIME))
- ));
+ field("a", false, new ArrowType.Int(8, true)),
+ field("b", new ArrowType.Struct(),
+ field("c", new ArrowType.Int(16, true)),
+ field("d", new ArrowType.Utf8())),
+ field("e", new ArrowType.List(), field(null, new
ArrowType.Date(DateUnit.DAY))),
+ field("f", new ArrowType.FixedSizeList(1), field(null, new
ArrowType.Date(DateUnit.DAY))),
+ field("g", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)),
+ field("h", new
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC")),
+ field("i", new ArrowType.Interval(IntervalUnit.DAY_TIME))
+ ));
private final MessageType complexParquetSchema = Types.buildMessage()
- .addField(Types.optional(INT32).as(INT_8).named("a"))
- .addField(Types.optionalGroup()
- .addField(Types.optional(INT32).as(INT_16).named("c"))
- .addField(Types.optional(BINARY).as(UTF8).named("d"))
- .named("b"))
- .addField(Types.optionalList().
- setElementType(Types.optional(INT32).as(DATE).named("element"))
- .named("e"))
- .addField(Types.optional(FLOAT).named("f"))
- .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("g"))
-
.addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("h"))
- .named("root");
+ .addField(Types.optional(INT32).as(INT_8).named("a"))
+ .addField(Types.optionalGroup()
+ .addField(Types.optional(INT32).as(INT_16).named("c"))
+ .addField(Types.optional(BINARY).as(UTF8).named("d"))
+ .named("b"))
+ .addField(Types.optionalList().
+ setElementType(Types.optional(INT32).as(DATE).named("element"))
+ .named("e"))
+ .addField(Types.optionalList().
+ setElementType(Types.optional(INT32).as(DATE).named("element"))
+ .named("f"))
+ .addField(Types.optional(FLOAT).named("g"))
+ .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("h"))
+
.addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("i"))
+ .named("root");
private final Schema allTypesArrowSchema = new Schema(asList(
- field("a", false, new ArrowType.Null()),
- field("b", new ArrowType.Struct_(), field("ba", new ArrowType.Null())),
- field("c", new ArrowType.List(), field("ca", new ArrowType.Null())),
- field("d", new ArrowType.Union(UnionMode.Sparse, new int[] {1, 2, 3}),
field("da", new ArrowType.Null())),
- field("e", new ArrowType.Int(8, true)),
- field("e1", new ArrowType.Int(16, true)),
- field("e2", new ArrowType.Int(32, true)),
- field("e3", new ArrowType.Int(64, true)),
- field("e4", new ArrowType.Int(8, false)),
- field("e5", new ArrowType.Int(16, false)),
- field("e6", new ArrowType.Int(32, false)),
- field("e7", new ArrowType.Int(64, false)),
- field("f", new ArrowType.FloatingPoint(Precision.SINGLE)),
- field("f1", new ArrowType.FloatingPoint(Precision.DOUBLE)),
- field("g", new ArrowType.Utf8()),
- field("h", new ArrowType.Binary()),
- field("i", new ArrowType.Bool()),
- field("j", new ArrowType.Decimal(5, 5)),
- field("j1", new ArrowType.Decimal(15, 5)),
- field("j2", new ArrowType.Decimal(25, 5)),
- field("k", new ArrowType.Date()),
- field("l", new ArrowType.Time()),
- field("m", new ArrowType.Timestamp(TimeUnit.MILLISECOND)),
- field("n", new ArrowType.Interval(IntervalUnit.DAY_TIME)),
- field("n1", new ArrowType.Interval(IntervalUnit.YEAR_MONTH))
- ));
+ field("a", false, new ArrowType.Null()),
+ field("b", new ArrowType.Struct(), field("ba", new ArrowType.Null())),
+ field("c", new ArrowType.List(), field("ca", new ArrowType.Null())),
+ field("d", new ArrowType.FixedSizeList(1), field("da", new
ArrowType.Null())),
+ field("e", new ArrowType.Union(UnionMode.Sparse, new int[] {1, 2, 3}),
field("ea", new ArrowType.Null())),
+ field("f", new ArrowType.Int(8, true)),
+ field("f1", new ArrowType.Int(16, true)),
+ field("f2", new ArrowType.Int(32, true)),
+ field("f3", new ArrowType.Int(64, true)),
+ field("f4", new ArrowType.Int(8, false)),
+ field("f5", new ArrowType.Int(16, false)),
+ field("f6", new ArrowType.Int(32, false)),
+ field("f7", new ArrowType.Int(64, false)),
+ field("g", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)),
+ field("g1", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
+ field("h", new ArrowType.Utf8()),
+ field("i", new ArrowType.Binary()),
+ field("j", new ArrowType.Bool()),
+ field("k", new ArrowType.Decimal(5, 5)),
+ field("k1", new ArrowType.Decimal(15, 5)),
+ field("k2", new ArrowType.Decimal(25, 5)),
+ field("l", new ArrowType.Date(DateUnit.DAY)),
+ field("m", new
ArrowType.Time(org.apache.arrow.vector.types.TimeUnit.SECOND, 32)),
+ field("n", new
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC")),
+ field("o", new ArrowType.Interval(IntervalUnit.DAY_TIME)),
+ field("o1", new ArrowType.Interval(IntervalUnit.YEAR_MONTH))
+ ));
private final MessageType allTypesParquetSchema = Types.buildMessage()
- .addField(Types.optional(BINARY).named("a"))
- .addField(Types.optionalGroup()
- .addField(Types.optional(BINARY).named("ba"))
- .named("b"))
- .addField(Types.optionalList().
- setElementType(Types.optional(BINARY).named("element"))
- .named("c"))
- .addField(Types.optionalGroup()
- .addField(Types.optional(BINARY).named("da"))
- .named("d"))
- .addField(Types.optional(INT32).as(INT_8).named("e"))
- .addField(Types.optional(INT32).as(INT_16).named("e1"))
- .addField(Types.optional(INT32).as(INT_32).named("e2"))
- .addField(Types.optional(INT64).as(INT_64).named("e3"))
- .addField(Types.optional(INT32).as(UINT_8).named("e4"))
- .addField(Types.optional(INT32).as(UINT_16).named("e5"))
- .addField(Types.optional(INT32).as(UINT_32).named("e6"))
- .addField(Types.optional(INT64).as(UINT_64).named("e7"))
- .addField(Types.optional(FLOAT).named("f"))
- .addField(Types.optional(DOUBLE).named("f1"))
- .addField(Types.optional(BINARY).as(UTF8).named("g"))
- .addField(Types.optional(BINARY).named("h"))
- .addField(Types.optional(BOOLEAN).named("i"))
-
.addField(Types.optional(INT32).as(DECIMAL).precision(5).scale(5).named("j"))
-
.addField(Types.optional(INT64).as(DECIMAL).precision(15).scale(5).named("j1"))
-
.addField(Types.optional(BINARY).as(DECIMAL).precision(25).scale(5).named("j2"))
- .addField(Types.optional(INT32).as(DATE).named("k"))
- .addField(Types.optional(INT32).as(TIME_MILLIS).named("l"))
- .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("m"))
-
.addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("n"))
-
.addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("n1"))
- .named("root");
+ .addField(Types.optional(BINARY).named("a"))
+ .addField(Types.optionalGroup()
+ .addField(Types.optional(BINARY).named("ba"))
+ .named("b"))
+ .addField(Types.optionalList().
+ setElementType(Types.optional(BINARY).named("element"))
+ .named("c"))
+ .addField(Types.optionalList().
+ setElementType(Types.optional(BINARY).named("element"))
+ .named("d"))
+ .addField(Types.optionalGroup()
+ .addField(Types.optional(BINARY).named("ea"))
+ .named("e"))
+ .addField(Types.optional(INT32).as(INT_8).named("f"))
+ .addField(Types.optional(INT32).as(INT_16).named("f1"))
+ .addField(Types.optional(INT32).as(INT_32).named("f2"))
+ .addField(Types.optional(INT64).as(INT_64).named("f3"))
+ .addField(Types.optional(INT32).as(UINT_8).named("f4"))
+ .addField(Types.optional(INT32).as(UINT_16).named("f5"))
+ .addField(Types.optional(INT32).as(UINT_32).named("f6"))
+ .addField(Types.optional(INT64).as(UINT_64).named("f7"))
+ .addField(Types.optional(FLOAT).named("g"))
+ .addField(Types.optional(DOUBLE).named("g1"))
+ .addField(Types.optional(BINARY).as(UTF8).named("h"))
+ .addField(Types.optional(BINARY).named("i"))
+ .addField(Types.optional(BOOLEAN).named("j"))
+
.addField(Types.optional(INT32).as(DECIMAL).precision(5).scale(5).named("k"))
+
.addField(Types.optional(INT64).as(DECIMAL).precision(15).scale(5).named("k1"))
+
.addField(Types.optional(BINARY).as(DECIMAL).precision(25).scale(5).named("k2"))
+ .addField(Types.optional(INT32).as(DATE).named("l"))
+ .addField(Types.optional(INT32).as(TIME_MILLIS).named("m"))
+ .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("n"))
+
.addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("o"))
+
.addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("o1"))
+ .named("root");
private final Schema supportedTypesArrowSchema = new Schema(asList(
- field("b", new ArrowType.Struct_(), field("ba", new ArrowType.Binary())),
- field("c", new ArrowType.List(), field(null, new ArrowType.Binary())),
- field("e", new ArrowType.Int(8, true)),
- field("e1", new ArrowType.Int(16, true)),
- field("e2", new ArrowType.Int(32, true)),
- field("e3", new ArrowType.Int(64, true)),
- field("e4", new ArrowType.Int(8, false)),
- field("e5", new ArrowType.Int(16, false)),
- field("e6", new ArrowType.Int(32, false)),
- field("e7", new ArrowType.Int(64, false)),
- field("f", new ArrowType.FloatingPoint(Precision.SINGLE)),
- field("f1", new ArrowType.FloatingPoint(Precision.DOUBLE)),
- field("g", new ArrowType.Utf8()),
- field("h", new ArrowType.Binary()),
- field("i", new ArrowType.Bool()),
- field("j", new ArrowType.Decimal(5, 5)),
- field("j1", new ArrowType.Decimal(15, 5)),
- field("j2", new ArrowType.Decimal(25, 5)),
- field("k", new ArrowType.Date()),
- field("l", new ArrowType.Time()),
- field("m", new ArrowType.Timestamp(TimeUnit.MILLISECOND))
- ));
+ field("b", new ArrowType.Struct(), field("ba", new ArrowType.Binary())),
+ field("c", new ArrowType.List(), field(null, new ArrowType.Binary())),
+ field("e", new ArrowType.Int(8, true)),
+ field("e1", new ArrowType.Int(16, true)),
+ field("e2", new ArrowType.Int(32, true)),
+ field("e3", new ArrowType.Int(64, true)),
+ field("e4", new ArrowType.Int(8, false)),
+ field("e5", new ArrowType.Int(16, false)),
+ field("e6", new ArrowType.Int(32, false)),
+ field("e7", new ArrowType.Int(64, false)),
+ field("f", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)),
+ field("f1", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
+ field("g", new ArrowType.Utf8()),
+ field("h", new ArrowType.Binary()),
+ field("i", new ArrowType.Bool()),
+ field("j", new ArrowType.Decimal(5, 5)),
+ field("j1", new ArrowType.Decimal(15, 5)),
+ field("j2", new ArrowType.Decimal(25, 5)),
+ field("k", new ArrowType.Date(DateUnit.DAY)),
+ field("l", new
ArrowType.Time(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, 32)),
+ field("m", new
ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC"))
+ ));
private final MessageType supportedTypesParquetSchema = Types.buildMessage()
- .addField(Types.optionalGroup()
- .addField(Types.optional(BINARY).named("ba"))
- .named("b"))
- .addField(Types.optionalList().
- setElementType(Types.optional(BINARY).named("element"))
- .named("c"))
- .addField(Types.optional(INT32).as(INT_8).named("e"))
- .addField(Types.optional(INT32).as(INT_16).named("e1"))
- .addField(Types.optional(INT32).as(INT_32).named("e2"))
- .addField(Types.optional(INT64).as(INT_64).named("e3"))
- .addField(Types.optional(INT32).as(UINT_8).named("e4"))
- .addField(Types.optional(INT32).as(UINT_16).named("e5"))
- .addField(Types.optional(INT32).as(UINT_32).named("e6"))
- .addField(Types.optional(INT64).as(UINT_64).named("e7"))
- .addField(Types.optional(FLOAT).named("f"))
- .addField(Types.optional(DOUBLE).named("f1"))
- .addField(Types.optional(BINARY).as(UTF8).named("g"))
- .addField(Types.optional(BINARY).named("h"))
- .addField(Types.optional(BOOLEAN).named("i"))
-
.addField(Types.optional(INT32).as(DECIMAL).precision(5).scale(5).named("j"))
-
.addField(Types.optional(INT64).as(DECIMAL).precision(15).scale(5).named("j1"))
-
.addField(Types.optional(BINARY).as(DECIMAL).precision(25).scale(5).named("j2"))
- .addField(Types.optional(INT32).as(DATE).named("k"))
- .addField(Types.optional(INT32).as(TIME_MILLIS).named("l"))
- .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("m"))
- .named("root");
+ .addField(Types.optionalGroup()
+ .addField(Types.optional(BINARY).named("ba"))
+ .named("b"))
+ .addField(Types.optionalList().
+ setElementType(Types.optional(BINARY).named("element"))
+ .named("c"))
+ .addField(Types.optional(INT32).as(INT_8).named("e"))
+ .addField(Types.optional(INT32).as(INT_16).named("e1"))
+ .addField(Types.optional(INT32).as(INT_32).named("e2"))
+ .addField(Types.optional(INT64).as(INT_64).named("e3"))
+ .addField(Types.optional(INT32).as(UINT_8).named("e4"))
+ .addField(Types.optional(INT32).as(UINT_16).named("e5"))
+ .addField(Types.optional(INT32).as(UINT_32).named("e6"))
+ .addField(Types.optional(INT64).as(UINT_64).named("e7"))
+ .addField(Types.optional(FLOAT).named("f"))
+ .addField(Types.optional(DOUBLE).named("f1"))
+ .addField(Types.optional(BINARY).as(UTF8).named("g"))
+ .addField(Types.optional(BINARY).named("h"))
+ .addField(Types.optional(BOOLEAN).named("i"))
+
.addField(Types.optional(INT32).as(DECIMAL).precision(5).scale(5).named("j"))
+
.addField(Types.optional(INT64).as(DECIMAL).precision(15).scale(5).named("j1"))
+
.addField(Types.optional(BINARY).as(DECIMAL).precision(25).scale(5).named("j2"))
+ .addField(Types.optional(INT32).as(DATE).named("k"))
+ .addField(Types.optional(INT32).as(TIME_MILLIS).named("l"))
+ .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("m"))
+ .named("root");
private final Schema paperArrowSchema = new Schema(asList(
- field("DocId", false, new ArrowType.Int(64, true)),
- field("Links", new ArrowType.Struct_(),
- field("Backward", false, new ArrowType.List(), field(null, false,
new ArrowType.Int(64, true))),
- field("Forward", false, new ArrowType.List(), field(null, false, new
ArrowType.Int(64, true)))
- ),
- field("Name", false, new ArrowType.List(),
- field(null, false, new ArrowType.Struct_(),
- field("Language", false, new ArrowType.List(),
- field(null, false, new ArrowType.Struct_(),
- field("Code", false, new ArrowType.Binary()),
- field("Country", new ArrowType.Binary())
- )
- ),
- field("Url", new ArrowType.Binary())
+ field("DocId", false, new ArrowType.Int(64, true)),
+ field("Links", new ArrowType.Struct(),
+ field("Backward", false, new ArrowType.List(), field(null, false, new
ArrowType.Int(64, true))),
+ field("Forward", false, new ArrowType.List(), field(null, false, new
ArrowType.Int(64, true)))
+ ),
+ field("Name", false, new ArrowType.List(),
+ field(null, false, new ArrowType.Struct(),
+ field("Language", false, new ArrowType.List(),
+ field(null, false, new ArrowType.Struct(),
+ field("Code", false, new ArrowType.Binary()),
+ field("Country", new ArrowType.Binary())
)
+ ),
+ field("Url", new ArrowType.Binary())
)
+ )
));
private SchemaConverter converter = new SchemaConverter();
@@ -286,7 +294,7 @@ private void compareFields(List<Field> left, List<Field>
right) {
@Test
public void testAllMap() throws IOException {
SchemaMapping map = converter.map(allTypesArrowSchema,
allTypesParquetSchema);
- Assert.assertEquals("p, s<p>, l<p>, u<p>, p, p, p, p, p, p, p, p, p, p, p,
p, p, p, p, p, p, p, p, p, p", toSummaryString(map));
+ Assert.assertEquals("p, s<p>, l<p>, l<p>, u<p>, p, p, p, p, p, p, p, p, p,
p, p, p, p, p, p, p, p, p, p, p, p", toSummaryString(map));
}
private String toSummaryString(SchemaMapping map) {
@@ -301,32 +309,32 @@ private String toSummaryString(List<TypeMapping> fields) {
sb.append(", ");
}
sb.append(
- typeMapping.accept(new TypeMappingVisitor<String>() {
- @Override
- public String visit(PrimitiveTypeMapping primitiveTypeMapping) {
- return "p";
- }
+ typeMapping.accept(new TypeMappingVisitor<String>() {
+ @Override
+ public String visit(PrimitiveTypeMapping primitiveTypeMapping) {
+ return "p";
+ }
- @Override
- public String visit(StructTypeMapping structTypeMapping) {
- return "s";
- }
+ @Override
+ public String visit(StructTypeMapping structTypeMapping) {
+ return "s";
+ }
- @Override
- public String visit(UnionTypeMapping unionTypeMapping) {
- return "u";
- }
+ @Override
+ public String visit(UnionTypeMapping unionTypeMapping) {
+ return "u";
+ }
- @Override
- public String visit(ListTypeMapping listTypeMapping) {
- return "l";
- }
+ @Override
+ public String visit(ListTypeMapping listTypeMapping) {
+ return "l";
+ }
- @Override
- public String visit(RepeatedTypeMapping repeatedTypeMapping) {
- return "r";
- }
- })
+ @Override
+ public String visit(RepeatedTypeMapping repeatedTypeMapping) {
+ return "r";
+ }
+ })
);
if (typeMapping.getChildren() != null &&
!typeMapping.getChildren().isEmpty()) {
sb.append("<").append(toSummaryString(typeMapping.getChildren())).append(">");
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> [Java] Upgrade the Apache Arrow version to 0.8.0 for SchemaConverter
> --------------------------------------------------------------------
>
> Key: PARQUET-1128
> URL: https://issues.apache.org/jira/browse/PARQUET-1128
> Project: Parquet
> Issue Type: Bug
> Components: parquet-mr
> Reporter: Masayuki Takahashi
> Priority: Minor
>
> When I converted a Parquet (1.9.1-SNAPSHOT) schema to Arrow (0.4.0) with
> SchemaConverter, the following exception was raised:
> {code:java}
> java.lang.NoClassDefFoundError:
> org/apache/arrow/vector/types/pojo/ArrowType$Struct_
> at
> net.wrap_trap.parquet_arrow.ParquetToArrowConverter.convertToArrow(ParquetToArrowConverter.java:67)
> at
> net.wrap_trap.parquet_arrow.ParquetToArrowConverter.convertToArrow(ParquetToArrowConverter.java:40)
> at
> net.wrap_trap.parquet_arrow.ParquetToArrowConverterTest.parquetToArrowConverterTest(ParquetToArrowConverterTest.java:27)
> {code}
> The reason is that SchemaConverter still refers to Apache Arrow 0.1.0,
> where the struct type class is named Struct_.
> I upgraded the Apache Arrow version to 0.8.0 for SchemaConverter.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)