Repository: arrow Updated Branches: refs/heads/master bae33d622 -> 768c7d0be
ARROW-257: Add a typeids Vector to Union type Author: Julien Le Dem <[email protected]> Closes #143 from julienledem/union and squashes the following commits: cd1b711 [Julien Le Dem] ARROW-257: Add a typeids Vector to Union type Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/768c7d0b Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/768c7d0b Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/768c7d0b Branch: refs/heads/master Commit: 768c7d0be7dde9942235b5312c1c46ab035af86b Parents: bae33d6 Author: Julien Le Dem <[email protected]> Authored: Tue Sep 27 11:54:35 2016 -0700 Committer: Julien Le Dem <[email protected]> Committed: Tue Sep 27 11:54:35 2016 -0700 ---------------------------------------------------------------------- format/Message.fbs | 5 +++ .../vector/src/main/codegen/data/ArrowTypes.tdd | 2 +- .../src/main/codegen/templates/ArrowType.java | 38 +++++++++++++++----- .../src/main/codegen/templates/UnionVector.java | 7 ++-- .../org/apache/arrow/vector/types/Types.java | 2 +- .../apache/arrow/vector/pojo/TestConvert.java | 5 +-- 6 files changed, 45 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/768c7d0b/format/Message.fbs ---------------------------------------------------------------------- diff --git a/format/Message.fbs b/format/Message.fbs index 07da862..288f5a1 100644 --- a/format/Message.fbs +++ b/format/Message.fbs @@ -23,8 +23,13 @@ table List { enum UnionMode:short { Sparse, Dense } +/// A union is a complex type with children in Field +/// By default ids in the type vector refer to the offsets in the children +/// optionally typeIds provides an indirection between the child offset and the type id +/// for each child typeIds[offset] is the id used in the type vector table Union { mode: UnionMode; + typeIds: [ int ]; // optional, describes typeid of each child. } table Int { http://git-wip-us.apache.org/repos/asf/arrow/blob/768c7d0b/java/vector/src/main/codegen/data/ArrowTypes.tdd ---------------------------------------------------------------------- diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd b/java/vector/src/main/codegen/data/ArrowTypes.tdd index 9f81f0e..9624fec 100644 --- a/java/vector/src/main/codegen/data/ArrowTypes.tdd +++ b/java/vector/src/main/codegen/data/ArrowTypes.tdd @@ -30,7 +30,7 @@ }, { name: "Union", - fields: [{name: "mode", type: short}] + fields: [{name: "mode", type: short}, {name: "typeIds", type: "int[]"}] }, { name: "Int", http://git-wip-us.apache.org/repos/asf/arrow/blob/768c7d0b/java/vector/src/main/codegen/templates/ArrowType.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/codegen/templates/ArrowType.java b/java/vector/src/main/codegen/templates/ArrowType.java index 29dee20..30f2c68 100644 --- a/java/vector/src/main/codegen/templates/ArrowType.java +++ b/java/vector/src/main/codegen/templates/ArrowType.java @@ -33,12 +33,23 @@ import org.apache.arrow.flatbuf.Type; import java.util.Objects; +/** + * Arrow types + **/ public abstract class ArrowType { public abstract byte getTypeType(); public abstract int getType(FlatBufferBuilder builder); public abstract <T> T accept(ArrowTypeVisitor<T> visitor); + /** + * to visit the ArrowTypes + * <code> + * type.accept(new ArrowTypeVisitor<Type>() { + * ... + * }); + * </code> + */ public static interface ArrowTypeVisitor<T> { <#list arrowTypes.types as type> T visit(${type.name} type); @@ -55,9 +66,7 @@ public abstract class ArrowType { </#if> <#list fields as field> - <#assign fieldName = field.name> - <#assign fieldType = field.type> - ${fieldType} ${fieldName}; + ${field.type} ${field.name}; </#list> <#if type.fields?size != 0> @@ -79,6 +88,9 @@ public abstract class ArrowType { <#if field.type == "String"> int ${field.name} = builder.createString(this.${field.name}); </#if> + <#if field.type == "int[]"> + int ${field.name} = org.apache.arrow.flatbuf.${type.name}.create${field.name?cap_first}Vector(builder, this.${field.name}); + </#if> </#list> org.apache.arrow.flatbuf.${type.name}.start${type.name}(builder); <#list type.fields as field> @@ -96,7 +108,7 @@ public abstract class ArrowType { public String toString() { return "${name}{" <#list fields as field> - + ", " + ${field.name} + + <#if field.type == "int[]">java.util.Arrays.toString(${field.name})<#else>${field.name}</#if><#if field_has_next> + ", " </#if> </#list> + "}"; } @@ -115,8 +127,7 @@ public abstract class ArrowType { return true; <#else> ${type.name} that = (${type.name}) obj; - return - <#list type.fields as field>Objects.equals(this.${field.name}, that.${field.name}) <#if field_has_next>&&<#else>;</#if> + return <#list type.fields as field>Objects.deepEquals(this.${field.name}, that.${field.name}) <#if field_has_next>&&<#else>;</#if> </#list> </#if> } @@ -134,9 +145,20 @@ public abstract class ArrowType { <#assign name = type.name> <#assign nameLower = type.name?lower_case> <#assign fields = type.fields> - case Type.${type.name}: + case Type.${type.name}: { org.apache.arrow.flatbuf.${type.name} ${nameLower}Type = (org.apache.arrow.flatbuf.${type.name}) field.type(new org.apache.arrow.flatbuf.${type.name}()); - return new ${type.name}(<#list type.fields as field>${nameLower}Type.${field.name}()<#if field_has_next>, </#if></#list>); + <#list type.fields as field> + <#if field.type == "int[]"> + ${field.type} ${field.name} = new int[${nameLower}Type.${field.name}Length()]; + for (int i = 0; i< ${field.name}.length; ++i) { + ${field.name}[i] = ${nameLower}Type.${field.name}(i); + } + <#else> + ${field.type} ${field.name} = ${nameLower}Type.${field.name}(); + </#if> + </#list> + return new ${type.name}(<#list type.fields as field>${field.name}<#if field_has_next>, </#if></#list>); + } </#list> default: throw new UnsupportedOperationException("Unsupported type: " + field.typeType()); http://git-wip-us.apache.org/repos/asf/arrow/blob/768c7d0b/java/vector/src/main/codegen/templates/UnionVector.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index 3014bbb..b14314d 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -232,10 +232,13 @@ public class UnionVector implements FieldVector { @Override public Field getField() { List<org.apache.arrow.vector.types.pojo.Field> childFields = new ArrayList<>(); - for (ValueVector v : internalMap.getChildren()) { + List<FieldVector> children = internalMap.getChildren(); + int[] typeIds = new int[children.size()]; + for (ValueVector v : children) { + typeIds[childFields.size()] = v.getMinorType().ordinal(); childFields.add(v.getField()); } - return new Field(name, true, new ArrowType.Union(Sparse), childFields); + return new Field(name, true, new ArrowType.Union(Sparse, typeIds), childFields); } @Override http://git-wip-us.apache.org/repos/asf/arrow/blob/768c7d0b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index 181d835..6e63ae2 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -472,7 +472,7 @@ public class Types { return new UnionListWriter((ListVector) vector); } }, - UNION(new Union(UnionMode.Sparse)) { + UNION(new Union(UnionMode.Sparse, null)) { @Override public Field getField() { throw new UnsupportedOperationException("Cannot get simple field for Union type"); http://git-wip-us.apache.org/repos/asf/arrow/blob/768c7d0b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java ---------------------------------------------------------------------- diff --git a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java index 448117d..ed740cd 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java @@ -22,11 +22,12 @@ import static org.apache.arrow.flatbuf.Precision.SINGLE; import static org.junit.Assert.assertEquals; import org.apache.arrow.flatbuf.UnionMode; +import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; import org.apache.arrow.vector.types.pojo.ArrowType.Int; import org.apache.arrow.vector.types.pojo.ArrowType.List; -import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; import org.apache.arrow.vector.types.pojo.ArrowType.Struct_; +import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; import org.apache.arrow.vector.types.pojo.ArrowType.Union; import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; import org.apache.arrow.vector.types.pojo.Field; @@ -78,7 +79,7 @@ public class TestConvert { childrenBuilder.add(new Field("child4", true, new List(), ImmutableList.<Field>of( new Field("child4.1", true, Utf8.INSTANCE, null) ))); - childrenBuilder.add(new Field("child5", true, new Union(UnionMode.Sparse), ImmutableList.<Field>of( + childrenBuilder.add(new Field("child5", true, new Union(UnionMode.Sparse, new int[] { MinorType.TIMESTAMP.ordinal(), MinorType.FLOAT8.ordinal() } ), ImmutableList.<Field>of( new Field("child5.1", true, new Timestamp("UTC"), null), new Field("child5.2", true, new FloatingPoint(DOUBLE), ImmutableList.<Field>of()) )));
