Repository: arrow Updated Branches: refs/heads/master bf2acf6cb -> 990e2bde7
ARROW-691: [Java] Encode dictionary type in message format Author: Emilio Lahr-Vivaz <[email protected]> Closes #422 from elahrvivaz/ARROW-691 and squashes the following commits: c1adad1 [Emilio Lahr-Vivaz] ARROW-691 Encode dictionary type in message format Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/990e2bde Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/990e2bde Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/990e2bde Branch: refs/heads/master Commit: 990e2bde758ac8bc6e4497ae1bc37f89b71bb5cf Parents: bf2acf6 Author: Emilio Lahr-Vivaz <[email protected]> Authored: Wed Mar 22 23:08:01 2017 -0400 Committer: Wes McKinney <[email protected]> Committed: Wed Mar 22 23:08:01 2017 -0400 ---------------------------------------------------------------------- .../vector/types/pojo/DictionaryEncoding.java | 18 ++++++++++++++++++ .../org/apache/arrow/vector/types/pojo/Field.java | 3 ++- .../vector/stream/MessageSerializerTest.java | 15 +++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/990e2bde/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java index 6d35cde..32568d3 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java @@ -18,6 +18,8 @@ ******************************************************************************/ package org.apache.arrow.vector.types.pojo; +import java.util.Objects; + import org.apache.arrow.vector.types.pojo.ArrowType.Int; public class DictionaryEncoding { @@ -48,4 +50,20 @@ public class DictionaryEncoding { public String toString() { return "DictionaryEncoding[id=" + id + ",ordered=" + ordered + ",indexType=" + indexType + "]"; } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (o == null || getClass() != o.getClass()) { + return false; + } + DictionaryEncoding that = (DictionaryEncoding) o; + return id == that.id && ordered == that.ordered && Objects.equals(indexType, that.indexType); + } + + @Override + public int hashCode() { + return Objects.hash(id, ordered, indexType); + } } http://git-wip-us.apache.org/repos/asf/arrow/blob/990e2bde/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java index c310b90..011f0e6 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java @@ -121,10 +121,11 @@ public class Field { int typeOffset = type.getType(builder); int dictionaryOffset = -1; if (dictionary != null) { - // TODO encode dictionary type - currently type is only signed 32 bit int (default null) + int dictionaryType = dictionary.getIndexType().getType(builder); org.apache.arrow.flatbuf.DictionaryEncoding.startDictionaryEncoding(builder); org.apache.arrow.flatbuf.DictionaryEncoding.addId(builder, dictionary.getId()); org.apache.arrow.flatbuf.DictionaryEncoding.addIsOrdered(builder, dictionary.isOrdered()); + org.apache.arrow.flatbuf.DictionaryEncoding.addIndexType(builder, dictionaryType); dictionaryOffset = org.apache.arrow.flatbuf.DictionaryEncoding.endDictionaryEncoding(builder); } int[] childrenData = new int[children.size()]; http://git-wip-us.apache.org/repos/asf/arrow/blob/990e2bde/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java ---------------------------------------------------------------------- diff --git a/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java b/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java index bb2ccf8..d3d49d5 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/stream/MessageSerializerTest.java @@ -37,6 +37,7 @@ import org.apache.arrow.vector.schema.ArrowFieldNode; import org.apache.arrow.vector.schema.ArrowMessage; import org.apache.arrow.vector.schema.ArrowRecordBatch; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.DictionaryEncoding; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; import org.junit.Test; @@ -73,6 +74,20 @@ public class MessageSerializerTest { } @Test + public void testSchemaDictionaryMessageSerialization() throws IOException { + DictionaryEncoding dictionary = new DictionaryEncoding(9L, false, new ArrowType.Int(8, true)); + Field field = new Field("test", true, ArrowType.Utf8.INSTANCE, dictionary, null); + Schema schema = new Schema(Collections.singletonList(field)); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + long size = MessageSerializer.serialize(new WriteChannel(Channels.newChannel(out)), schema); + assertEquals(size, out.toByteArray().length); + + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + Schema deserialized = MessageSerializer.deserializeSchema(new ReadChannel(Channels.newChannel(in))); + assertEquals(schema, deserialized); + } + + @Test public void testSerializeRecordBatch() throws IOException { byte[] validity = new byte[] { (byte)255, 0}; // second half is "undefined"
