Repository: arrow Updated Branches: refs/heads/master ea9bc8378 -> 0396240b5
ARROW-1190: [JAVA] Fixing VectorLoader for duplicate field names VectorLoader was corrupting data when some of the fields had the same name, in which case only one of those vectors was loaded properly. This PR resolves the problem by avoiding by-name field lookups. Author: Antony Mayi <antonym...@yahoo.com> Closes #816 from antonymayi/master and squashes the following commits: 38c1837 [Antony Mayi] adding unit test for unload-loading vectors with duplicate field names a723416 [Antony Mayi] ARROW-1190 - fixing VectorLoader for duplicate field names Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/0396240b Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/0396240b Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/0396240b Branch: refs/heads/master Commit: 0396240b55ab1d74c6b36ffaf95290135d8da389 Parents: ea9bc83 Author: Antony Mayi <antonym...@yahoo.com> Authored: Mon Jul 17 18:18:56 2017 +0200 Committer: Uwe L. Korn <uw...@xhochy.com> Committed: Mon Jul 17 18:18:56 2017 +0200 ---------------------------------------------------------------------- .../org/apache/arrow/vector/VectorLoader.java | 6 +-- .../arrow/vector/TestVectorUnloadLoad.java | 47 ++++++++++++++++++++ 2 files changed, 49 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/0396240b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index 33a608c..e640c7c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -55,10 +55,8 @@ public class VectorLoader { public void load(ArrowRecordBatch 
recordBatch) { Iterator<ArrowBuf> buffers = recordBatch.getBuffers().iterator(); Iterator<ArrowFieldNode> nodes = recordBatch.getNodes().iterator(); - List<Field> fields = root.getSchema().getFields(); - for (Field field: fields) { - FieldVector fieldVector = root.getVector(field.getName()); - loadBuffers(fieldVector, field, buffers, nodes); + for (FieldVector fieldVector: root.getFieldVectors()) { + loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes); } root.setRowCount(recordBatch.getLength()); if (nodes.hasNext() || buffers.hasNext()) { http://git-wip-us.apache.org/repos/asf/arrow/blob/0396240b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java ---------------------------------------------------------------------- diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java index 6fb559c..f369465 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java @@ -240,6 +240,53 @@ public class TestVectorUnloadLoad { } } + @Test + public void testUnloadLoadDuplicates() throws IOException { + int count = 10; + Schema schema = new Schema(asList( + new Field("duplicate", FieldType.nullable(new ArrowType.Int(32, true)), Collections.<Field>emptyList()), + new Field("duplicate", FieldType.nullable(new ArrowType.Int(32, true)), Collections.<Field>emptyList()) + )); + + try ( + BufferAllocator originalVectorsAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); + ) { + List<FieldVector> sources = new ArrayList<>(); + for (Field field: schema.getFields()) { + FieldVector vector = field.createVector(originalVectorsAllocator); + vector.allocateNew(); + sources.add(vector); + NullableIntVector.Mutator mutator = (NullableIntVector.Mutator) vector.getMutator(); + for (int i = 0; i < count; i++) { + 
mutator.set(i, i); + } + mutator.setValueCount(count); + } + + try (VectorSchemaRoot root = new VectorSchemaRoot(schema.getFields(), sources, count)) { + VectorUnloader vectorUnloader = new VectorUnloader(root); + try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); + BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE); + VectorSchemaRoot newRoot = VectorSchemaRoot.create(schema, finalVectorsAllocator);) { + // load it + VectorLoader vectorLoader = new VectorLoader(newRoot); + vectorLoader.load(recordBatch); + + List<FieldVector> targets = newRoot.getFieldVectors(); + Assert.assertEquals(sources.size(), targets.size()); + for (int k = 0; k < sources.size(); k++) { + NullableIntVector.Accessor src = (NullableIntVector.Accessor) sources.get(k).getAccessor(); + NullableIntVector.Accessor tgt = (NullableIntVector.Accessor) targets.get(k).getAccessor(); + Assert.assertEquals(src.getValueCount(), tgt.getValueCount()); + for (int i = 0; i < count; i++) { + Assert.assertEquals(src.get(i), tgt.get(i)); + } + } + } + } + } + } + public static VectorUnloader newVectorUnloader(FieldVector root) { Schema schema = new Schema(root.getField().getChildren()); int valueCount = root.getAccessor().getValueCount();