Repository: arrow Updated Branches: refs/heads/master e3c167bd1 -> 65b74b350
ARROW-393: [JAVA] JSON file reader fails to set the buffer size on String data vector Fixed by calling setValueCount after setting the values instead of before. Since we set the inner vectors of NullableVarCharVector directly we don't have to worry about its lastSet field and the way null values are handled. Author: Julien Le Dem <[email protected]> Closes #218 from julienledem/json_read_varchar and squashes the following commits: e147906 [Julien Le Dem] ARROW-393: [JAVA] JSON file reader fails to set the buffer size on String data vector Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/65b74b35 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/65b74b35 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/65b74b35 Branch: refs/heads/master Commit: 65b74b350209ee3f930a00a0270e1d7c3d485c93 Parents: e3c167b Author: Julien Le Dem <[email protected]> Authored: Tue Nov 29 22:23:19 2016 -0500 Committer: Wes McKinney <[email protected]> Committed: Tue Nov 29 22:23:19 2016 -0500 ---------------------------------------------------------------------- .../org/apache/arrow/tools/Integration.java | 2 +- .../org/apache/arrow/tools/TestIntegration.java | 54 +++++++++++++++++++- .../arrow/vector/file/json/JsonFileReader.java | 8 ++- .../arrow/vector/schema/ArrowVectorType.java | 15 ++++++ 4 files changed, 72 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/65b74b35/java/tools/src/main/java/org/apache/arrow/tools/Integration.java ---------------------------------------------------------------------- diff --git a/java/tools/src/main/java/org/apache/arrow/tools/Integration.java b/java/tools/src/main/java/org/apache/arrow/tools/Integration.java index fa4bedc..85af30d 100644 --- a/java/tools/src/main/java/org/apache/arrow/tools/Integration.java +++ 
b/java/tools/src/main/java/org/apache/arrow/tools/Integration.java @@ -80,7 +80,7 @@ public class Integration { Schema schema = footer.getSchema(); LOGGER.debug("Input file size: " + arrowFile.length()); LOGGER.debug("Found schema: " + schema); - try (JsonFileWriter writer = new JsonFileWriter(jsonFile);) { + try (JsonFileWriter writer = new JsonFileWriter(jsonFile, JsonFileWriter.config().pretty(true));) { writer.start(schema); List<ArrowBlock> recordBatches = footer.getRecordBatches(); for (ArrowBlock rbBlock : recordBatches) { http://git-wip-us.apache.org/repos/asf/arrow/blob/65b74b35/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java ---------------------------------------------------------------------- diff --git a/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java b/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java index bb69ed1..464144b 100644 --- a/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java +++ b/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java @@ -24,9 +24,12 @@ import static org.apache.arrow.tools.ArrowFileTestFixtures.writeData; import static org.apache.arrow.tools.ArrowFileTestFixtures.writeInput; import static org.junit.Assert.fail; +import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.StringReader; +import java.util.Map; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -44,6 +47,11 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; +import com.fasterxml.jackson.core.util.DefaultPrettyPrinter; +import com.fasterxml.jackson.core.util.DefaultPrettyPrinter.NopIndenter; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; + public class TestIntegration { @Rule @@ -69,7 +77,7 @@ public class TestIntegration { File testOutFile = 
testFolder.newFile("testOut.arrow"); testOutFile.delete(); - // generate an arow file + // generate an arrow file writeInput(testInFile, allocator); Integration integration = new Integration(); @@ -91,6 +99,50 @@ public class TestIntegration { } @Test + public void testJSONRoundTripWithVariableWidth() throws Exception { + File testJSONFile = new File("../../integration/data/simple.json"); + File testOutFile = testFolder.newFile("testOut.arrow"); + File testRoundTripJSONFile = testFolder.newFile("testOut.json"); + testOutFile.delete(); + testRoundTripJSONFile.delete(); + + Integration integration = new Integration(); + + // convert to arrow + String[] args1 = { "-arrow", testOutFile.getAbsolutePath(), "-json", testJSONFile.getAbsolutePath(), "-command", Command.JSON_TO_ARROW.name()}; + integration.run(args1); + + // convert back to json + String[] args2 = { "-arrow", testOutFile.getAbsolutePath(), "-json", testRoundTripJSONFile.getAbsolutePath(), "-command", Command.ARROW_TO_JSON.name()}; + integration.run(args2); + + BufferedReader orig = readNormalized(testJSONFile); + BufferedReader rt = readNormalized(testRoundTripJSONFile); + String i, o; + int j = 0; + while ((i = orig.readLine()) != null && (o = rt.readLine()) != null) { + Assert.assertEquals("line: " + j, i, o); + ++j; + } + } + + private ObjectMapper om = new ObjectMapper(); + { + DefaultPrettyPrinter prettyPrinter = new DefaultPrettyPrinter(); + prettyPrinter.indentArraysWith(NopIndenter.instance); + om.setDefaultPrettyPrinter(prettyPrinter); + om.enable(SerializationFeature.INDENT_OUTPUT); + om.enable(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS); + } + + private BufferedReader readNormalized(File f) throws IOException { + Map<?,?> tree = om.readValue(f, Map.class); + String normalized = om.writeValueAsString(tree); + return new BufferedReader(new StringReader(normalized)); + } + + + @Test public void testInvalid() throws Exception { File testValidInFile = testFolder.newFile("testValidIn.arrow"); File 
testInvalidInFile = testFolder.newFile("testInvalidIn.arrow"); http://git-wip-us.apache.org/repos/asf/arrow/blob/65b74b35/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java index f205982..26dd3f6 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java @@ -22,6 +22,7 @@ import static com.fasterxml.jackson.core.JsonToken.END_OBJECT; import static com.fasterxml.jackson.core.JsonToken.START_ARRAY; import static com.fasterxml.jackson.core.JsonToken.START_OBJECT; import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.arrow.vector.schema.ArrowVectorType.OFFSET; import java.io.File; import java.io.IOException; @@ -128,15 +129,12 @@ public class JsonFileReader implements AutoCloseable { valueVector.allocateNew(); Mutator mutator = valueVector.getMutator(); - int innerVectorCount = count; - if (vectorType.getName() == "OFFSET") { - innerVectorCount++; - } - mutator.setValueCount(innerVectorCount); + int innerVectorCount = vectorType.equals(OFFSET) ? 
count + 1 : count; for (int i = 0; i < innerVectorCount; i++) { parser.nextToken(); setValueFromParser(valueVector, i); } + mutator.setValueCount(innerVectorCount); readToken(END_ARRAY); } // if children http://git-wip-us.apache.org/repos/asf/arrow/blob/65b74b35/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java index 8fe8e48..68da705 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/schema/ArrowVectorType.java @@ -81,4 +81,19 @@ public class ArrowVectorType { public String toString() { return getName(); } + + @Override + public int hashCode() { + return type; + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof ArrowVectorType) { + ArrowVectorType other = (ArrowVectorType) obj; + return type == other.type; + } + return false; + } + }
