DRILL-809: BitVector.allocateNewSafe fixes.
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/f7688be4 Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/f7688be4 Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/f7688be4 Branch: refs/heads/master Commit: f7688be48ad93ffae4351199d4ac4ec363aa3599 Parents: 0f8be86 Author: vkorukanti <[email protected]> Authored: Wed May 21 20:04:29 2014 -0700 Committer: Jacques Nadeau <[email protected]> Committed: Thu May 22 10:42:35 2014 -0700 ---------------------------------------------------------------------- .../exec/store/easy/json/JSONRecordReader2.java | 5 +- .../org/apache/drill/exec/vector/BitVector.java | 4 +- .../complex/writer/TestJsonReaderLargeFile.java | 101 +++++++++++++++++++ .../src/test/resources/storage-plugins.json | 3 + 4 files changed, 110 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/f7688be4/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader2.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader2.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader2.java index 37624d2..98c5ae3 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader2.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader2.java @@ -84,11 +84,12 @@ public class JSONRecordReader2 implements RecordReader{ break; case NO_MORE: -// System.out.println("no more records - main loop"); break outside; case WRITE_FAILED: -// System.out.println("==== hit bounds at " + i); + if (i == 0) { + throw new DrillRuntimeException("Record is too big to fit into allocated ValueVector"); + } break outside; }; } http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/f7688be4/exec/java-exec/src/main/java/org/apache/drill/exec/vector/BitVector.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/BitVector.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/BitVector.java index 323eae7..5952c6c 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/BitVector.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/BitVector.java @@ -74,9 +74,11 @@ public final class BitVector extends BaseDataValueVector implements FixedWidthVe public boolean allocateNewSafe() { clear(); if (allocationMonitor > 5) { - allocationValueCount = Math.min(1, (int)(allocationValueCount * 0.9)); + allocationValueCount = Math.max(1, (int)(allocationValueCount * 0.9)); + allocationMonitor = 0; } else if (allocationMonitor < -5) { allocationValueCount = (int) (allocationValueCount * 1.1); + allocationMonitor = 0; } clear(); http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/f7688be4/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReaderLargeFile.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReaderLargeFile.java b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReaderLargeFile.java new file mode 100644 index 0000000..643100a --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReaderLargeFile.java @@ -0,0 +1,101 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.drill.exec.vector.complex.writer; + +import org.apache.commons.io.FileUtils; +import org.apache.drill.BaseTestQuery; +import org.apache.drill.exec.record.RecordBatchLoader; +import org.apache.drill.exec.rpc.user.QueryResultBatch; +import org.apache.drill.exec.vector.BigIntVector; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.File; +import java.io.PrintWriter; +import java.util.List; + +import static org.junit.Assert.assertTrue; + +public class TestJsonReaderLargeFile extends BaseTestQuery { + static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestJsonReaderLargeFile.class); + + private static File dataFile = null; + private static int NUM_RECORDS = 15000; + + @BeforeClass + public static void generateTestData() throws Exception { + // Generate a json file with NUM_RECORDS number of records + while (true) { + dataFile = File.createTempFile("drill-json", ".json"); + if (dataFile.exists()) { + boolean success = dataFile.delete(); + if (success) { + break; + } + } + logger.trace("retry creating tmp file"); + } + + PrintWriter printWriter = new PrintWriter(dataFile); + String record = "{\n" + + "\"project\" : \"Drill\", \n" + + "\"summary\" : \"Apache Drill provides low latency ad-hoc queries to many different data sources, " + + "including nested data. Inspired by Google's Dremel, Drill is designed to scale to 10,000 servers and " + + "query petabytes of data in seconds.\"\n" + + "}"; + + for (int i=1; i<=NUM_RECORDS; i++) { + printWriter.println(record); + } + + printWriter.close(); + } + + @Test + public void testRead() throws Exception { + List<QueryResultBatch> results = testSqlWithResults( + String.format("SELECT count(*) FROM dfs.`default`.`%s`", dataFile.getPath())); + + RecordBatchLoader batchLoader = new RecordBatchLoader(getAllocator()); + + for(QueryResultBatch batch : results) { + batchLoader.load(batch.getHeader().getDef(), batch.getData()); + + if (batchLoader.getRecordCount() <= 0) { + continue; + } + + BigIntVector countV = (BigIntVector) batchLoader.getValueAccessorById(BigIntVector.class, 0).getValueVector(); + assertTrue("Total of "+ NUM_RECORDS + " records expected in count", countV.getAccessor().get(0) == NUM_RECORDS); + + batchLoader.clear(); + batch.release(); + } + } + + @AfterClass + public static void deleteTestData() throws Exception { + if (dataFile != null) { + if (dataFile.exists()) { + FileUtils.forceDelete(dataFile); + } + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/f7688be4/exec/java-exec/src/test/resources/storage-plugins.json ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/storage-plugins.json b/exec/java-exec/src/test/resources/storage-plugins.json index 020805e..9503482 100644 --- a/exec/java-exec/src/test/resources/storage-plugins.json +++ b/exec/java-exec/src/test/resources/storage-plugins.json @@ -30,6 +30,9 @@ extensions: [ "tsv" ], delimiter: "\t" }, + "json" : { + type: "json" + }, "parquet" : { type: "parquet" }
