Repository: drill Updated Branches: refs/heads/master 1b69869d9 -> a219f8784
DRILL-3537: When scanning files in ScanBatch, ignore all the empty files before reaching a non-empty file Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/a219f878 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/a219f878 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/a219f878 Branch: refs/heads/master Commit: a219f8784c55ce3bc15b9bb3a19d7b33e4021c00 Parents: 1b69869 Author: Hsuan-Yi Chu <hsua...@usc.edu> Authored: Thu Jul 23 17:20:17 2015 -0700 Committer: Parth Chandra <par...@apache.org> Committed: Mon Jul 27 14:23:33 2015 -0700 ---------------------------------------------------------------------- .../apache/drill/exec/physical/impl/ScanBatch.java | 12 +++++++++++- .../exec/vector/complex/writer/TestJsonReader.java | 16 ++++++++++++++++ .../store/json/jsonDirectoryWithEmpyFile/a.json | 0 .../store/json/jsonDirectoryWithEmpyFile/b.json | 3 +++ 4 files changed, 30 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/a219f878/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java index 6bf1280..4b91e1f 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java @@ -83,7 +83,7 @@ public class ScanBatch implements CloseableRecordBatch { private String partitionColumnDesignator; private boolean done = false; private SchemaChangeCallBack callBack = new SchemaChangeCallBack(); - + private boolean hasReadNonEmptyFile = false; public ScanBatch(PhysicalOperator subScanConfig, FragmentContext context, 
OperatorContext oContext, Iterator<RecordReader> readers, List<String[]> partitionColumns, List<Integer> selectedPartitionColumns) throws ExecutionSetupException { this.context = context; @@ -186,6 +186,15 @@ public class ScanBatch implements CloseableRecordBatch { return IterOutcome.NONE; } + // If all the files we have read so far are just empty, the schema is not useful + if(!hasReadNonEmptyFile) { + container.clear(); + for (ValueVector v : fieldVectorMap.values()) { + v.clear(); + } + fieldVectorMap.clear(); + } + currentReader.cleanup(); currentReader = readers.next(); partitionValues = partitionColumns.hasNext() ? partitionColumns.next() : null; @@ -208,6 +217,7 @@ public class ScanBatch implements CloseableRecordBatch { } } + hasReadNonEmptyFile = true; populatePartitionVectors(); // this is a slight misuse of this metric but it will allow Readers to report how many records they generated. http://git-wip-us.apache.org/repos/asf/drill/blob/a219f878/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java index 912a5f0..7d6c71c 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java @@ -296,6 +296,22 @@ public class TestJsonReader extends BaseTestQuery { batchLoader.clear(); } + @Test + public void testJsonDirectoryWithEmptyFile() throws Exception { + String root = FileUtils.getResourceAsFile("/store/json/jsonDirectoryWithEmpyFile").toURI().toString(); + + String queryRightEmpty = String.format( + "select * from dfs_test.`%s`", root); + + testBuilder() + .sqlQuery(queryRightEmpty) + .unOrdered() + 
.baselineColumns("a") + .baselineValues(1l) + .build() + .run(); + } + private void testExistentColumns(RecordBatchLoader batchLoader) throws SchemaChangeException { VectorWrapper<?> vw = batchLoader.getValueAccessorById( RepeatedBigIntVector.class, // http://git-wip-us.apache.org/repos/asf/drill/blob/a219f878/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/a.json ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/a.json b/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/a.json new file mode 100644 index 0000000..e69de29 http://git-wip-us.apache.org/repos/asf/drill/blob/a219f878/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/b.json ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/b.json b/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/b.json new file mode 100644 index 0000000..73d0e9c --- /dev/null +++ b/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/b.json @@ -0,0 +1,3 @@ +{ + a : 1 +} \ No newline at end of file