DRILL-4919: Fix select count(1) / count(*) on csv with header. This closes #714
Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/34969583 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/34969583 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/34969583 Branch: refs/heads/master Commit: 34969583bfab410c80cb14a1c20249f097d5f7a7 Parents: 535623b Author: Arina Ielchiieva <[email protected]> Authored: Thu Dec 29 15:42:53 2016 +0000 Committer: Parth Chandra <[email protected]> Committed: Fri Jan 13 17:46:13 2017 -0800 ---------------------------------------------------------------------- .../compliant/CompliantTextRecordReader.java | 18 +++++++++++++++- .../drill/exec/store/text/TestCsvHeader.java | 22 ++++++++++++++++++-- 2 files changed, 37 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/34969583/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java index d324270..ac4abb9 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java @@ -17,6 +17,7 @@ */ package org.apache.drill.exec.store.easy.text.compliant; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Maps; import com.univocity.parsers.common.TextParsingException; import io.netty.buffer.DrillBuf; @@ -51,8 +52,12 @@ public class CompliantTextRecordReader extends AbstractRecordReader { static final org.slf4j.Logger logger = 
org.slf4j.LoggerFactory.getLogger(CompliantTextRecordReader.class); private static final int MAX_RECORDS_PER_BATCH = 8096; - static final int READ_BUFFER = 1024*1024; + private static final int READ_BUFFER = 1024*1024; private static final int WHITE_SPACE_BUFFER = 64*1024; + // When no named column is required, ask SCAN to return a DEFAULT column. + // If such column does not exist, it will be returned as a nullable-int column. + private static final List<SchemaPath> DEFAULT_NAMED_TEXT_COLS_TO_READ = + ImmutableList.of(SchemaPath.getSimplePath("_DEFAULT_COL_TO_READ_")); // settings to be used while parsing private TextParsingSettings settings; @@ -89,8 +94,19 @@ public class CompliantTextRecordReader extends AbstractRecordReader { return super.isStarQuery(); } + /** + * Returns list of default columns to read to replace empty list of columns. + * For text files without headers returns "columns[0]". + * Text files with headers do not support columns syntax, + * so when header extraction is enabled, returns fake named column "_DEFAULT_COL_TO_READ_". 
+ * + * @return list of default columns to read + */ @Override protected List<SchemaPath> getDefaultColumnsToRead() { + if (settings.isHeaderExtractionEnabled()) { + return DEFAULT_NAMED_TEXT_COLS_TO_READ; + } return DEFAULT_TEXT_COLS_TO_READ; } http://git-wip-us.apache.org/repos/asf/drill/blob/34969583/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java index a2e548b..cf54bb0 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java @@ -17,6 +17,7 @@ */ package org.apache.drill.exec.store.text; +import com.google.common.collect.Lists; import org.apache.drill.BaseTestQuery; import org.apache.drill.TestBuilder; import org.apache.drill.common.util.FileUtils; @@ -24,14 +25,14 @@ import org.apache.drill.common.util.FileUtils; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileOutputStream; +import java.util.List; import org.junit.Before; import org.junit.Test; public class TestCsvHeader extends BaseTestQuery{ - static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestCsvHeader.class); - String root; + private String root; @Before public void initialize() throws Exception { @@ -185,4 +186,21 @@ public class TestCsvHeader extends BaseTestQuery{ } builder.go(); } + + @Test + public void testCountOnCsvWithHeader() throws Exception { + final String query = "select count(%s) as cnt from %s.`%s`"; + final List<Object> options = Lists.<Object>newArrayList("*", 1, "'A'"); + + for (Object option : options) { + testBuilder() + .sqlQuery(query, option, TEMP_SCHEMA, root) + .unOrdered() + .baselineColumns("cnt") + .baselineValues(4L) + .build() 
+ .run(); + } + } + }
