This is an automated email from the ASF dual-hosted git repository.

ihuzenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
commit 7bd442d02093240bc31aca827d57e12eb4c2f4f5
Author: Arina Ielchiieva <[email protected]>
AuthorDate: Thu Aug 1 20:18:34 2019 +0300

    DRILL-7335: Fix error when reading csv file with headers only

    closes #1834
---
 .../exec/store/easy/text/reader/TextInput.java     | 10 ++--
 .../easy/text/compliant/TestCsvWithHeaders.java    | 62 ++++++++++++++--------
 2 files changed, 46 insertions(+), 26 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/reader/TextInput.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/reader/TextInput.java
index 3e05e58..d9fa973 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/reader/TextInput.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/reader/TextInput.java
@@ -86,9 +86,9 @@ final class TextInput {
   /**
    * Creates a new instance with the mandatory characters for handling newlines
    * transparently. lineSeparator the sequence of characters that represent a
-   * newline, as defined in {@link Format#getLineSeparator()}
+   * newline, as defined in {@link TextParsingSettings#getNewLineDelimiter()}
    * normalizedLineSeparator the normalized newline character (as defined in
-   * {@link Format#getNormalizedNewline()}) that is used to replace any
+   * {@link TextParsingSettings#getNormalizedNewLine()}) that is used to replace any
    * lineSeparator sequence found in the input.
    */
   public TextInput(TextParsingSettings settings, InputStream input, DrillBuf readBuffer, long startPos, long endPos) {
@@ -142,7 +142,11 @@ final class TextInput {
     if (startPos > 0 || settings.isSkipFirstLine()) {
 
       // move to next full record.
-      skipLines(1);
+      try {
+        skipLines(1);
+      } catch (StreamFinishedPseudoException e) {
+        // file does not have any more lines, ignore
+      }
     }
   }
 }
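The guard above exists because a "headers only" CSV file is a single header row, often with no trailing newline; skipping that row while positioning the reader exhausts the input, which skipLines() signals with StreamFinishedPseudoException. Treating that as "no data rows" rather than an error lets the scan return an empty, schema-only batch. A minimal standalone sketch of the same pattern, using plain java.io classes rather than Drill's TextInput (the class and variable names below are illustrative, not from this patch):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;

public class HeaderSkipSketch {
  public static void main(String[] args) throws IOException {
    // A headers-only CSV: one row, no trailing newline, no data rows.
    BufferedReader in = new BufferedReader(new StringReader("a,b,c"));
    in.readLine(); // skip the header row, as skipLines(1) does above
    if (in.readLine() == null) {
      // End of input while looking for the first data row. This mirrors
      // catching StreamFinishedPseudoException above: an empty result
      // (schema only, zero rows) is produced instead of an error.
      System.out.println("headers only: zero data rows");
    }
  }
}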
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsvWithHeaders.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsvWithHeaders.java
index 8a2e8b0..4d4202c 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsvWithHeaders.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/easy/text/compliant/TestCsvWithHeaders.java
@@ -23,7 +23,10 @@ import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+import java.io.File;
+import java.io.FileWriter;
 import java.io.IOException;
+import java.io.PrintWriter;
 import java.util.Iterator;
 
 import org.apache.drill.categories.RowSetTests;
@@ -55,34 +58,34 @@ import org.junit.experimental.categories.Category;
  * the first batch either contains data, or that the first batch is empty
  * only if there is no data at all to be read.
  *
- * @see {@link TestHeaderBuilder}
+ * @see TestHeaderBuilder
  */
 @Category(RowSetTests.class)
 public class TestCsvWithHeaders extends BaseCsvTest {
 
   private static final String TEST_FILE_NAME = "basic.csv";
+  private static final String COLUMNS_FILE_NAME = "columns.csv";
+  private static final String EMPTY_HEADERS_FILE = "noHeaders.csv";
 
-  private static String invalidHeaders[] = {
+  private static String[] invalidHeaders = {
       "$,,9b,c,c,c_2",
       "10,foo,bar,fourth,fifth,sixth"
   };
 
-  private static String emptyHeaders[] = {
+  private static String[] emptyHeaders = {
       "",
       "10,foo,bar"
   };
 
-  private static String raggedRows[] = {
+  private static String[] raggedRows = {
       "a,b,c",
       "10,dino",
       "20,foo,bar",
       "30"
   };
 
-  public static final String COLUMNS_FILE_NAME = "columns.csv";
-
-  private static String columnsCol[] = {
+  private static String[] columnsCol = {
      "author,columns",
      "fred,\"Rocks Today,Dino Wrangling\"",
      "barney,Bowlarama"
@@ -109,7 +112,7 @@ public class TestCsvWithHeaders extends BaseCsvTest {
    * <br><tt>SELECT * FROM VALUES ();</tt><br>
    * The implementation tested here follows that pattern.
    *
-   * @see {@link TestCsvWithoutHeaders#testEmptyFile()}
+   * @see TestCsvWithoutHeaders#testEmptyFile()
    */
   @Test
   public void testEmptyFile() throws IOException {
@@ -118,8 +121,6 @@ public class TestCsvWithHeaders extends BaseCsvTest {
     assertNull(rowSet);
   }
 
-  private static final String EMPTY_HEADERS_FILE = "noheaders.csv";
-
   /**
    * Trivial case: empty header. This case should fail.
    */
@@ -188,10 +189,6 @@ public class TestCsvWithHeaders extends BaseCsvTest {
     RowSetUtilities.verify(expected, actual);
   }
 
-  private String makeStatement(String fileName) {
-    return "SELECT * FROM `dfs.data`.`" + fileName + "`";
-  }
-
   /**
    * Verify that the wildcard expands columns to the header names, including
    * case
@@ -352,9 +349,8 @@ public class TestCsvWithHeaders extends BaseCsvTest {
    * files are nested to another level.)
    */
   @Test
-  public void testPartitionExpansion() throws IOException {
-    String sql = "SELECT * FROM `dfs.data`.`%s`";
-    Iterator<DirectRowSet> iter = client.queryBuilder().sql(sql, PART_DIR).rowSetIterator();
+  public void testPartitionExpansion() {
+    Iterator<DirectRowSet> iter = client.queryBuilder().sql(makeStatement(PART_DIR)).rowSetIterator();
 
     TupleMetadata expectedSchema = new SchemaBuilder()
         .add("a", MinorType.VARCHAR)
@@ -407,7 +403,7 @@ public class TestCsvWithHeaders extends BaseCsvTest {
    * partition column moves after data columns.
    */
   @Test
-  public void testWilcardAndPartitionsMultiFiles() throws IOException {
+  public void testWildcardAndPartitionsMultiFiles() {
     String sql = "SELECT *, dir0, dir1 FROM `dfs.data`.`%s`";
     Iterator<DirectRowSet> iter = client.queryBuilder().sql(sql, PART_DIR).rowSetIterator();
 
@@ -464,7 +460,7 @@ public class TestCsvWithHeaders extends BaseCsvTest {
    * are consistent even when used across multiple scans.
    */
   @Test
-  public void doTestExplicitPartitionsMultiFiles() throws IOException {
+  public void doTestExplicitPartitionsMultiFiles() {
     String sql = "SELECT a, b, c, dir0, dir1 FROM `dfs.data`.`%s`";
     Iterator<DirectRowSet> iter = client.queryBuilder().sql(sql, PART_DIR).rowSetIterator();
 
@@ -537,7 +533,6 @@ public class TestCsvWithHeaders extends BaseCsvTest {
    * The column name `columns` is treated as a plain old
    * column when using column headers. If used with an index,
    * validation will fail because the VarChar column is not an array
-   * @throws Exception
    */
   @Test
   public void testColumnsIndex() throws Exception {
@@ -572,7 +567,6 @@ public class TestCsvWithHeaders extends BaseCsvTest {
   /**
    * If columns[x] is used, then this can't possibly match a valid
    * text reader column, so raise an error instead.
-   * @throws Exception
    */
   @Test
   public void testColumnsIndexMissing() throws Exception {
@@ -594,8 +588,7 @@ public class TestCsvWithHeaders extends BaseCsvTest {
   @Test
   public void testHugeColumn() throws IOException {
     String fileName = buildBigColFile(true);
-    String sql = "SELECT * FROM `dfs.data`.`%s`";
-    RowSet actual = client.queryBuilder().sql(sql, fileName).rowSet();
+    RowSet actual = client.queryBuilder().sql(makeStatement(fileName)).rowSet();
     assertEquals(10, actual.rowCount());
     RowSetReader reader = actual.reader();
     while (reader.next()) {
@@ -610,4 +603,27 @@ public class TestCsvWithHeaders extends BaseCsvTest {
     }
     actual.clear();
   }
+
+  @Test
+  public void testHeadersOnly() throws Exception {
+    String fileName = "headersOnly.csv";
+    try (PrintWriter out = new PrintWriter(new FileWriter(new File(testDir, fileName)))) {
+      out.print("a,b,c"); // note: no \n in the end
+    }
+
+    RowSet actual = client.queryBuilder().sql(makeStatement(fileName)).rowSet();
+
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("a", MinorType.VARCHAR)
+      .add("b", MinorType.VARCHAR)
+      .add("c", MinorType.VARCHAR)
+      .buildSchema();
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .build();
+    RowSetUtilities.verify(expected, actual);
+  }
+
+  private String makeStatement(String fileName) {
+    return "SELECT * FROM `dfs.data`.`" + fileName + "`";
+  }
 }
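With the fix, querying such a file returns the header-derived schema and zero rows instead of raising an error. A hedged end-to-end sketch through Drill's JDBC driver; the connection URL, the file path, and the use of the .csvh extension (which the bundled dfs plugin maps to delimited text with header extraction enabled) are assumptions for illustration, not part of this commit:

import java.io.FileWriter;
import java.io.PrintWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class HeadersOnlyQuery {
  public static void main(String[] args) throws Exception {
    // Write a CSV containing only a header row, with no trailing newline.
    try (PrintWriter out = new PrintWriter(new FileWriter("/tmp/headersOnly.csvh"))) {
      out.print("a,b,c");
    }
    try (Connection conn = DriverManager.getConnection("jdbc:drill:drillbit=localhost");
         Statement stmt = conn.createStatement();
         ResultSet rs = stmt.executeQuery("SELECT * FROM dfs.`/tmp/headersOnly.csvh`")) {
      // Before DRILL-7335 this query failed while skipping the header line;
      // with the fix it returns columns a, b, c (VARCHAR) and no rows.
      System.out.println("columns: " + rs.getMetaData().getColumnCount());
      System.out.println("has rows: " + rs.next());
    }
  }
}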
