[ 
https://issues.apache.org/jira/browse/FLINK-3901?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15332758#comment-15332758
 ] 

ASF GitHub Bot commented on FLINK-3901:
---------------------------------------

Github user fhueske commented on a diff in the pull request:

    https://github.com/apache/flink/pull/1989#discussion_r67257863
  
    --- Diff: 
flink-libraries/flink-table/src/test/java/org/apache/flink/api/java/io/RowCsvInputFormatTest.java
 ---
    @@ -0,0 +1,1075 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.flink.api.java.io;
    +
    +import static org.hamcrest.CoreMatchers.is;
    +import static org.junit.Assert.assertEquals;
    +import static org.junit.Assert.assertNotNull;
    +import static org.junit.Assert.assertNull;
    +import static org.junit.Assert.assertThat;
    +import static org.junit.Assert.assertTrue;
    +import static org.junit.Assert.fail;
    +
    +import java.io.File;
    +import java.io.FileOutputStream;
    +import java.io.IOException;
    +import java.io.OutputStreamWriter;
    +import java.nio.charset.StandardCharsets;
    +import java.util.ArrayList;
    +import java.util.List;
    +
    +import org.apache.flink.api.common.io.ParseException;
    +import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
    +import org.apache.flink.api.common.typeinfo.TypeInformation;
    +import org.apache.flink.api.java.typeutils.PojoTypeInfo;
    +import org.apache.flink.api.java.typeutils.TypeExtractor;
    +import org.apache.flink.api.table.Row;
    +import org.apache.flink.api.table.typeutils.RowTypeInfo;
    +import org.apache.flink.configuration.Configuration;
    +import org.apache.flink.core.fs.FileInputSplit;
    +import org.apache.flink.core.fs.Path;
    +import org.apache.flink.types.parser.FieldParser;
    +import org.apache.flink.types.parser.StringParser;
    +import org.junit.Test;
    +
    +public class RowCsvInputFormatTest {
    +
    +   private static final Path PATH = new Path("an/ignored/file/");
    +
    +   //Static variables for testing the removal of \r\n to \n
    +   private static final String FIRST_PART = "That is the first part";
    +
    +   private static final String SECOND_PART = "That is the second part";
    +
    +   @Test
    +   public void ignoreInvalidLines() {
    +           try {
    +                   String fileContent =
    +                                   "header1|header2|header3|\n"+
    +                                   "this is|1|2.0|\n"+
    +                                   "//a comment\n" +
    +                                   "a test|3|4.0|\n" +
    +                                   "#next|5|6.0|\n";
    +
    +                   FileInputSplit split = createTempFile(fileContent);
    +
    +                   RowTypeInfo typeInfo = new RowTypeInfo(new 
TypeInformation<?>[] {
    +                           BasicTypeInfo.STRING_TYPE_INFO,
    +                           BasicTypeInfo.INT_TYPE_INFO,
    +                           BasicTypeInfo.DOUBLE_TYPE_INFO
    +                   });
    +                   CsvInputFormat<Row> format = new 
RowCsvInputFormat(PATH, "\n", "|", typeInfo);
    +                   format.setLenient(false);
    +
    +                   Configuration parameters = new Configuration();
    +                   format.configure(parameters);
    +                   format.open(split);
    +
    +                   Row result = new Row(3);
    +
    +                   try {
    +                           result = format.nextRecord(result);
    +                           fail("Parse Exception was not thrown! (Invalid 
int value)");
    +                   } catch (ParseException ex) {
    +                   }
    +
    +                   // if format has lenient == false this can be asserted 
only after FLINK-3908
    +//                 result = format.nextRecord(result);
    +//                 assertNotNull(result);
    +//                 assertEquals("this is", result.productElement(0));
    +//                 assertEquals(new Integer(1), result.productElement(1));
    +//                 assertEquals(new Double(2.0), result.productElement(2));
    +//
    +//                 result = format.nextRecord(result);
    +//                 assertNotNull(result);
    +//                 assertEquals("a test", result.productElement(0));
    +//                 assertEquals(new Integer(3), result.productElement(1));
    +//                 assertEquals(new Double(4.0), result.productElement(2));
    +//
    +//                 result = format.nextRecord(result);
    +//                 assertNotNull(result);
    +//                 assertEquals("#next", result.productElement(0));
    +//                 assertEquals(new Integer(5), result.productElement(1));
    +//                 assertEquals(new Double(6.0), result.productElement(2));
    +//
    +//                 result = format.nextRecord(result);
    +//                 assertNull(result);
    +                   
    +                   //re-open with lenient = true
    +                   format.setLenient(true);
    +                   format.configure(parameters);
    +                   format.open(split);
    +
    +                   result = new Row(3);
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("header1", result.productElement(0));
    +                   assertNull(result.productElement(1));
    +                   assertNull(result.productElement(2));
    +                   
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("this is", result.productElement(0));
    +                   assertEquals(new Integer(1), result.productElement(1));
    +                   assertEquals(new Double(2.0), result.productElement(2));
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("a test", result.productElement(0));
    +                   assertEquals(new Integer(3), result.productElement(1));
    +                   assertEquals(new Double(4.0), result.productElement(2));
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("#next", result.productElement(0));
    +                   assertEquals(new Integer(5), result.productElement(1));
    +                   assertEquals(new Double(6.0), result.productElement(2));
    +
    +                   result = format.nextRecord(result);
    +                   assertNull(result);
    +           }
    +           catch (Exception ex) {
    +                   ex.printStackTrace();
    +                   fail("Test failed due to a " + ex.getClass().getName() 
+ ": " + ex.getMessage());
    +           }
    +   }
    +
    +   @Test
    +   public void ignoreSingleCharPrefixComments() {
    +           try {
    +                   final String fileContent = 
    +                                   "#description of the data\n" +
    +                                   "#successive commented line\n" +
    +                                   "this is|1|2.0|\n" +
    +                                   "a test|3|4.0|\n" +
    +                                   "#next|5|6.0|\n";
    +
    +                   FileInputSplit split = createTempFile(fileContent);
    +
    +                   RowTypeInfo typeInfo = new RowTypeInfo(new 
TypeInformation<?>[] {
    +                           BasicTypeInfo.STRING_TYPE_INFO,
    +                           BasicTypeInfo.INT_TYPE_INFO,
    +                           BasicTypeInfo.DOUBLE_TYPE_INFO });
    +                   CsvInputFormat<Row> format = new 
RowCsvInputFormat(PATH, "\n", "|", typeInfo);
    +                   format.setCommentPrefix("#");
    +
    +                   Configuration parameters = new Configuration();
    +                   format.configure(parameters);
    +                   format.open(split);
    +
    +                   Row result = new Row(3);
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("this is", result.productElement(0));
    +                   assertEquals(new Integer(1), result.productElement(1));
    +                   assertEquals(new Double(2.0), result.productElement(2));
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("a test", result.productElement(0));
    +                   assertEquals(new Integer(3), result.productElement(1));
    +                   assertEquals(new Double(4.0), result.productElement(2));
    +
    +                   result = format.nextRecord(result);
    +                   assertNull(result);
    +           }
    +           catch (Exception ex) {
    +                   ex.printStackTrace();
    +                   fail("Test failed due to a " + ex.getClass().getName() 
+ ": " + ex.getMessage());
    +           }
    +   }
    +
    +   @Test
    +   public void ignoreMultiCharPrefixComments() {
    +           try {
    +
    +
    +                   final String fileContent = "//description of the 
data\n" +
    +                                   "//successive commented line\n" +
    +                                   "this is|1|2.0|\n"+
    +                                   "a test|3|4.0|\n" +
    +                                   "//next|5|6.0|\n";
    +
    +                   final FileInputSplit split = 
createTempFile(fileContent);
    +
    +                   final RowTypeInfo typeInfo = new RowTypeInfo(new 
TypeInformation<?>[] {
    +                           BasicTypeInfo.STRING_TYPE_INFO, 
BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO });
    +                   final CsvInputFormat<Row> format = new 
RowCsvInputFormat(PATH, "\n", "|", typeInfo);
    +                   format.setCommentPrefix("//");
    +
    +                   final Configuration parameters = new Configuration();
    +                   format.configure(parameters);
    +                   format.open(split);
    +
    +                   Row result = new Row(3);
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("this is", result.productElement(0));
    +                   assertEquals(new Integer(1), result.productElement(1));
    +                   assertEquals(new Double(2.0), result.productElement(2));
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("a test", result.productElement(0));
    +                   assertEquals(new Integer(3), result.productElement(1));
    +                   assertEquals(new Double(4.0), result.productElement(2));
    +
    +                   result = format.nextRecord(result);
    +                   assertNull(result);
    +           }
    +           catch (Exception ex) {
    +                   ex.printStackTrace();
    +                   fail("Test failed due to a " + ex.getClass().getName() 
+ ": " + ex.getMessage());
    +           }
    +   }
    +
    +   @Test
    +   public void readStringFields() {
    +           try {
    +                   String fileContent = "abc|def|ghijk\nabc||hhg\n|||";
    +                   FileInputSplit split = createTempFile(fileContent);
    +
    +                   RowTypeInfo typeInfo = new RowTypeInfo(new 
TypeInformation<?>[] {
    +                           BasicTypeInfo.STRING_TYPE_INFO,
    +                           BasicTypeInfo.STRING_TYPE_INFO,
    +                           BasicTypeInfo.STRING_TYPE_INFO
    +                   });
    +                   CsvInputFormat<Row> format = new 
RowCsvInputFormat(PATH, "\n", "|", typeInfo);
    +
    +                   final Configuration parameters = new Configuration();
    +                   format.configure(parameters);
    +                   format.open(split);
    +
    +                   Row result = new Row(3);
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("abc", result.productElement(0));
    +                   assertEquals("def", result.productElement(1));
    +                   assertEquals("ghijk", result.productElement(2));
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("abc", result.productElement(0));
    +                   assertEquals("", result.productElement(1));
    +                   assertEquals("hhg", result.productElement(2));
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("", result.productElement(0));
    +                   assertEquals("", result.productElement(1));
    +                   assertEquals("", result.productElement(2));
    +
    +                   result = format.nextRecord(result);
    +                   assertNull(result);
    +                   assertTrue(format.reachedEnd());
    +           }
    +           catch (Exception ex) {
    +                   ex.printStackTrace();
    +                   fail("Test failed due to a " + ex.getClass().getName() 
+ ": " + ex.getMessage());
    +           }
    +   }
    +
    +   @Test
    +   public void readMixedQuotedStringFields() {
    +           try {
    +                   String fileContent = 
"@a|b|c@|def|@ghijk@\nabc||@|hhg@\n|||";
    +                   FileInputSplit split = createTempFile(fileContent);
    +
    +                   RowTypeInfo typeInfo = new RowTypeInfo(new 
TypeInformation<?>[] {
    +                           BasicTypeInfo.STRING_TYPE_INFO,
    +                           BasicTypeInfo.STRING_TYPE_INFO,
    +                           BasicTypeInfo.STRING_TYPE_INFO
    +                   });
    +                   CsvInputFormat<Row> format = new 
RowCsvInputFormat(PATH, "\n", "|", typeInfo);
    +
    +                   Configuration parameters = new Configuration();
    +                   format.configure(parameters);
    +                   format.enableQuotedStringParsing('@');
    +                   format.open(split);
    +
    +                   Row result = new Row(3);
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("a|b|c", result.productElement(0));
    +                   assertEquals("def", result.productElement(1));
    +                   assertEquals("ghijk", result.productElement(2));
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("abc", result.productElement(0));
    +                   assertEquals("", result.productElement(1));
    +                   assertEquals("|hhg", result.productElement(2));
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("", result.productElement(0));
    +                   assertEquals("", result.productElement(1));
    +                   assertEquals("", result.productElement(2));
    +
    +                   result = format.nextRecord(result);
    +                   assertNull(result);
    +                   assertTrue(format.reachedEnd());
    +           }
    +           catch (Exception ex) {
    +                   ex.printStackTrace();
    +                   fail("Test failed due to a " + ex.getClass().getName() 
+ ": " + ex.getMessage());
    +           }
    +   }
    +
    +   @Test
    +   public void readStringFieldsWithTrailingDelimiters() {
    +           try {
    +                   String fileContent = 
"abc|-def|-ghijk\nabc|-|-hhg\n|-|-|-\n";
    +                   FileInputSplit split = createTempFile(fileContent);
    +
    +                   RowTypeInfo typeInfo = new RowTypeInfo(new 
TypeInformation<?>[] { 
    +                           BasicTypeInfo.STRING_TYPE_INFO,
    +                           BasicTypeInfo.STRING_TYPE_INFO,
    +                           BasicTypeInfo.STRING_TYPE_INFO
    +                   });
    +                   CsvInputFormat<Row> format = new 
RowCsvInputFormat(PATH, "\n", "|", typeInfo);
    +
    +                   format.setFieldDelimiter("|-");
    +
    +                   format.configure(new Configuration());
    +                   format.open(split);
    +
    +                   Row result = new Row(3);
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("abc", result.productElement(0));
    +                   assertEquals("def", result.productElement(1));
    +                   assertEquals("ghijk", result.productElement(2));
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("abc", result.productElement(0));
    +                   assertEquals("", result.productElement(1));
    +                   assertEquals("hhg", result.productElement(2));
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("", result.productElement(0));
    +                   assertEquals("", result.productElement(1));
    +                   assertEquals("", result.productElement(2));
    +
    +                   result = format.nextRecord(result);
    +                   assertNull(result);
    +                   assertTrue(format.reachedEnd());
    +           }
    +           catch (Exception ex) {
    +                   fail("Test failed due to a " + ex.getClass().getName() 
+ ": " + ex.getMessage());
    +           }
    +   }
    +
    +   @Test
    +   public void testIntegerFields() throws IOException {
    +           try {
    +                   String fileContent = 
"111|222|333|444|555\n666|777|888|999|000|\n";
    +                   FileInputSplit split = createTempFile(fileContent);
    +
    +                   RowTypeInfo typeInfo = new RowTypeInfo(
    +                                   new TypeInformation<?>[] {
    +                                           BasicTypeInfo.INT_TYPE_INFO,
    +                                           BasicTypeInfo.INT_TYPE_INFO,
    +                                           BasicTypeInfo.INT_TYPE_INFO,
    +                                           BasicTypeInfo.INT_TYPE_INFO,
    +                                           BasicTypeInfo.INT_TYPE_INFO
    +                                   });
    +                   CsvInputFormat<Row> format = new 
RowCsvInputFormat(PATH, "\n", "|", typeInfo);
    +
    +                   format.setFieldDelimiter("|");
    +
    +                   format.configure(new Configuration());
    +                   format.open(split);
    +
    +                   Row result = new Row(5);
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals(Integer.valueOf(111), 
result.productElement(0));
    +                   assertEquals(Integer.valueOf(222), 
result.productElement(1));
    +                   assertEquals(Integer.valueOf(333), 
result.productElement(2));
    +                   assertEquals(Integer.valueOf(444), 
result.productElement(3));
    +                   assertEquals(Integer.valueOf(555), 
result.productElement(4));
    +
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals(Integer.valueOf(666), 
result.productElement(0));
    +                   assertEquals(Integer.valueOf(777), 
result.productElement(1));
    +                   assertEquals(Integer.valueOf(888), 
result.productElement(2));
    +                   assertEquals(Integer.valueOf(999), 
result.productElement(3));
    +                   assertEquals(Integer.valueOf(000), 
result.productElement(4));
    +
    +                   result = format.nextRecord(result);
    +                   assertNull(result);
    +                   assertTrue(format.reachedEnd());
    +           }
    +           catch (Exception ex) {
    +                   fail("Test failed due to a " + ex.getClass().getName() 
+ ": " + ex.getMessage());
    +           }
    +   }
    +
    +   @Test
    +   public void testEmptyFields() throws IOException {
    +           try{
    +                   String fileContent = 
    +                                   "|0|0|0|0|0|\n" +
    +                                   "1||1|1|1|1|\n" +
    +                                   "2|2||2|2|2|\n" +
    +                                   "3|3|3||3|3|\n" +
    +                                   "4|4|4|4||4|\n" +
    +                                   "5|5|5|5|5||\n";
    +                   
    +                   FileInputSplit split = createTempFile(fileContent);
    +                   
    +                   //TODO: FLOAT_TYPE_INFO and DOUBLE_TYPE_INFO don't 
handle correctly null values
    +                   RowTypeInfo typeInfo = new RowTypeInfo(new 
TypeInformation<?>[] {
    +                           BasicTypeInfo.SHORT_TYPE_INFO,
    +                           BasicTypeInfo.INT_TYPE_INFO,
    +                           BasicTypeInfo.LONG_TYPE_INFO,
    +                           BasicTypeInfo.INT_TYPE_INFO,
    +//                         BasicTypeInfo.FLOAT_TYPE_INFO,
    --- End diff --
    
    Can these commented-out lines be removed?


> Create a RowCsvInputFormat to use as default CSV IF in Table API
> ----------------------------------------------------------------
>
>                 Key: FLINK-3901
>                 URL: https://issues.apache.org/jira/browse/FLINK-3901
>             Project: Flink
>          Issue Type: Improvement
>    Affects Versions: 1.0.2
>            Reporter: Flavio Pompermaier
>            Assignee: Flavio Pompermaier
>            Priority: Minor
>              Labels: csv, null-values, row, tuple
>
> At the moment the Table API reads CSVs using the TupleCsvInputFormat, which 
> has two big limitations: a maximum of 25 fields and its null handling.
> A new input format (IF) producing Row objects is necessary to avoid those limitations.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to