[ 
https://issues.apache.org/jira/browse/FLINK-3901?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15332755#comment-15332755
 ] 

ASF GitHub Bot commented on FLINK-3901:
---------------------------------------

Github user fhueske commented on a diff in the pull request:

    https://github.com/apache/flink/pull/1989#discussion_r67257812
  
    --- Diff: flink-libraries/flink-table/src/test/java/org/apache/flink/api/java/io/RowCsvInputFormatTest.java ---
    @@ -0,0 +1,1075 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.flink.api.java.io;
    +
    +import static org.hamcrest.CoreMatchers.is;
    +import static org.junit.Assert.assertEquals;
    +import static org.junit.Assert.assertNotNull;
    +import static org.junit.Assert.assertNull;
    +import static org.junit.Assert.assertThat;
    +import static org.junit.Assert.assertTrue;
    +import static org.junit.Assert.fail;
    +
    +import java.io.File;
    +import java.io.FileOutputStream;
    +import java.io.IOException;
    +import java.io.OutputStreamWriter;
    +import java.nio.charset.StandardCharsets;
    +import java.util.ArrayList;
    +import java.util.List;
    +
    +import org.apache.flink.api.common.io.ParseException;
    +import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
    +import org.apache.flink.api.common.typeinfo.TypeInformation;
    +import org.apache.flink.api.java.typeutils.PojoTypeInfo;
    +import org.apache.flink.api.java.typeutils.TypeExtractor;
    +import org.apache.flink.api.table.Row;
    +import org.apache.flink.api.table.typeutils.RowTypeInfo;
    +import org.apache.flink.configuration.Configuration;
    +import org.apache.flink.core.fs.FileInputSplit;
    +import org.apache.flink.core.fs.Path;
    +import org.apache.flink.types.parser.FieldParser;
    +import org.apache.flink.types.parser.StringParser;
    +import org.junit.Test;
    +
    +public class RowCsvInputFormatTest {
    +
    +   private static final Path PATH = new Path("an/ignored/file/");
    +
    +   // Static variables for testing the replacement of \r\n with \n
    +   private static final String FIRST_PART = "That is the first part";
    +
    +   private static final String SECOND_PART = "That is the second part";
    +
    +   @Test
    +   public void ignoreInvalidLines() {
    +           try {
    +                   String fileContent =
    +                                   "header1|header2|header3|\n"+
    +                                   "this is|1|2.0|\n"+
    +                                   "//a comment\n" +
    +                                   "a test|3|4.0|\n" +
    +                                   "#next|5|6.0|\n";
    +
    +                   FileInputSplit split = createTempFile(fileContent);
    +
    +                   RowTypeInfo typeInfo = new RowTypeInfo(new TypeInformation<?>[] {
    +                           BasicTypeInfo.STRING_TYPE_INFO,
    +                           BasicTypeInfo.INT_TYPE_INFO,
    +                           BasicTypeInfo.DOUBLE_TYPE_INFO
    +                   });
    +                   CsvInputFormat<Row> format = new RowCsvInputFormat(PATH, "\n", "|", typeInfo);
    +                   format.setLenient(false);
    +
    +                   Configuration parameters = new Configuration();
    +                   format.configure(parameters);
    +                   format.open(split);
    +
    +                   Row result = new Row(3);
    +
    +                   try {
    +                           result = format.nextRecord(result);
    +                           fail("Parse Exception was not thrown! (Invalid int value)");
    +                   } catch (ParseException ex) {
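    +                           // expected: "header2" in the header line cannot be parsed as an INT field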
    +                   }
    +
    +                   // if format has lenient == false this can be asserted only after FLINK-3908
    +//                 result = format.nextRecord(result);
    --- End diff --
    
    Enable this check once FLINK-3908 has been fixed.
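    
    For reference, a minimal sketch of the check that could then be re-enabled; the expected values are assumptions derived from the test's fileContent, and Row is assumed to expose productElement(int) via Product as elsewhere in this PR:
    
        // sketch only: after the ParseException on the header line, the next call
        // should return the first valid record of fileContent ("this is|1|2.0|")
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("this is", result.productElement(0));
        assertEquals(Integer.valueOf(1), result.productElement(1));
        assertEquals(Double.valueOf(2.0), result.productElement(2));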


> Create a RowCsvInputFormat to use as default CSV IF in Table API
> ----------------------------------------------------------------
>
>                 Key: FLINK-3901
>                 URL: https://issues.apache.org/jira/browse/FLINK-3901
>             Project: Flink
>          Issue Type: Improvement
>    Affects Versions: 1.0.2
>            Reporter: Flavio Pompermaier
>            Assignee: Flavio Pompermaier
>            Priority: Minor
>              Labels: csv, null-values, row, tuple
>
> At the moment the Table API reads CSVs using the TupleCsvInputFormat, which
> has the big limitations of at most 25 fields and no null handling.
> A new InputFormat producing Row objects is necessary to avoid those limitations.
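
As a minimal usage sketch (assuming the constructor exercised in the diff above, RowCsvInputFormat(Path, rowDelimiter, fieldDelimiter, RowTypeInfo); the input path is hypothetical):

    // read a '|'-delimited file into Row records; Row has no 25-field arity cap
    // and, per the issue's goal, is meant to carry null for missing values
    RowTypeInfo typeInfo = new RowTypeInfo(new TypeInformation<?>[] {
            BasicTypeInfo.STRING_TYPE_INFO,
            BasicTypeInfo.INT_TYPE_INFO,
            BasicTypeInfo.DOUBLE_TYPE_INFO
    });
    CsvInputFormat<Row> format = new RowCsvInputFormat(
            new Path("/path/to/input.csv"), "\n", "|", typeInfo);
    format.configure(new Configuration());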



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
