[ https://issues.apache.org/jira/browse/FLINK-1208?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14220734#comment-14220734 ]

ASF GitHub Bot commented on FLINK-1208:
---------------------------------------

Github user fhueske commented on a diff in the pull request:

    https://github.com/apache/incubator-flink/pull/201#discussion_r20706556
  
    --- Diff: flink-core/src/test/java/org/apache/flink/api/common/io/GenericCsvInputFormatTest.java ---
    @@ -271,6 +271,138 @@ public void testSparseParseWithIndices() {
        }
        
        @Test
    +   public void testIgnoreInvalidInput() throws IOException {
    +           try {
    +                   final String fileContent = "#description of the data\n" 
+ 
    +                                                                      
"header1|header2|header3|\n"+
    +                                                                      
"this is|1|2.0|\n"+
    +                                                                      "//a 
comment\n" +
    +                                                                      "a 
test|3|4.0|\n" +
    +                                                                      
"#next|5|6.0|\n";
    +                   
    +                   final FileInputSplit split = 
createTempFile(fileContent);       
    +           
    +                   final Configuration parameters = new Configuration();
    +                   format.setFieldDelimiter('|');
    +                   format.setFieldTypesGeneric(StringValue.class, 
IntValue.class, DoubleValue.class);
    +                   format.setLenient(true);
    +                   
    +                   format.configure(parameters);
    +                   format.open(split);
    +                   
    +                   Value[] values;
    +                   
    +                   values = format.nextRecord(new Value[] { new 
StringValue(), new IntValue(), new DoubleValue() });
    +                   assertNull(values);
    +                   values = format.nextRecord(new Value[] { new 
StringValue(), new IntValue(), new DoubleValue() });
    +                   assertNull(values);
    +                   
    +                   values = format.nextRecord(new Value[] { new 
StringValue(), new IntValue(), new DoubleValue() });
    +                   assertEquals("this is", ((StringValue) 
values[0]).getValue());
    +                   assertEquals(1, ((IntValue) values[1]).getValue());
    +                   assertEquals(2.0, ((DoubleValue) values[2]).getValue(), 
0.001);
    +                   
    +                   values = format.nextRecord(new Value[] { new 
StringValue(), new IntValue(), new DoubleValue() });
    +                   assertNull(values);
    +                   
    +                   values = format.nextRecord(new Value[] { new 
StringValue(), new IntValue(), new DoubleValue() });
    +                   assertEquals("a test", ((StringValue) 
values[0]).getValue());
    +                   assertEquals(3, ((IntValue) values[1]).getValue());
    +                   assertEquals(4.0, ((DoubleValue) values[2]).getValue(), 
0.001);
    +                   
    +                   values = format.nextRecord(new Value[] { new 
StringValue(), new IntValue(), new DoubleValue() });
    +                   assertEquals("#next", ((StringValue) 
values[0]).getValue());
    +                   assertEquals(5, ((IntValue) values[1]).getValue());
    +                   assertEquals(6.0, ((DoubleValue) values[2]).getValue(), 
0.001);
    +           }
    +           catch (Exception ex) {
    +                   fail("Test failed due to a " + 
ex.getClass().getSimpleName() + ": " + ex.getMessage());
    +           }
    +   }
    +   
    +   @Test
    +   public void testIgnoreSingleCharPrefixComments() throws IOException {
    +           try {
    +                   final String fileContent = "#description of the data\n" 
+ 
    +                                                                      
"this is|1|2.0|\n"+
    +                                                                      "a 
test|3|4.0|#comment after record\n" +
    +                                                                      
"#next|5|6.0|\n";
    +                   
    +                   final FileInputSplit split = 
createTempFile(fileContent);       
    +           
    +                   final Configuration parameters = new Configuration();
    +                   format.setFieldDelimiter('|');
    +                   format.setFieldTypesGeneric(StringValue.class, 
IntValue.class, DoubleValue.class);
    +                   format.setCommentPrefix("#");
    +                   
    +                   format.configure(parameters);
    +                   format.open(split);
    +                   
    +                   Value[] values;
    +                   
    +                   values = format.nextRecord(new Value[] { new 
StringValue(), new IntValue(), new DoubleValue() });
    +                   assertNull(values);
    +                   
    +                   values = format.nextRecord(new Value[] { new 
StringValue(), new IntValue(), new DoubleValue() });
    +                   assertEquals("this is", ((StringValue) 
values[0]).getValue());
    +                   assertEquals(1, ((IntValue) values[1]).getValue());
    +                   assertEquals(2.0, ((DoubleValue) values[2]).getValue(), 
0.001);
    +                   
    +                   values = format.nextRecord(new Value[] { new 
StringValue(), new IntValue(), new DoubleValue() });
    +                   assertEquals("a test", ((StringValue) 
values[0]).getValue());
    +                   assertEquals(3, ((IntValue) values[1]).getValue());
    +                   assertEquals(4.0, ((DoubleValue) values[2]).getValue(), 
0.001);
    +                   
    +                   values = format.nextRecord(new Value[] { new 
StringValue(), new IntValue(), new DoubleValue() });
    +                   assertNull(values);
    +           }
    +           catch (Exception ex) {
    +                   fail("Test failed due to a " + 
ex.getClass().getSimpleName() + ": " + ex.getMessage());
    +           }
    +   }
    +   
    +   @Test
    +   public void testIgnoreMultiCharPrefixComments() throws IOException {
    +           try {
    --- End diff --
    
    What about this test case (testIgnoreMultiCharPrefixComments)?


> Skip comment lines in CSV input format. Allow user to specify comment 
> character.
> --------------------------------------------------------------------------------
>
>                 Key: FLINK-1208
>                 URL: https://issues.apache.org/jira/browse/FLINK-1208
>             Project: Flink
>          Issue Type: Improvement
>          Components: Java API, Scala API
>    Affects Versions: 0.8-incubating
>            Reporter: Aljoscha Krettek
>            Assignee: Felix Neutatz
>            Priority: Minor
>              Labels: starter
>
> The current skipFirstLine is limited. Skipping arbitrary lines that start 
> with a certain character would be much more flexible while still easy to 
> implement.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to