[ https://issues.apache.org/jira/browse/FLINK-1208?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14223609#comment-14223609 ]

ASF GitHub Bot commented on FLINK-1208:
---------------------------------------

Github user fhueske commented on a diff in the pull request:

    https://github.com/apache/incubator-flink/pull/201#discussion_r20826354
  
    --- Diff: flink-java/src/test/java/org/apache/flink/api/java/io/CsvInputFormatTest.java ---
    @@ -48,6 +48,143 @@
        private static final String FIRST_PART = "That is the first part";
        
        private static final String SECOND_PART = "That is the second part";
    +   
    +   @Test
    +   public void ignoreInvalidLines() {
    +           try {
    +                   
    +                   
    +                   final String fileContent =  "#description of the 
data\n" + 
    +                                                                           
"header1|header2|header3|\n"+
    +                                                                           
"this is|1|2.0|\n"+
    +                                                                           
"//a comment\n" +
    +                                                                           
"a test|3|4.0|\n" +
    +                                                                           
"#next|5|6.0|\n";
    +                   
    +                   final FileInputSplit split = 
createTempFile(fileContent);
    +                   
    +                   CsvInputFormat<Tuple3<String, Integer, Double>> format 
= 
    +                                   new CsvInputFormat<Tuple3<String, 
Integer, Double>>(PATH, "\n", '|',  String.class, Integer.class, Double.class);
    +                   format.setLenient(true);
    +           
    +                   final Configuration parameters = new Configuration();
    +                   format.configure(parameters);
    +                   format.open(split);
    +                   
    +                   
    +                   Tuple3<String, Integer, Double> result = new 
Tuple3<String, Integer, Double>();
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("this is", result.f0);
    +                   assertEquals(new Integer(1), result.f1);
    +                   assertEquals(new Double(2.0), result.f2);
    +                   
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("a test", result.f0);
    +                   assertEquals(new Integer(3), result.f1);
    +                   assertEquals(new Double(4.0), result.f2);
    +                   
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("#next", result.f0);
    +                   assertEquals(new Integer(5), result.f1);
    +                   assertEquals(new Double(6.0), result.f2);
    +
    +                   result = format.nextRecord(result);
    +                   assertNull(result);
    +           }
    +           catch (Exception ex) {
    +                   ex.printStackTrace();
    +                   fail("Test failed due to a " + ex.getClass().getName() 
+ ": " + ex.getMessage());
    +           }
    +   }
    +   
    +   @Test
    +   public void ignoreSingleCharPrefixComments() {
    +           try {
    +                   final String fileContent = "#description of the data\n" 
+
    +                                                                      
"#successive commented line\n" +
    +                                                                      
"this is|1|2.0|\n" +
    +                                                                      "a 
test|3|4.0|\n" +
    +                                                                      
"#next|5|6.0|\n";
    +                   
    +                   final FileInputSplit split = 
createTempFile(fileContent);
    +                   
    +                   CsvInputFormat<Tuple3<String, Integer, Double>> format 
= 
    +                                   new CsvInputFormat<Tuple3<String, 
Integer, Double>>(PATH, "\n", '|', String.class, Integer.class, Double.class);
    +                   format.setCommentPrefix("#");
    +           
    +                   final Configuration parameters = new Configuration();
    +                   format.configure(parameters);
    +                   format.open(split);
    +                   
    +                   Tuple3<String, Integer, Double> result = new 
Tuple3<String, Integer, Double>();
    +                   
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("this is", result.f0);
    +                   assertEquals(new Integer(1), result.f1);
    +                   assertEquals(new Double(2.0), result.f2);
    +                   
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("a test", result.f0);
    +                   assertEquals(new Integer(3), result.f1);
    +                   assertEquals(new Double(4.0), result.f2);
    +
    +                   result = format.nextRecord(result);
    +                   assertNull(result);
    +           }
    +           catch (Exception ex) {
    +                   ex.printStackTrace();
    +                   fail("Test failed due to a " + ex.getClass().getName() 
+ ": " + ex.getMessage());
    +           }
    +   }
    +   
    +   @Test
    +   public void ignoreMultiCharPrefixComments() {
    +           try {
    +                   
    +                   
    +                   final String fileContent = "//description of the 
data\n" +
    +                                                                      
"//successive commented line\n" +
    +                                                                      
"this is|1|2.0|\n"+
    +                                                                      "a 
test|3|4.0|\n" +
    +                                                                      
"//next|5|6.0|\n";
    +                   
    +                   final FileInputSplit split = 
createTempFile(fileContent);
    +                   
    +                   CsvInputFormat<Tuple3<String, Integer, Double>> format 
= 
    +                                   new CsvInputFormat<Tuple3<String, 
Integer, Double>>(PATH, "\n", '|', String.class, Integer.class, Double.class);
    +                   format.setCommentPrefix("//");
    +           
    +                   final Configuration parameters = new Configuration();
    +                   format.configure(parameters);
    +                   format.open(split);
    +                   
    +                   Tuple3<String, Integer, Double> result = new 
Tuple3<String, Integer, Double>();
    +                   
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("this is", result.f0);
    +                   assertEquals(new Integer(1), result.f1);
    +                   assertEquals(new Double(2.0), result.f2);
    +                   
    +                   result = format.nextRecord(result);
    +                   assertNotNull(result);
    +                   assertEquals("a test", result.f0);
    +                   assertEquals(new Integer(3), result.f1);
    +                   assertEquals(new Double(4.0), result.f2);
    +                   
    +                   result = format.nextRecord(result);
    +                   assertNull(result);
    +           }
    +           catch (Exception ex) {
    +                   ex.printStackTrace();
    +                   fail("Test failed due to a " + ex.getClass().getName() 
+ ": " + ex.getMessage());
    +           }
    +   }
    --- End diff --
    
    Can you add a test case that checks for correct behavior of `lenient = false`?


> Skip comment lines in CSV input format. Allow user to specify comment character.
> --------------------------------------------------------------------------------
>
>                 Key: FLINK-1208
>                 URL: https://issues.apache.org/jira/browse/FLINK-1208
>             Project: Flink
>          Issue Type: Improvement
>          Components: Java API, Scala API
>    Affects Versions: 0.8-incubating
>            Reporter: Aljoscha Krettek
>            Assignee: Felix Neutatz
>            Priority: Minor
>              Labels: starter
>
> The current skipFirstLine is limited. Skipping arbitrary lines that start 
> with a certain character would be much more flexible while still easy to 
> implement.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to