[
https://issues.apache.org/jira/browse/FLINK-1208?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14223609#comment-14223609
]
ASF GitHub Bot commented on FLINK-1208:
---------------------------------------
Github user fhueske commented on a diff in the pull request:
https://github.com/apache/incubator-flink/pull/201#discussion_r20826354
--- Diff: flink-java/src/test/java/org/apache/flink/api/java/io/CsvInputFormatTest.java ---
@@ -48,6 +48,143 @@
private static final String FIRST_PART = "That is the first part";
private static final String SECOND_PART = "That is the second part";
+
+ @Test
+ public void ignoreInvalidLines() {
+ try {
+
+
+ final String fileContent = "#description of the data\n" +
+
"header1|header2|header3|\n"+
+
"this is|1|2.0|\n"+
+
"//a comment\n" +
+
"a test|3|4.0|\n" +
+
"#next|5|6.0|\n";
+
+ final FileInputSplit split =
createTempFile(fileContent);
+
+ CsvInputFormat<Tuple3<String, Integer, Double>> format
=
+ new CsvInputFormat<Tuple3<String,
Integer, Double>>(PATH, "\n", '|', String.class, Integer.class, Double.class);
+ format.setLenient(true);
+
+ final Configuration parameters = new Configuration();
+ format.configure(parameters);
+ format.open(split);
+
+
+ Tuple3<String, Integer, Double> result = new
Tuple3<String, Integer, Double>();
+ result = format.nextRecord(result);
+ assertNotNull(result);
+ assertEquals("this is", result.f0);
+ assertEquals(new Integer(1), result.f1);
+ assertEquals(new Double(2.0), result.f2);
+
+ result = format.nextRecord(result);
+ assertNotNull(result);
+ assertEquals("a test", result.f0);
+ assertEquals(new Integer(3), result.f1);
+ assertEquals(new Double(4.0), result.f2);
+
+ result = format.nextRecord(result);
+ assertNotNull(result);
+ assertEquals("#next", result.f0);
+ assertEquals(new Integer(5), result.f1);
+ assertEquals(new Double(6.0), result.f2);
+
+ result = format.nextRecord(result);
+ assertNull(result);
+ }
+ catch (Exception ex) {
+ ex.printStackTrace();
+ fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
+ }
+ }
+
+ @Test
+ public void ignoreSingleCharPrefixComments() {
+ try {
+ final String fileContent = "#description of the data\n"
+
+
"#successive commented line\n" +
+
"this is|1|2.0|\n" +
+ "a test|3|4.0|\n" +
+
"#next|5|6.0|\n";
+
+ final FileInputSplit split =
createTempFile(fileContent);
+
+ CsvInputFormat<Tuple3<String, Integer, Double>> format
=
+ new CsvInputFormat<Tuple3<String,
Integer, Double>>(PATH, "\n", '|', String.class, Integer.class, Double.class);
+ format.setCommentPrefix("#");
+
+ final Configuration parameters = new Configuration();
+ format.configure(parameters);
+ format.open(split);
+
+ Tuple3<String, Integer, Double> result = new
Tuple3<String, Integer, Double>();
+
+ result = format.nextRecord(result);
+ assertNotNull(result);
+ assertEquals("this is", result.f0);
+ assertEquals(new Integer(1), result.f1);
+ assertEquals(new Double(2.0), result.f2);
+
+ result = format.nextRecord(result);
+ assertNotNull(result);
+ assertEquals("a test", result.f0);
+ assertEquals(new Integer(3), result.f1);
+ assertEquals(new Double(4.0), result.f2);
+
+ result = format.nextRecord(result);
+ assertNull(result);
+ }
+ catch (Exception ex) {
+ ex.printStackTrace();
+ fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
+ }
+ }
+
+ @Test
+ public void ignoreMultiCharPrefixComments() {
+ try {
+
+
+ final String fileContent = "//description of the data\n" +
+
"//successive commented line\n" +
+
"this is|1|2.0|\n"+
+ "a test|3|4.0|\n" +
+
"//next|5|6.0|\n";
+
+ final FileInputSplit split =
createTempFile(fileContent);
+
+ CsvInputFormat<Tuple3<String, Integer, Double>> format
=
+ new CsvInputFormat<Tuple3<String,
Integer, Double>>(PATH, "\n", '|', String.class, Integer.class, Double.class);
+ format.setCommentPrefix("//");
+
+ final Configuration parameters = new Configuration();
+ format.configure(parameters);
+ format.open(split);
+
+ Tuple3<String, Integer, Double> result = new
Tuple3<String, Integer, Double>();
+
+ result = format.nextRecord(result);
+ assertNotNull(result);
+ assertEquals("this is", result.f0);
+ assertEquals(new Integer(1), result.f1);
+ assertEquals(new Double(2.0), result.f2);
+
+ result = format.nextRecord(result);
+ assertNotNull(result);
+ assertEquals("a test", result.f0);
+ assertEquals(new Integer(3), result.f1);
+ assertEquals(new Double(4.0), result.f2);
+
+ result = format.nextRecord(result);
+ assertNull(result);
+ }
+ catch (Exception ex) {
+ ex.printStackTrace();
+ fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
+ }
+ }
--- End diff --
Can you add a test case that checks for correct behavior of `lenient = false`?
> Skip comment lines in CSV input format. Allow user to specify comment
> character.
> --------------------------------------------------------------------------------
>
> Key: FLINK-1208
> URL: https://issues.apache.org/jira/browse/FLINK-1208
> Project: Flink
> Issue Type: Improvement
> Components: Java API, Scala API
> Affects Versions: 0.8-incubating
> Reporter: Aljoscha Krettek
> Assignee: Felix Neutatz
> Priority: Minor
> Labels: starter
>
> The current skipFirstLine is limited. Skipping arbitrary lines that start
> with a certain character would be much more flexible while still easy to
> implement.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)