Repository: commons-csv Updated Branches: refs/heads/master 4d0f22600 -> aae6f9044
[CSV-214] Adding a placeholder in the Lexer and CSV parser to store the end-of-line string. I applied the patch in spirit and made changes: there is no need to use a boolean to track the state of the EOL String (set vs. not set). I also allowed for CR to be saved as an EOL string since we allow that already. Project: http://git-wip-us.apache.org/repos/asf/commons-csv/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-csv/commit/aae6f904 Tree: http://git-wip-us.apache.org/repos/asf/commons-csv/tree/aae6f904 Diff: http://git-wip-us.apache.org/repos/asf/commons-csv/diff/aae6f904 Branch: refs/heads/master Commit: aae6f90442ca09e2461e766a987b33316d9fa6be Parents: 4d0f226 Author: Gary Gregory <garydgreg...@gmail.com> Authored: Fri Aug 11 15:05:27 2017 -0600 Committer: Gary Gregory <garydgreg...@gmail.com> Committed: Fri Aug 11 15:05:27 2017 -0600 ---------------------------------------------------------------------- src/changes/changes.xml | 1 + .../java/org/apache/commons/csv/CSVParser.java | 10 +++++++ src/main/java/org/apache/commons/csv/Lexer.java | 21 ++++++++++++++ .../org/apache/commons/csv/CSVParserTest.java | 30 ++++++++++++++++++++ 4 files changed, 62 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-csv/blob/aae6f904/src/changes/changes.xml ---------------------------------------------------------------------- diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 95ad0b5..744ffc8 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -51,6 +51,7 @@ <action issue="CSV-192" type="add" dev="ggregory" due-to="Gary Gregory">Add convenience API CSVParser.parse(Path, Charset, CSVFormat)</action> <action issue="CSV-205" type="add" dev="ggregory" due-to="Gary Gregory">Add convenience API CSVFormat#printer() to print to System.out</action> <action issue="CSV-207" type="add" dev="ggregory" due-to="Gary Gregory">Provide a CSV Format for printing PostgreSQL CSV and Text formats.</action> + <action issue="CSV-214" type="add" dev="ggregory" due-to="Nitin Mahendru, Gary Gregory">Adding a placeholder in the Lexer and CSV parser to store the end-of-line string.</action> </release> <release version="1.4" date="2016-05-28" description="Feature and bug fix release"> <action issue="CSV-181" type="update" dev="ggregory" due-to="Gary Gregory">Make CSVPrinter.print(Object) GC-free.</action> http://git-wip-us.apache.org/repos/asf/commons-csv/blob/aae6f904/src/main/java/org/apache/commons/csv/CSVParser.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index efc0d86..2e4d662 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -397,6 +397,16 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { } /** + * Gets the first end-of-line string encountered. + * + * @return the first end-of-line string + * @since 1.5 + */ + public String getFirstEndOfLine() { + return lexer.getFirstEol(); + } + + /** * Returns a copy of the header map that iterates in column order. * <p> * The map keys are column names. The map values are 0-based indices. http://git-wip-us.apache.org/repos/asf/commons-csv/blob/aae6f904/src/main/java/org/apache/commons/csv/Lexer.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java index 0329c35..027e41e 100644 --- a/src/main/java/org/apache/commons/csv/Lexer.java +++ b/src/main/java/org/apache/commons/csv/Lexer.java @@ -40,6 +40,9 @@ import java.io.IOException; */ final class Lexer implements Closeable { + private static final String CR_STRING = Character.toString(Constants.CR); + private static final String LF_STRING = Character.toString(Constants.LF); + /** * Constant char to use for disabling comments, escapes and encapsulation. The value -2 is used because it * won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two @@ -57,7 +60,12 @@ final class Lexer implements Closeable { /** The input stream */ private final ExtendedBufferedReader reader; + private String firstEol; + String getFirstEol(){ + return firstEol; + } + Lexer(final CSVFormat format, final ExtendedBufferedReader reader) { this.reader = reader; this.delimiter = format.getDelimiter(); @@ -374,7 +382,20 @@ final class Lexer implements Closeable { if (ch == CR && reader.lookAhead() == LF) { // note: does not change ch outside of this method! ch = reader.read(); + // Save the EOL state + if (firstEol == null) { + this.firstEol = Constants.CRLF; + } } + // save EOL state here. + if (firstEol == null) { + if (ch == LF) { + this.firstEol = LF_STRING; + } else if (ch == CR) { + this.firstEol = CR_STRING; + } + } + return ch == LF || ch == CR; } http://git-wip-us.apache.org/repos/asf/commons-csv/blob/aae6f904/src/test/java/org/apache/commons/csv/CSVParserTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index c547b0d..cffd143 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -234,6 +234,36 @@ public class CSVParserTest { assertEquals(4, records.size()); } } + + @Test + public void testFirstEndOfLineCrLf() throws IOException { + final String data = "foo\r\nbaar,\r\nhello,world\r\n,kanu"; + try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) { + final List<CSVRecord> records = parser.getRecords(); + assertEquals(4, records.size()); + assertEquals("\r\n", parser.getFirstEndOfLine()); + } + } + + @Test + public void testFirstEndOfLineLf() throws IOException { + final String data = "foo\nbaar,\nhello,world\n,kanu"; + try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) { + final List<CSVRecord> records = parser.getRecords(); + assertEquals(4, records.size()); + assertEquals("\n", parser.getFirstEndOfLine()); + } + } + + @Test + public void testFirstEndOfLineCr() throws IOException { + final String data = "foo\rbaar,\rhello,world\r,kanu"; + try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) { + final List<CSVRecord> records = parser.getRecords(); + assertEquals(4, records.size()); + assertEquals("\r", parser.getFirstEndOfLine()); + } + } @Test(expected = NoSuchElementException.class) public void testClose() throws Exception {