This is an automated email from the ASF dual-hosted git repository. garydgregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-csv.git
commit e36e7f3a1d0fbe29f2ff602f041d3a3d4195b84a Author: Gary Gregory <[email protected]> AuthorDate: Thu Jun 25 22:11:53 2026 +0000 Sort members --- .../java/org/apache/commons/csv/CSVParserTest.java | 40 +++++++++++----------- .../java/org/apache/commons/csv/LexerTest.java | 38 ++++++++++---------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index 565e132e..3bea08fa 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -465,6 +465,26 @@ class CSVParserTest { () -> CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader().withAllowDuplicateHeaderNames(false))); } + /** + * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace, + * the empty field at the delimiter boundary must survive. The delimiter look-ahead is consumed while skipping + * leading whitespace, so re-evaluating it would drop the empty field and merge the following field's value. + */ + @Test + void testEmptyFieldBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get(); + try (CSVParser parser = CSVParser.parse(" |a", format)) { + final List<CSVRecord> records = parser.getRecords(); + assertEquals(1, records.size()); + assertValuesEquals(new String[] { "", "a" }, records.get(0)); + } + try (CSVParser parser = CSVParser.parse("a | |b", format)) { + final List<CSVRecord> records = parser.getRecords(); + assertEquals(1, records.size()); + assertValuesEquals(new String[] { "a", "", "b" }, records.get(0)); + } + } + @Test void testEmptyFile() throws Exception { try (CSVParser parser = CSVParser.parse(Paths.get("src/test/resources/org/apache/commons/csv/empty.txt"), StandardCharsets.UTF_8, @@ -1758,26 +1778,6 @@ class CSVParserTest { } } - /** - * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace, - * the empty field at the delimiter boundary must survive. The delimiter look-ahead is consumed while skipping - * leading whitespace, so re-evaluating it would drop the empty field and merge the following field's value. - */ - @Test - void testEmptyFieldBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException { - final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get(); - try (CSVParser parser = CSVParser.parse(" |a", format)) { - final List<CSVRecord> records = parser.getRecords(); - assertEquals(1, records.size()); - assertValuesEquals(new String[] { "", "a" }, records.get(0)); - } - try (CSVParser parser = CSVParser.parse("a | |b", format)) { - final List<CSVRecord> records = parser.getRecords(); - assertEquals(1, records.size()); - assertValuesEquals(new String[] { "a", "", "b" }, records.get(0)); - } - } - @Test void testProvidedHeader() throws Exception { final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java b/src/test/java/org/apache/commons/csv/LexerTest.java index 445f710a..a76f6e51 100644 --- a/src/test/java/org/apache/commons/csv/LexerTest.java +++ b/src/test/java/org/apache/commons/csv/LexerTest.java @@ -216,6 +216,25 @@ class LexerTest { } } + /** + * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace, + * the side-effecting {@link Lexer#isDelimiter(int)} must only be evaluated once per character, otherwise the + * delimiter is consumed in the whitespace-skip loop and the empty field at the boundary is dropped. + */ + @Test + void testEmptyTokenBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException { + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get(); + try (Lexer lexer = createLexer(" |a", format)) { + assertNextToken(TOKEN, "", lexer); + assertNextToken(EOF, "a", lexer); + } + try (Lexer lexer = createLexer("a | |b", format)) { + assertNextToken(TOKEN, "a", lexer); + assertNextToken(TOKEN, "", lexer); + assertNextToken(EOF, "b", lexer); + } + } + @Test void testEOFWithoutClosingQuote() throws Exception { final String code = "a,\"b"; @@ -447,25 +466,6 @@ class LexerTest { } } - /** - * With {@code ignoreSurroundingSpaces} enabled and a multi-character delimiter whose first character is whitespace, - * the side-effecting {@link Lexer#isDelimiter(int)} must only be evaluated once per character, otherwise the - * delimiter is consumed in the whitespace-skip loop and the empty field at the boundary is dropped. - */ - @Test - void testEmptyTokenBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException { - final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get(); - try (Lexer lexer = createLexer(" |a", format)) { - assertNextToken(TOKEN, "", lexer); - assertNextToken(EOF, "a", lexer); - } - try (Lexer lexer = createLexer("a | |b", format)) { - assertNextToken(TOKEN, "a", lexer); - assertNextToken(TOKEN, "", lexer); - assertNextToken(EOF, "b", lexer); - } - } - @Test void testReadEscapeBackspace() throws IOException { try (Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) {
