This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-csv.git


The following commit(s) were added to refs/heads/master by this push:
     new ac280e7  [CSV-93] Allow the handling of NULL values.
ac280e7 is described below

commit ac280e705d99cc1168f93f7d9a5f92618bebbb97
Author: Gary Gregory <garydgreg...@gmail.com>
AuthorDate: Thu Jul 8 12:36:23 2021 -0400

    [CSV-93] Allow the handling of NULL values.
    
    - [CSV-253] Handle absent values in input (null).
    - Cleaned up version of PR 77 from dota17 where:
    - Don't duplicate two state items from the format.
    - Use try-with-resources.
    - Remove useless parens.
    - Update Javaodc.
    - Sort members in the new tests.
    - Use builder.
---
 .../java/org/apache/commons/csv/CSVParser.java     |  37 ++++-
 src/main/java/org/apache/commons/csv/Lexer.java    |   1 +
 .../java/org/apache/commons/csv/QuoteMode.java     |   2 +-
 src/main/java/org/apache/commons/csv/Token.java    |   3 +
 .../org/apache/commons/csv/CSVPrinterTest.java     |   2 +-
 .../apache/commons/csv/issues/JiraCsv253Test.java  |  54 ++++++++
 .../apache/commons/csv/issues/JiraCsv93Test.java   | 150 +++++++++++++++++++++
 7 files changed, 243 insertions(+), 6 deletions(-)

diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java 
b/src/main/java/org/apache/commons/csv/CSVParser.java
index 3224013..94bb361 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -446,8 +446,7 @@ public final class CSVParser implements 
Iterable<CSVRecord>, Closeable {
         if (lastRecord && inputClean.isEmpty() && 
this.format.getTrailingDelimiter()) {
             return;
         }
-        final String nullString = this.format.getNullString();
-        this.recordList.add(inputClean.equals(nullString) ? null : inputClean);
+        this.recordList.add(handleNull(inputClean));
     }
 
     /**
@@ -636,7 +635,26 @@ public final class CSVParser implements 
Iterable<CSVRecord>, Closeable {
     }
 
     /**
-     * Gets whether this parser is closed.
+     * Handle whether input is parsed as null
+     *
+     * @param input
+     *           the cell data to further processed
+     * @return null if input is parsed as null, or input itself if input isn't 
parsed as null
+     */
+    private String handleNull(final String input) {
+        final boolean isQuoted = this.reusableToken.isQuoted;
+        final String nullString = format.getNullString();
+        final boolean strictQuoteMode = isStrictQuoteMode();
+        if (input.equals(nullString)) {
+            // nullString = NULL(String), distinguish between "NULL" and NULL 
in ALL_NON_NULL or NON_NUMERIC quote mode
+            return strictQuoteMode && isQuoted ? input : null;
+        }
+        // don't set nullString, distinguish between "" and ,, (absent values) 
in All_NON_NULL or NON_NUMERIC quote mode
+        return strictQuoteMode && nullString == null && input.isEmpty() && 
!isQuoted ? null : input;
+    }
+
+    /**
+     * Tests whether this parser is closed.
      *
      * @return whether this parser is closed.
      */
@@ -645,7 +663,18 @@ public final class CSVParser implements 
Iterable<CSVRecord>, Closeable {
     }
 
     /**
-     * Returns an iterator on the records.
+     * Tests whether the format's {@link QuoteMode} is {@link 
QuoteMode#ALL_NON_NULL} or {@link QuoteMode#NON_NUMERIC}.
+     *
+     * @return true if the format's {@link QuoteMode} is {@link 
QuoteMode#ALL_NON_NULL} or
+     *         {@link QuoteMode#NON_NUMERIC}.
+     */
+    private boolean isStrictQuoteMode() {
+        return this.format.getQuoteMode() == QuoteMode.ALL_NON_NULL ||
+               this.format.getQuoteMode() == QuoteMode.NON_NUMERIC;
+    }
+
+    /**
+     * Returns the record iterator.
      *
      * <p>
      * An {@link IOException} caught during the iteration are re-thrown as an
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java 
b/src/main/java/org/apache/commons/csv/Lexer.java
index a0c75aa..1f14543 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -324,6 +324,7 @@ final class Lexer implements Closeable {
      *             on invalid state: EOF before closing encapsulator or 
invalid character before delimiter or EOL
      */
     private Token parseEncapsulatedToken(final Token token) throws IOException 
{
+        token.isQuoted = true;
         // save current line number in case needed for IOE
         final long startLineNumber = getCurrentLineNumber();
         int c;
diff --git a/src/main/java/org/apache/commons/csv/QuoteMode.java 
b/src/main/java/org/apache/commons/csv/QuoteMode.java
index 272deb7..a9b33a1 100644
--- a/src/main/java/org/apache/commons/csv/QuoteMode.java
+++ b/src/main/java/org/apache/commons/csv/QuoteMode.java
@@ -17,7 +17,7 @@
 package org.apache.commons.csv;
 
 /**
- * Defines quoting behavior when printing.
+ * Defines quoting behavior.
  */
 public enum QuoteMode {
 
diff --git a/src/main/java/org/apache/commons/csv/Token.java 
b/src/main/java/org/apache/commons/csv/Token.java
index dff7d01..2dedc58 100644
--- a/src/main/java/org/apache/commons/csv/Token.java
+++ b/src/main/java/org/apache/commons/csv/Token.java
@@ -55,10 +55,13 @@ final class Token {
     /** Token ready flag: indicates a valid token with content (ready for the 
parser). */
     boolean isReady;
 
+    boolean isQuoted;
+
     void reset() {
         content.setLength(0);
         type = INVALID;
         isReady = false;
+        isQuoted = false;
     }
 
     /**
diff --git a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java 
b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
index 780a502..6890bc2 100644
--- a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
@@ -873,7 +873,7 @@ public class CSVPrinterTest {
         String expected = "\"NULL\"\tNULL\n";
         assertEquals(expected, writer.toString());
         String[] record0 = toFirstRecordValues(expected, format);
-        assertArrayEquals(new Object[2], record0);
+        assertArrayEquals(s, record0);
 
         s = new String[] { "\\N", null };
         format = CSVFormat.MYSQL.withNullString("\\N");
diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv253Test.java 
b/src/test/java/org/apache/commons/csv/issues/JiraCsv253Test.java
new file mode 100644
index 0000000..9050731
--- /dev/null
+++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv253Test.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.csv.issues;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.Iterator;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVRecord;
+import org.apache.commons.csv.QuoteMode;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Setting QuoteMode:ALL_NON_NULL or NON_NUMERIC can distinguish between empty 
string columns and absent value columns.
+ */
+public class JiraCsv253Test {
+
+    private void assertArrayEqual(final String[] expected, final CSVRecord 
actual) {
+        for (int i = 0; i < expected.length; i++) {
+            assertEquals(expected[i], actual.get(i));
+        }
+    }
+
+    @Test
+    public void testHandleAbsentValues() throws IOException {
+        final String source = "\"John\",,\"Doe\"\n" + ",\"AA\",123\n" + 
"\"John\",90,\n" + "\"\",,90";
+        final CSVFormat csvFormat = 
CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.NON_NUMERIC).build();
+        try (final CSVParser parser = csvFormat.parse(new 
StringReader(source))) {
+            final Iterator<CSVRecord> csvRecords = parser.iterator();
+            assertArrayEqual(new String[] {"John", null, "Doe"}, 
csvRecords.next());
+            assertArrayEqual(new String[] {null, "AA", "123"}, 
csvRecords.next());
+            assertArrayEqual(new String[] {"John", "90", null}, 
csvRecords.next());
+            assertArrayEqual(new String[] {"", null, "90"}, csvRecords.next());
+        }
+    }
+}
diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv93Test.java 
b/src/test/java/org/apache/commons/csv/issues/JiraCsv93Test.java
new file mode 100644
index 0000000..e122348
--- /dev/null
+++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv93Test.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.csv.issues;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVRecord;
+import org.apache.commons.csv.QuoteMode;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Add more tests about null value.
+ * <p>
+ * QuoteMode:ALL_NON_NULL (Quotes all non-null fields, null will not be quoted 
but not null will be quoted). when
+ * withNullString("NULL"), NULL String value ("NULL") and null value (null) 
will be formatted as '"NULL",NULL'. So it
+ * also should be parsed as NULL String value and null value (["NULL", null]), 
It should be distinguish in parsing. And
+ * when don't set nullString in CSVFormat, String '"",' should be parsed as "" 
and null (["", null]) according to null
+ * will not be quoted but not null will be quoted. QuoteMode:NON_NUMERIC, same 
as ALL_NON_NULL.
+ * </p>
+ * <p>
+ * This can solve the problem of distinguishing between empty string columns 
and absent value columns which just like
+ * Jira CSV-253 to a certain extent.
+ * </p>
+ */
+public class JiraCsv93Test {
+    private static Object[] objects1 = {"abc", "", null, "a,b,c", 123};
+
+    private static Object[] objects2 = {"abc", "NULL", null, "a,b,c", 123};
+
+    private void every(final CSVFormat csvFormat, final Object[] objects, 
final String format, final String[] data)
+        throws IOException {
+        final String source = csvFormat.format(objects);
+        assertEquals(format, csvFormat.format(objects));
+        try (final CSVParser csvParser = csvFormat.parse(new 
StringReader(source))) {
+            final CSVRecord csvRecord = csvParser.iterator().next();
+            for (int i = 0; i < data.length; i++) {
+                assertEquals(csvRecord.get(i), data[i]);
+            }
+        }
+    }
+
+    @Test
+    public void testWithNotSetNullString() throws IOException {
+        // @formatter:off
+        every(CSVFormat.DEFAULT,
+                objects1,
+                "abc,,,\"a,b,c\",123",
+                new String[]{"abc", "", "", "a,b,c", "123"});
+        every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.ALL).build(),
+                objects1,
+                "\"abc\",\"\",,\"a,b,c\",\"123\"",
+                new String[]{"abc", "", "", "a,b,c", "123"});
+        
every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.ALL_NON_NULL).build(),
+                objects1,
+                "\"abc\",\"\",,\"a,b,c\",\"123\"",
+                new String[]{"abc", "", null, "a,b,c", "123"});
+        
every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.MINIMAL).build(),
+                objects1,
+                "abc,,,\"a,b,c\",123",
+                new String[]{"abc", "", "", "a,b,c", "123"});
+        
every(CSVFormat.DEFAULT.builder().setEscape('?').setQuoteMode(QuoteMode.NONE).build(),
+                objects1,
+                "abc,,,a?,b?,c,123",
+                new String[]{"abc", "", "", "a,b,c", "123"});
+        
every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.NON_NUMERIC).build(),
+                objects1,
+                "\"abc\",\"\",,\"a,b,c\",123",
+                new String[]{"abc", "", null, "a,b,c", "123"});
+        // @formatter:on
+    }
+
+    @Test
+    public void testWithSetNullStringEmptyString() throws IOException {
+        // @formatter:off
+        every(CSVFormat.DEFAULT.builder().setNullString("").build(),
+                objects1,
+                "abc,,,\"a,b,c\",123",
+                new String[]{"abc", null, null, "a,b,c", "123"});
+        
every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.ALL).build(),
+                objects1,
+                "\"abc\",\"\",\"\",\"a,b,c\",\"123\"",
+                new String[]{"abc", null, null, "a,b,c", "123"});
+        
every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.ALL_NON_NULL).build(),
+                objects1,
+                "\"abc\",\"\",,\"a,b,c\",\"123\"",
+                new String[]{"abc", "", null, "a,b,c", "123"});
+        
every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.MINIMAL).build(),
+                objects1,
+                "abc,,,\"a,b,c\",123",
+                new String[]{"abc", null, null, "a,b,c", "123"});
+        
every(CSVFormat.DEFAULT.builder().setNullString("").setEscape('?').setQuoteMode(QuoteMode.NONE).build(),
+                objects1,
+                "abc,,,a?,b?,c,123",
+                new String[]{"abc", null, null, "a,b,c", "123"});
+        
every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.NON_NUMERIC).build(),
+                objects1,
+                "\"abc\",\"\",,\"a,b,c\",123",
+                new String[]{"abc", "", null, "a,b,c", "123"});
+        // @formatter:on
+    }
+
+    @Test
+    public void testWithSetNullStringNULL() throws IOException {
+        // @formatter:off
+        every(CSVFormat.DEFAULT.builder().setNullString("NULL").build(),
+                objects2,
+                "abc,NULL,NULL,\"a,b,c\",123",
+                new String[]{"abc", null, null, "a,b,c", "123"});
+        
every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.ALL).build(),
+                objects2,
+                "\"abc\",\"NULL\",\"NULL\",\"a,b,c\",\"123\"",
+                new String[]{"abc", null, null, "a,b,c", "123"});
+        
every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.ALL_NON_NULL).build(),
+                objects2,
+                "\"abc\",\"NULL\",NULL,\"a,b,c\",\"123\"",
+                new String[]{"abc", "NULL", null, "a,b,c", "123"});
+        
every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.MINIMAL).build(),
+                objects2,
+                "abc,NULL,NULL,\"a,b,c\",123",
+                new String[]{"abc", null, null, "a,b,c", "123"});
+        
every(CSVFormat.DEFAULT.builder().setNullString("NULL").setEscape('?').setQuoteMode(QuoteMode.NONE).build(),
+                objects2,
+                "abc,NULL,NULL,a?,b?,c,123",
+                new String[]{"abc", null, null, "a,b,c", "123"});
+        
every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.NON_NUMERIC).build(),
+                objects2,
+                "\"abc\",\"NULL\",NULL,\"a,b,c\",123",
+                new String[]{"abc", "NULL", null, "a,b,c", "123"});
+        // @formatter:on
+    }
+}
\ No newline at end of file

Reply via email to