This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-csv.git
The following commit(s) were added to refs/heads/master by this push:
new 788f2aa [CSV-239] Cannot get headers in column order from CSVRecord.
788f2aa is described below
commit 788f2aaa7aff361416e11fba5df7cfc31060cbb1
Author: Gary Gregory <[email protected]>
AuthorDate: Sun May 19 10:26:27 2019 -0400
[CSV-239] Cannot get headers in column order from CSVRecord.
---
src/changes/changes.xml | 1 +
.../java/org/apache/commons/csv/CSVParser.java | 219 +++++++++++----------
.../java/org/apache/commons/csv/CSVRecord.java | 89 ++++++---
.../java/org/apache/commons/csv/CSVRecordTest.java | 31 ++-
4 files changed, 196 insertions(+), 144 deletions(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index ce721ef..2fa9369 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -45,6 +45,7 @@
<action issue="CSV-234" type="add" dev="ggregory" due-to="Roberto
Benedetti, Gary Gregory">Add support for java.sql.Clob.</action>
<action issue="CSV-237" type="update" dev="ggregory" due-to="Gary
Gregory">Update to Java 8.</action>
<action issue="CSV-238" type="fix" dev="ggregory" due-to="Stephen
Olander-Waters">Escape quotes in CLOBs #39.</action>
+ <action issue="CSV-239" type="add" dev="ggregory" due-to="Gary Gregory,
Dave Moten">Cannot get headers in column order from CSVRecord.</action>
<action type="update" dev="ggregory" due-to="Gary
Gregory">Update tests from H2 1.4.198 to 1.4.199.</action>
</release>
<release version="1.6" date="2018-09-22" description="Feature and bug fix
release (Java 7)">
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java
b/src/main/java/org/apache/commons/csv/CSVParser.java
index a3decdf..6eb97f8 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -39,6 +39,7 @@ import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.TreeMap;
+import java.util.stream.Collectors;
/**
* Parses CSV files according to the specified format.
@@ -133,6 +134,61 @@ import java.util.TreeMap;
*/
public final class CSVParser implements Iterable<CSVRecord>, Closeable {
+ class CSVRecordIterator implements Iterator<CSVRecord> {
+ private CSVRecord current;
+
+ private CSVRecord getNextRecord() {
+ try {
+ return CSVParser.this.nextRecord();
+ } catch (final IOException e) {
+ throw new IllegalStateException(
+ e.getClass().getSimpleName() + " reading next record:
" + e.toString(), e);
+ }
+ }
+
+ @Override
+ public boolean hasNext() {
+ if (CSVParser.this.isClosed()) {
+ return false;
+ }
+ if (this.current == null) {
+ this.current = this.getNextRecord();
+ }
+
+ return this.current != null;
+ }
+
+ @Override
+ public CSVRecord next() {
+ if (CSVParser.this.isClosed()) {
+ throw new NoSuchElementException("CSVParser has been closed");
+ }
+ CSVRecord next = this.current;
+ this.current = null;
+
+ if (next == null) {
+ // hasNext() wasn't called before
+ next = this.getNextRecord();
+ if (next == null) {
+ throw new NoSuchElementException("No more CSV records
available");
+ }
+ }
+
+ return next;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ static List<String> createHeaderNames(final Map<String, Integer>
headerMap) {
+ return headerMap == null ? null
+ :
headerMap.entrySet().stream().sorted(Map.Entry.comparingByValue()).map(Map.Entry::getKey)
+ .collect(Collectors.toList());
+ }
+
/**
* Creates a parser for the given {@link File}.
*
@@ -229,6 +285,8 @@ public final class CSVParser implements
Iterable<CSVRecord>, Closeable {
return new CSVParser(reader, format);
}
+ // the following objects are shared to reduce garbage
+
/**
* Creates a parser for the given {@link String}.
*
@@ -277,13 +335,14 @@ public final class CSVParser implements
Iterable<CSVRecord>, Closeable {
return new CSVParser(new InputStreamReader(url.openStream(), charset),
format);
}
- // the following objects are shared to reduce garbage
-
private final CSVFormat format;
/** A mapping of column names to column indices */
private final Map<String, Integer> headerMap;
+ /** Preserve the column order to avoid re-computing it. */
+ private final List<String> headerNames;
+
private final Lexer lexer;
private final CSVRecordIterator csvRecordIterator;
@@ -349,14 +408,15 @@ public final class CSVParser implements
Iterable<CSVRecord>, Closeable {
*/
@SuppressWarnings("resource")
public CSVParser(final Reader reader, final CSVFormat format, final long
characterOffset, final long recordNumber)
- throws IOException {
+ throws IOException {
Assertions.notNull(reader, "reader");
Assertions.notNull(format, "format");
this.format = format;
this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
this.csvRecordIterator = new CSVRecordIterator();
- this.headerMap = this.createHeaderMap();
+ this.headerMap = createHeaderMap(); // 1st
+ this.headerNames = createHeaderNames(this.headerMap); // 2nd
this.characterOffset = characterOffset;
this.recordNumber = recordNumber - 1;
}
@@ -385,6 +445,53 @@ public final class CSVParser implements
Iterable<CSVRecord>, Closeable {
}
/**
+ * Creates the name to index mapping if the format defines a header.
+ *
+ * @return null if the format has no header.
+ * @throws IOException if there is a problem reading the header or
skipping the first record
+ */
+ private Map<String, Integer> createHeaderMap() throws IOException {
+ Map<String, Integer> hdrMap = null;
+ final String[] formatHeader = this.format.getHeader();
+ if (formatHeader != null) {
+ hdrMap = this.format.getIgnoreHeaderCase() ?
+ new TreeMap<>(String.CASE_INSENSITIVE_ORDER) :
+ new TreeMap<>();
+
+ String[] headerRecord = null;
+ if (formatHeader.length == 0) {
+ // read the header from the first line of the file
+ final CSVRecord nextRecord = this.nextRecord();
+ if (nextRecord != null) {
+ headerRecord = nextRecord.values();
+ }
+ } else {
+ if (this.format.getSkipHeaderRecord()) {
+ this.nextRecord();
+ }
+ headerRecord = formatHeader;
+ }
+
+ // build the name to index mappings
+ if (headerRecord != null) {
+ for (int i = 0; i < headerRecord.length; i++) {
+ final String header = headerRecord[i];
+ final boolean containsHeader = header == null ? false :
hdrMap.containsKey(header);
+ final boolean emptyHeader = header == null ||
header.trim().isEmpty();
+ if (containsHeader && (!emptyHeader ||
!this.format.getAllowMissingColumnNames())) {
+ throw new IllegalArgumentException("The header
contains a duplicate name: \"" + header
+ + "\" in " + Arrays.toString(headerRecord));
+ }
+ if (header != null) {
+ hdrMap.put(header, Integer.valueOf(i));
+ }
+ }
+ }
+ }
+ return hdrMap;
+ }
+
+ /**
* Returns the current line number in the input stream.
*
* <p>
@@ -409,11 +516,11 @@ public final class CSVParser implements
Iterable<CSVRecord>, Closeable {
}
/**
- * Returns a copy of the header map that iterates in column order.
+ * Returns a copy of the header map.
* <p>
* The map keys are column names. The map values are 0-based indices.
* </p>
- * @return a copy of the header map that iterates in column order.
+ * @return a copy of the header map.
*/
public Map<String, Integer> getHeaderMap() {
return this.headerMap == null ? null : new
LinkedHashMap<>(this.headerMap);
@@ -455,53 +562,6 @@ public final class CSVParser implements
Iterable<CSVRecord>, Closeable {
}
/**
- * Creates the name to index mapping if the format defines a header.
- *
- * @return null if the format has no header.
- * @throws IOException if there is a problem reading the header or
skipping the first record
- */
- private Map<String, Integer> createHeaderMap() throws IOException {
- Map<String, Integer> hdrMap = null;
- final String[] formatHeader = this.format.getHeader();
- if (formatHeader != null) {
- hdrMap = this.format.getIgnoreHeaderCase() ?
- new TreeMap<>(String.CASE_INSENSITIVE_ORDER) :
- new TreeMap<>();
-
- String[] headerRecord = null;
- if (formatHeader.length == 0) {
- // read the header from the first line of the file
- final CSVRecord nextRecord = this.nextRecord();
- if (nextRecord != null) {
- headerRecord = nextRecord.values();
- }
- } else {
- if (this.format.getSkipHeaderRecord()) {
- this.nextRecord();
- }
- headerRecord = formatHeader;
- }
-
- // build the name to index mappings
- if (headerRecord != null) {
- for (int i = 0; i < headerRecord.length; i++) {
- final String header = headerRecord[i];
- final boolean containsHeader = header == null ? false :
hdrMap.containsKey(header);
- final boolean emptyHeader = header == null ||
header.trim().isEmpty();
- if (containsHeader && (!emptyHeader ||
!this.format.getAllowMissingColumnNames())) {
- throw new IllegalArgumentException("The header
contains a duplicate name: \"" + header
- + "\" in " + Arrays.toString(headerRecord));
- }
- if (header != null) {
- hdrMap.put(header, Integer.valueOf(i));
- }
- }
- }
- }
- return hdrMap;
- }
-
- /**
* Gets whether this parser is closed.
*
* @return whether this parser is closed.
@@ -527,55 +587,6 @@ public final class CSVParser implements
Iterable<CSVRecord>, Closeable {
return csvRecordIterator;
}
- class CSVRecordIterator implements Iterator<CSVRecord> {
- private CSVRecord current;
-
- private CSVRecord getNextRecord() {
- try {
- return CSVParser.this.nextRecord();
- } catch (final IOException e) {
- throw new IllegalStateException(
- e.getClass().getSimpleName() + " reading next record:
" + e.toString(), e);
- }
- }
-
- @Override
- public boolean hasNext() {
- if (CSVParser.this.isClosed()) {
- return false;
- }
- if (this.current == null) {
- this.current = this.getNextRecord();
- }
-
- return this.current != null;
- }
-
- @Override
- public CSVRecord next() {
- if (CSVParser.this.isClosed()) {
- throw new NoSuchElementException("CSVParser has been closed");
- }
- CSVRecord next = this.current;
- this.current = null;
-
- if (next == null) {
- // hasNext() wasn't called before
- next = this.getNextRecord();
- if (next == null) {
- throw new NoSuchElementException("No more CSV records
available");
- }
- }
-
- return next;
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
- }
-
/**
* Parses the next record from the current point in the stream.
*
@@ -622,8 +633,8 @@ public final class CSVParser implements
Iterable<CSVRecord>, Closeable {
if (!this.recordList.isEmpty()) {
this.recordNumber++;
final String comment = sb == null ? null : sb.toString();
- result = new CSVRecord(this.recordList.toArray(new
String[this.recordList.size()]), this.headerMap, comment,
- this.recordNumber, startCharPosition);
+ result = new CSVRecord(this.recordList.toArray(new
String[this.recordList.size()]), this.headerMap,
+ this.headerNames, comment, this.recordNumber,
startCharPosition);
}
return result;
}
diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java
b/src/main/java/org/apache/commons/csv/CSVRecord.java
index 34a3ba2..3984405 100644
--- a/src/main/java/org/apache/commons/csv/CSVRecord.java
+++ b/src/main/java/org/apache/commons/csv/CSVRecord.java
@@ -18,9 +18,10 @@
package org.apache.commons.csv;
import java.io.Serializable;
+import java.util.ArrayList;
import java.util.Arrays;
-import java.util.HashMap;
import java.util.Iterator;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -40,7 +41,10 @@ public final class CSVRecord implements Serializable,
Iterable<String> {
private final String comment;
/** The column name to index mapping. */
- private final Map<String, Integer> mapping;
+ private final Map<String, Integer> headerMap;
+
+ /** The column order to avoid re-computing it. */
+ private final List<String> headerNames;
/** The record number. */
private final long recordNumber;
@@ -48,11 +52,12 @@ public final class CSVRecord implements Serializable,
Iterable<String> {
/** The values of the record */
private final String[] values;
- CSVRecord(final String[] values, final Map<String, Integer> mapping, final
String comment, final long recordNumber,
- final long characterPosition) {
+ CSVRecord(final String[] values, final Map<String, Integer> headerMap,
List<String> headerNames, final String comment,
+ final long recordNumber, final long characterPosition) {
this.recordNumber = recordNumber;
this.values = values != null ? values : EMPTY_STRING_ARRAY;
- this.mapping = mapping;
+ this.headerMap = headerMap;
+ this.headerNames = headerNames;
this.comment = comment;
this.characterPosition = characterPosition;
}
@@ -93,14 +98,10 @@ public final class CSVRecord implements Serializable,
Iterable<String> {
* @see CSVFormat#withNullString(String)
*/
public String get(final String name) {
- if (mapping == null) {
- throw new IllegalStateException(
- "No header mapping was specified, the record values can't be
accessed by name");
- }
- final Integer index = mapping.get(name);
+ final Integer index = getIndex(name);
if (index == null) {
throw new IllegalArgumentException(String.format("Mapping for %s
not found, expected one of %s", name,
- mapping.keySet()));
+ headerMap.keySet()));
}
try {
return values[index.intValue()];
@@ -134,6 +135,32 @@ public final class CSVRecord implements Serializable,
Iterable<String> {
}
/**
+ * Returns a copy of the header names that iterates in column order.
+ *
+ * @return a copy of the header names that iterates in column order.
+ * @since 1.7
+ */
+ public List<String> getHeaderNames() {
+ return new ArrayList<>(headerNames);
+ }
+
+ Integer getIndex(final String name) {
+ if (headerMap == null) {
+ throw new IllegalStateException(
+ "No header mapping was specified, the record values can't be
accessed by name");
+ }
+ return headerMap.get(name);
+ }
+
+ String getName(final int index) {
+ if (headerMap == null) {
+ throw new IllegalStateException(
+ "No header mapping was specified, the record values can't be
accessed by name");
+ }
+ return headerNames.get(index);
+ }
+
+ /**
* Returns the number of this record in the parsed CSV file.
*
* <p>
@@ -149,20 +176,6 @@ public final class CSVRecord implements Serializable,
Iterable<String> {
}
/**
- * Tells whether the record size matches the header size.
- *
- * <p>
- * Returns true if the sizes for this record match and false if not. Some
programs can export files that fail this
- * test but still produce parsable files.
- * </p>
- *
- * @return true of this record is valid, false if not
- */
- public boolean isConsistent() {
- return mapping == null || mapping.size() == values.length;
- }
-
- /**
* Checks whether this record has a comment, false otherwise.
* Note that comments are attached to the following record.
* If there is no following record (i.e. the comment is at EOF)
@@ -176,6 +189,20 @@ public final class CSVRecord implements Serializable,
Iterable<String> {
}
/**
+ * Tells whether the record size matches the header size.
+ *
+ * <p>
+ * Returns true if the sizes for this record match and false if not. Some
programs can export files that fail this
+ * test but still produce parsable files.
+ * </p>
+ *
+ * @return true of this record is valid, false if not
+ */
+ public boolean isConsistent() {
+ return headerMap == null || headerMap.size() == values.length;
+ }
+
+ /**
* Checks whether a given column is mapped, i.e. its name has been defined
to the parser.
*
* @param name
@@ -183,7 +210,7 @@ public final class CSVRecord implements Serializable,
Iterable<String> {
* @return whether a given column is mapped.
*/
public boolean isMapped(final String name) {
- return mapping != null && mapping.containsKey(name);
+ return headerMap != null && headerMap.containsKey(name);
}
/**
@@ -194,7 +221,7 @@ public final class CSVRecord implements Serializable,
Iterable<String> {
* @return whether a given columns is mapped and has a value
*/
public boolean isSet(final String name) {
- return isMapped(name) && mapping.get(name).intValue() < values.length;
+ return isMapped(name) && headerMap.get(name).intValue() <
values.length;
}
/**
@@ -215,10 +242,10 @@ public final class CSVRecord implements Serializable,
Iterable<String> {
* @return the given map.
*/
<M extends Map<String, String>> M putIn(final M map) {
- if (mapping == null) {
+ if (headerMap == null) {
return map;
}
- for (final Entry<String, Integer> entry : mapping.entrySet()) {
+ for (final Entry<String, Integer> entry : headerMap.entrySet()) {
final int col = entry.getValue().intValue();
if (col < values.length) {
map.put(entry.getKey(), values[col]);
@@ -253,7 +280,7 @@ public final class CSVRecord implements Serializable,
Iterable<String> {
* @return A new Map. The map is empty if the record has no headers.
*/
public Map<String, String> toMap() {
- return putIn(new HashMap<String, String>(values.length));
+ return putIn(new LinkedHashMap<String, String>(values.length));
}
/**
@@ -264,7 +291,7 @@ public final class CSVRecord implements Serializable,
Iterable<String> {
*/
@Override
public String toString() {
- return "CSVRecord [comment=" + comment + ", mapping=" + mapping +
+ return "CSVRecord [comment=" + comment + ", mapping=" + headerMap +
", recordNumber=" + recordNumber + ", values=" +
Arrays.toString(values) + "]";
}
diff --git a/src/test/java/org/apache/commons/csv/CSVRecordTest.java
b/src/test/java/org/apache/commons/csv/CSVRecordTest.java
index 6347cc5..dab4d55 100644
--- a/src/test/java/org/apache/commons/csv/CSVRecordTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVRecordTest.java
@@ -26,6 +26,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
@@ -40,17 +41,17 @@ public class CSVRecordTest {
private String[] values;
private CSVRecord record, recordWithHeader;
- private Map<String, Integer> header;
+ private Map<String, Integer> headerMap;
@Before
public void setUp() throws Exception {
values = new String[] { "A", "B", "C" };
- record = new CSVRecord(values, null, null, 0, -1);
- header = new HashMap<>();
- header.put("first", Integer.valueOf(0));
- header.put("second", Integer.valueOf(1));
- header.put("third", Integer.valueOf(2));
- recordWithHeader = new CSVRecord(values, header, null, 0, -1);
+ record = new CSVRecord(values, null, null, null, 0, -1);
+ headerMap = new HashMap<>();
+ headerMap.put("first", Integer.valueOf(0));
+ headerMap.put("second", Integer.valueOf(1));
+ headerMap.put("third", Integer.valueOf(2));
+ recordWithHeader = new CSVRecord(values, headerMap,
CSVParser.createHeaderNames(headerMap), null, 0, -1);
}
@Test
@@ -69,7 +70,7 @@ public class CSVRecordTest {
@Test(expected = IllegalArgumentException.class)
public void testGetStringInconsistentRecord() {
- header.put("fourth", Integer.valueOf(4));
+ headerMap.put("fourth", Integer.valueOf(4));
recordWithHeader.get("fourth");
}
@@ -103,7 +104,7 @@ public class CSVRecordTest {
assertTrue(record.isConsistent());
assertTrue(recordWithHeader.isConsistent());
- header.put("fourth", Integer.valueOf(4));
+ headerMap.put("fourth", Integer.valueOf(4));
assertFalse(recordWithHeader.isConsistent());
}
@@ -162,6 +163,18 @@ public class CSVRecordTest {
}
@Test
+ public void testGetHeaderNames() {
+ final Map<String, String> nameValueMap = this.recordWithHeader.toMap();
+ final List<String> headerNames =
this.recordWithHeader.getHeaderNames();
+ Assert.assertEquals(nameValueMap.size(), headerNames.size());
+ for (int i = 0; i < headerNames.size(); i++) {
+ String name = headerNames.get(i);
+ Assert.assertEquals(i,
this.recordWithHeader.getIndex(name).intValue());
+ Assert.assertEquals(name, this.recordWithHeader.getName(i));
+ }
+ }
+
+ @Test
public void testToMapWithShortRecord() throws Exception {
try (final CSVParser parser = CSVParser.parse("a,b",
CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
final CSVRecord shortRec = parser.iterator().next();