On Oct 13, 2012, at 8:37, sebb <seb...@gmail.com> wrote: > On 13 October 2012 13:14, Gary Gregory <garydgreg...@gmail.com> wrote: >> Thank you for the feedback Sebb. I'll do another pass later today. > > See also my subsequent posting on the dev list. > I think that might resolve the performance issue without needing to > revert all the changes to CSVFormat.
Roger that. It'll be a couple of hours before I get to it. Thank you again. Gary > >> Gary >> >> On Oct 13, 2012, at 6:28, sebb <seb...@gmail.com> wrote: >> >>> On 13 October 2012 07:27, <ggreg...@apache.org> wrote: >>>> Author: ggregory >>>> Date: Sat Oct 13 06:27:52 2012 >>>> New Revision: 1397783 >>>> >>>> URL: http://svn.apache.org/viewvc?rev=1397783&view=rev >>>> Log: >>>> Remove DISABLED character hack. >>> >>> -1 (for now) >>> >>> Are you sure this change does not affect performance? >>> The Lexer code now has to do some boxing and unboxing. >>> Unboxing is not expensive, but boxing is potentially so. >>> >>> Also, the code now allows for a null delimiter - is that really sensible? >>> >>> Also CSVFormat.getDelimiter() is inconsistent - it returns char >>> whereas all the others return Character. >>> >>> I think the Lexer should stick to using char, particularly for the >>> delimiter which cannot be null. >>> >>>> Modified: >>>> >>>> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java >>>> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java >>>> >>>> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java >>>> >>>> Modified: >>>> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java >>>> URL: >>>> http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java?rev=1397783&r1=1397782&r2=1397783&view=diff >>>> ============================================================================== >>>> --- >>>> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java >>>> (original) >>>> +++ >>>> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/CSVFormat.java >>>> Sat Oct 13 06:27:52 2012 >>>> @@ -18,6 +18,7 @@ >>>> package org.apache.commons.csv; >>>> >>>> import static org.apache.commons.csv.Constants.COMMA; >>>> +import static org.apache.commons.csv.Constants.CR; >>>> import static org.apache.commons.csv.Constants.CRLF; >>>> import static org.apache.commons.csv.Constants.DOUBLE_QUOTE; >>>> import static org.apache.commons.csv.Constants.ESCAPE; >>>> @@ -38,30 +39,19 @@ public class CSVFormat implements Serial >>>> >>>> private static final long serialVersionUID = 1L; >>>> >>>> - private final char delimiter; >>>> - private final char encapsulator; >>>> - private final char commentStart; >>>> - private final char escape; >>>> + private final Character delimiter; >>>> + private final Character encapsulator; >>>> + private final Character commentStart; >>>> + private final Character escape; >>>> private final boolean ignoreSurroundingSpaces; // Should >>>> leading/trailing spaces be ignored around values? >>>> private final boolean ignoreEmptyLines; >>>> private final String lineSeparator; // for outputs >>>> private final String[] header; >>>> >>>> - private final boolean isEscaping; >>>> - private final boolean isCommentingEnabled; >>>> - private final boolean isEncapsulating; >>>> - >>>> - /** >>>> - * Constant char to be used for disabling comments, escapes and >>>> encapsulation. The value -2 is used because it >>>> - * won't be confused with an EOF signal (-1), and because the Unicode >>>> value {@code FFFE} would be encoded as two chars >>>> - * (using surrogates) and thus there should never be a collision with >>>> a real text char. >>>> - */ >>>> - static final char DISABLED = '\ufffe'; >>>> - >>>> /** >>>> * Starting format with no settings defined; used for creating other >>>> formats from scratch. >>>> */ >>>> - static final CSVFormat PRISTINE = new CSVFormat(DISABLED, DISABLED, >>>> DISABLED, DISABLED, false, false, null, null); >>>> + static final CSVFormat PRISTINE = new CSVFormat(null, null, null, >>>> null, false, false, null, null); >>>> >>>> /** >>>> * Standard comma separated format, as for {@link #RFC4180} but >>>> allowing blank lines. >>>> @@ -73,8 +63,8 @@ public class CSVFormat implements Serial >>>> * </ul> >>>> */ >>>> public static final CSVFormat DEFAULT = >>>> - PRISTINE. >>>> - withDelimiter(COMMA) >>>> + PRISTINE >>>> + .withDelimiter(COMMA) >>>> .withEncapsulator(DOUBLE_QUOTE) >>>> .withIgnoreEmptyLines(true) >>>> .withLineSeparator(CRLF); >>>> @@ -89,8 +79,8 @@ public class CSVFormat implements Serial >>>> * </ul> >>>> */ >>>> public static final CSVFormat RFC4180 = >>>> - PRISTINE. >>>> - withDelimiter(COMMA) >>>> + PRISTINE >>>> + .withDelimiter(COMMA) >>>> .withEncapsulator(DOUBLE_QUOTE) >>>> .withLineSeparator(CRLF); >>>> >>>> @@ -127,7 +117,7 @@ public class CSVFormat implements Serial >>>> * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html"> >>>> * http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a> >>>> */ >>>> - public static final CSVFormat MYSQL = >>>> + public static final CSVFormat MYSQL = >>>> PRISTINE >>>> .withDelimiter(TAB) >>>> .withEscape(ESCAPE) >>>> @@ -153,7 +143,7 @@ public class CSVFormat implements Serial >>>> * @param header >>>> * the header >>>> */ >>>> - CSVFormat(final char delimiter, final char encapsulator, final char >>>> commentStart, final char escape, final boolean surroundingSpacesIgnored, >>>> + CSVFormat(final Character delimiter, final Character encapsulator, >>>> final Character commentStart, final Character escape, final boolean >>>> surroundingSpacesIgnored, >>>> final boolean emptyLinesIgnored, final String lineSeparator, >>>> final String[] header) { >>>> this.delimiter = delimiter; >>>> this.encapsulator = encapsulator; >>>> @@ -163,9 +153,6 @@ public class CSVFormat implements Serial >>>> this.ignoreEmptyLines = emptyLinesIgnored; >>>> this.lineSeparator = lineSeparator; >>>> this.header = header; >>>> - this.isEncapsulating = encapsulator != DISABLED; >>>> - this.isCommentingEnabled = commentStart != DISABLED; >>>> - this.isEscaping = escape != DISABLED; >>>> } >>>> >>>> /** >>>> @@ -176,8 +163,8 @@ public class CSVFormat implements Serial >>>> * >>>> * @return true if <code>c</code> is a line break character >>>> */ >>>> - private static boolean isLineBreak(final char c) { >>>> - return c == '\n' || c == '\r'; >>>> + private static boolean isLineBreak(final Character c) { >>>> + return c != null && (c == LF || c == CR); >>>> } >>>> >>>> /** >>>> @@ -199,12 +186,12 @@ public class CSVFormat implements Serial >>>> commentStart + "\")"); >>>> } >>>> >>>> - if (encapsulator != DISABLED && encapsulator == commentStart) { >>>> + if (encapsulator != null && encapsulator == commentStart) { >>>> throw new IllegalArgumentException( >>>> "The comment start character and the encapsulator >>>> cannot be the same (\"" + commentStart + "\")"); >>>> } >>>> >>>> - if (escape != DISABLED && escape == commentStart) { >>>> + if (escape != null && escape == commentStart) { >>>> throw new IllegalArgumentException("The comment start and the >>>> escape character cannot be the same (\"" + >>>> commentStart + "\")"); >>>> } >>>> @@ -229,6 +216,19 @@ public class CSVFormat implements Serial >>>> * thrown if the specified character is a line break >>>> */ >>>> public CSVFormat withDelimiter(final char delimiter) { >>>> + return withDelimiter(Character.valueOf(delimiter)); >>>> + } >>>> + >>>> + /** >>>> + * Returns a copy of this format using the specified delimiter >>>> character. >>>> + * >>>> + * @param delimiter >>>> + * the delimiter character >>>> + * @return A copy of this format using the specified delimiter >>>> character >>>> + * @throws IllegalArgumentException >>>> + * thrown if the specified character is a line break >>>> + */ >>>> + public CSVFormat withDelimiter(final Character delimiter) { >>>> if (isLineBreak(delimiter)) { >>>> throw new IllegalArgumentException("The delimiter cannot be a >>>> line break"); >>>> } >>>> @@ -241,7 +241,7 @@ public class CSVFormat implements Serial >>>> * >>>> * @return the encapsulator character >>>> */ >>>> - public char getEncapsulator() { >>>> + public Character getEncapsulator() { >>>> return encapsulator; >>>> } >>>> >>>> @@ -255,6 +255,19 @@ public class CSVFormat implements Serial >>>> * thrown if the specified character is a line break >>>> */ >>>> public CSVFormat withEncapsulator(final char encapsulator) { >>>> + return withEncapsulator(Character.valueOf(encapsulator)); >>>> + } >>>> + >>>> + /** >>>> + * Returns a copy of this format using the specified encapsulator >>>> character. >>>> + * >>>> + * @param encapsulator >>>> + * the encapsulator character >>>> + * @return A copy of this format using the specified encapsulator >>>> character >>>> + * @throws IllegalArgumentException >>>> + * thrown if the specified character is a line break >>>> + */ >>>> + public CSVFormat withEncapsulator(final Character encapsulator) { >>>> if (isLineBreak(encapsulator)) { >>>> throw new IllegalArgumentException("The encapsulator cannot be >>>> a line break"); >>>> } >>>> @@ -268,7 +281,7 @@ public class CSVFormat implements Serial >>>> * @return {@code true} if an encapsulator is defined >>>> */ >>>> public boolean isEncapsulating() { >>>> - return isEncapsulating; >>>> + return encapsulator != null; >>>> } >>>> >>>> /** >>>> @@ -276,7 +289,7 @@ public class CSVFormat implements Serial >>>> * >>>> * @return the comment start marker. >>>> */ >>>> - public char getCommentStart() { >>>> + public Character getCommentStart() { >>>> return commentStart; >>>> } >>>> >>>> @@ -292,6 +305,21 @@ public class CSVFormat implements Serial >>>> * thrown if the specified character is a line break >>>> */ >>>> public CSVFormat withCommentStart(final char commentStart) { >>>> + return withCommentStart(Character.valueOf(commentStart)); >>>> + } >>>> + >>>> + /** >>>> + * Returns a copy of this format using the specified character as the >>>> comment start marker. >>>> + * >>>> + * Note that the comment introducer character is only recognised at >>>> the start of a line. >>>> + * >>>> + * @param commentStart >>>> + * the comment start marker >>>> + * @return A copy of this format using the specified character as the >>>> comment start marker >>>> + * @throws IllegalArgumentException >>>> + * thrown if the specified character is a line break >>>> + */ >>>> + public CSVFormat withCommentStart(final Character commentStart) { >>>> if (isLineBreak(commentStart)) { >>>> throw new IllegalArgumentException("The comment start character >>>> cannot be a line break"); >>>> } >>>> @@ -307,7 +335,7 @@ public class CSVFormat implements Serial >>>> * @return <tt>true</tt> is comments are supported, <tt>false</tt> >>>> otherwise >>>> */ >>>> public boolean isCommentingEnabled() { >>>> - return isCommentingEnabled; >>>> + return commentStart != null; >>>> } >>>> >>>> /** >>>> @@ -315,7 +343,7 @@ public class CSVFormat implements Serial >>>> * >>>> * @return the escape character >>>> */ >>>> - public char getEscape() { >>>> + public Character getEscape() { >>>> return escape; >>>> } >>>> >>>> @@ -329,6 +357,19 @@ public class CSVFormat implements Serial >>>> * thrown if the specified character is a line break >>>> */ >>>> public CSVFormat withEscape(final char escape) { >>>> + return withEscape(Character.valueOf(escape)); >>>> + } >>>> + >>>> + /** >>>> + * Returns a copy of this format using the specified escape character. >>>> + * >>>> + * @param escape >>>> + * the escape character >>>> + * @return A copy of this format using the specified escape character >>>> + * @throws IllegalArgumentException >>>> + * thrown if the specified character is a line break >>>> + */ >>>> + public CSVFormat withEscape(final Character escape) { >>>> if (isLineBreak(escape)) { >>>> throw new IllegalArgumentException("The escape character cannot >>>> be a line break"); >>>> } >>>> @@ -342,7 +383,7 @@ public class CSVFormat implements Serial >>>> * @return {@code true} if escapes are processed >>>> */ >>>> public boolean isEscaping() { >>>> - return isEscaping; >>>> + return escape != null; >>>> } >>>> >>>> /** >>>> >>>> Modified: >>>> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java >>>> URL: >>>> http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java?rev=1397783&r1=1397782&r2=1397783&view=diff >>>> ============================================================================== >>>> --- >>>> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java >>>> (original) >>>> +++ >>>> commons/proper/csv/trunk/src/main/java/org/apache/commons/csv/Lexer.java >>>> Sat Oct 13 06:27:52 2012 >>>> @@ -32,14 +32,10 @@ import java.io.IOException; >>>> */ >>>> abstract class Lexer { >>>> >>>> - private final boolean isEncapsulating; >>>> - private final boolean isEscaping; >>>> - private final boolean isCommentEnabled; >>>> - >>>> - private final char delimiter; >>>> - private final char escape; >>>> - private final char encapsulator; >>>> - private final char commmentStart; >>>> + private final Character delimiter; >>>> + private final Character escape; >>>> + private final Character encapsulator; >>>> + private final Character commmentStart; >>>> >>>> final boolean surroundingSpacesIgnored; >>>> final boolean emptyLinesIgnored; >>>> @@ -52,9 +48,6 @@ abstract class Lexer { >>>> Lexer(final CSVFormat format, final ExtendedBufferedReader in) { >>>> this.format = format; >>>> this.in = in; >>>> - this.isEncapsulating = format.isEncapsulating(); >>>> - this.isEscaping = format.isEscaping(); >>>> - this.isCommentEnabled = format.isCommentingEnabled(); >>>> this.delimiter = format.getDelimiter(); >>>> this.escape = format.getEscape(); >>>> this.encapsulator = format.getEncapsulator(); >>>> @@ -144,14 +137,14 @@ abstract class Lexer { >>>> } >>>> >>>> boolean isEscape(final int c) { >>>> - return isEscaping && c == escape; >>>> + return escape != null && c == escape; >>>> } >>>> >>>> boolean isEncapsulator(final int c) { >>>> - return isEncapsulating && c == encapsulator; >>>> + return encapsulator != null && c == encapsulator; >>>> } >>>> >>>> boolean isCommentStart(final int c) { >>>> - return isCommentEnabled && c == commmentStart; >>>> + return commmentStart != null && c == commmentStart; >>>> } >>>> } >>>> >>>> Modified: >>>> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java >>>> URL: >>>> http://svn.apache.org/viewvc/commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java?rev=1397783&r1=1397782&r2=1397783&view=diff >>>> ============================================================================== >>>> --- >>>> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java >>>> (original) >>>> +++ >>>> commons/proper/csv/trunk/src/test/java/org/apache/commons/csv/CSVFormatTest.java >>>> Sat Oct 13 06:27:52 2012 >>>> @@ -46,9 +46,9 @@ public class CSVFormatTest { >>>> format.withIgnoreEmptyLines(false); >>>> >>>> assertEquals('!', format.getDelimiter()); >>>> - assertEquals('!', format.getEncapsulator()); >>>> - assertEquals('!', format.getCommentStart()); >>>> - assertEquals('!', format.getEscape()); >>>> + assertEquals('!', format.getEncapsulator().charValue()); >>>> + assertEquals('!', format.getCommentStart().charValue()); >>>> + assertEquals('!', format.getEscape().charValue()); >>>> assertEquals(CRLF, format.getLineSeparator()); >>>> >>>> assertTrue(format.getIgnoreSurroundingSpaces()); >>>> @@ -60,10 +60,10 @@ public class CSVFormatTest { >>>> final CSVFormat format = new CSVFormat('!', '!', '!', '!', true, >>>> true, CRLF, null); >>>> >>>> assertEquals('?', format.withDelimiter('?').getDelimiter()); >>>> - assertEquals('?', format.withEncapsulator('?').getEncapsulator()); >>>> - assertEquals('?', format.withCommentStart('?').getCommentStart()); >>>> + assertEquals('?', >>>> format.withEncapsulator('?').getEncapsulator().charValue()); >>>> + assertEquals('?', >>>> format.withCommentStart('?').getCommentStart().charValue()); >>>> assertEquals("?", format.withLineSeparator("?").getLineSeparator()); >>>> - assertEquals('?', format.withEscape('?').getEscape()); >>>> + assertEquals('?', format.withEscape('?').getEscape().charValue()); >>>> >>>> >>>> assertFalse(format.withIgnoreSurroundingSpaces(false).getIgnoreSurroundingSpaces()); >>>> >>>> assertFalse(format.withIgnoreEmptyLines(false).getIgnoreEmptyLines()); >>>> @@ -131,7 +131,7 @@ public class CSVFormatTest { >>>> // expected >>>> } >>>> >>>> - >>>> format.withEncapsulator(CSVFormat.DISABLED).withCommentStart(CSVFormat.DISABLED).validate(); >>>> + format.withEncapsulator(null).withCommentStart(null).validate(); >>>> >>>> try { >>>> format.withEscape('!').withCommentStart('!').validate(); >>>> @@ -140,7 +140,7 @@ public class CSVFormatTest { >>>> // expected >>>> } >>>> >>>> - >>>> format.withEscape(CSVFormat.DISABLED).withCommentStart(CSVFormat.DISABLED).validate(); >>>> + format.withEscape(null).withCommentStart(null).validate(); >>>> >>>> >>>> try { >>>> >>>> >>> >>> --------------------------------------------------------------------- >>> To unsubscribe, e-mail: dev-unsubscr...@commons.apache.org >>> For additional commands, e-mail: dev-h...@commons.apache.org >>> >> >> --------------------------------------------------------------------- >> To unsubscribe, e-mail: dev-unsubscr...@commons.apache.org >> For additional commands, e-mail: dev-h...@commons.apache.org >> > > --------------------------------------------------------------------- > To unsubscribe, e-mail: dev-unsubscr...@commons.apache.org > For additional commands, e-mail: dev-h...@commons.apache.org > --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@commons.apache.org For additional commands, e-mail: dev-h...@commons.apache.org