This is an automated email from the ASF dual-hosted git repository. ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-csv.git
The following commit(s) were added to refs/heads/master by this push: new 21f4f58 Sort methods in AB order. 21f4f58 is described below commit 21f4f584ba1e7ebaef08dc797d9f102a896dd715 Author: Gary Gregory <garydgreg...@gmail.com> AuthorDate: Sun May 24 16:08:41 2020 -0400 Sort methods in AB order. --- .../java/org/apache/commons/csv/CSVParser.java | 44 +-- .../java/org/apache/commons/csv/CSVRecord.java | 22 +- .../apache/commons/csv/ExtendedBufferedReader.java | 128 ++++----- src/main/java/org/apache/commons/csv/Lexer.java | 316 ++++++++++----------- src/main/java/org/apache/commons/csv/Token.java | 6 +- 5 files changed, 258 insertions(+), 258 deletions(-) diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index 8345a15..bf6eb6d 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -185,6 +185,26 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { } /** + * Header information based on name and position. + */ + private static final class Headers { + /** + * Header column positions (0-based) + */ + final Map<String, Integer> headerMap; + + /** + * Header names in column order + */ + final List<String> headerNames; + + Headers(final Map<String, Integer> headerMap, final List<String> headerNames) { + this.headerMap = headerMap; + this.headerNames = headerNames; + } + } + + /** * Creates a parser for the given {@link File}. * * @param file @@ -281,6 +301,8 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { return new CSVParser(reader, format); } + // the following objects are shared to reduce garbage + /** * Creates a parser for the given {@link String}. * @@ -301,8 +323,6 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { return new CSVParser(new StringReader(string), format); } - // the following objects are shared to reduce garbage - /** * Creates and returns a parser for the given URL, which the caller MUST close. * @@ -449,26 +469,6 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable { } /** - * Header information based on name and position. - */ - private static final class Headers { - /** - * Header column positions (0-based) - */ - final Map<String, Integer> headerMap; - - /** - * Header names in column order - */ - final List<String> headerNames; - - Headers(final Map<String, Integer> headerMap, final List<String> headerNames) { - this.headerMap = headerMap; - this.headerNames = headerNames; - } - } - - /** * Creates the name to index mapping if the format defines a header. * * @return null if the format has no header. diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java index efd36bb..5181bc9 100644 --- a/src/main/java/org/apache/commons/csv/CSVRecord.java +++ b/src/main/java/org/apache/commons/csv/CSVRecord.java @@ -229,17 +229,6 @@ public final class CSVRecord implements Serializable, Iterable<String> { } /** - * Checks whether a given columns is mapped and has a value. - * - * @param name - * the name of the column to be retrieved. - * @return whether a given columns is mapped and has a value - */ - public boolean isSet(final String name) { - return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; - } - - /** * Checks whether a column with given index has a value. * * @param index @@ -251,6 +240,17 @@ public final class CSVRecord implements Serializable, Iterable<String> { } /** + * Checks whether a given columns is mapped and has a value. + * + * @param name + * the name of the column to be retrieved. + * @return whether a given columns is mapped and has a value + */ + public boolean isSet(final String name) { + return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; + } + + /** * Returns an iterator over the values of this record. * * @return an iterator over the values of this record. diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java index 23a4042..b9ca79d 100644 --- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java +++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java @@ -53,15 +53,31 @@ final class ExtendedBufferedReader extends BufferedReader { super(reader); } + /** + * Closes the stream. + * + * @throws IOException + * If an I/O error occurs + */ @Override - public int read() throws IOException { - final int current = super.read(); - if (current == CR || current == LF && lastChar != CR) { - eolCounter++; + public void close() throws IOException { + // Set ivars before calling super close() in case close() throws an IOException. + closed = true; + lastChar = END_OF_STREAM; + super.close(); + } + + /** + * Returns the current line number + * + * @return the current line number + */ + long getCurrentLineNumber() { + // Check if we are at EOL or EOF or just starting + if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) { + return eolCounter; // counter is accurate } - lastChar = current; - this.position++; - return lastChar; + return eolCounter + 1; // Allow for counter being incremented only at EOL } /** @@ -76,6 +92,47 @@ final class ExtendedBufferedReader extends BufferedReader { return lastChar; } + /** + * Gets the character position in the reader. + * + * @return the current position in the reader (counting characters, not bytes since this is a Reader) + */ + long getPosition() { + return this.position; + } + + public boolean isClosed() { + return closed; + } + + /** + * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will + * still return this value. Does not affect line number or last character. + * + * @return the next character + * + * @throws IOException + * if there is an error in reading + */ + int lookAhead() throws IOException { + super.mark(1); + final int c = super.read(); + super.reset(); + + return c; + } + + @Override + public int read() throws IOException { + final int current = super.read(); + if (current == CR || current == LF && lastChar != CR) { + eolCounter++; + } + lastChar = current; + this.position++; + return lastChar; + } + @Override public int read(final char[] buf, final int offset, final int length) throws IOException { if (length == 0) { @@ -131,61 +188,4 @@ final class ExtendedBufferedReader extends BufferedReader { return line; } - /** - * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will - * still return this value. Does not affect line number or last character. - * - * @return the next character - * - * @throws IOException - * if there is an error in reading - */ - int lookAhead() throws IOException { - super.mark(1); - final int c = super.read(); - super.reset(); - - return c; - } - - /** - * Returns the current line number - * - * @return the current line number - */ - long getCurrentLineNumber() { - // Check if we are at EOL or EOF or just starting - if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) { - return eolCounter; // counter is accurate - } - return eolCounter + 1; // Allow for counter being incremented only at EOL - } - - /** - * Gets the character position in the reader. - * - * @return the current position in the reader (counting characters, not bytes since this is a Reader) - */ - long getPosition() { - return this.position; - } - - public boolean isClosed() { - return closed; - } - - /** - * Closes the stream. - * - * @throws IOException - * If an I/O error occurs - */ - @Override - public void close() throws IOException { - // Set ivars before calling super close() in case close() throws an IOException. - closed = true; - lastChar = END_OF_STREAM; - super.close(); - } - } diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java index b29fc14..2795ca2 100644 --- a/src/main/java/org/apache/commons/csv/Lexer.java +++ b/src/main/java/org/apache/commons/csv/Lexer.java @@ -60,10 +60,6 @@ final class Lexer implements Closeable { private final ExtendedBufferedReader reader; private String firstEol; - String getFirstEol(){ - return firstEol; - } - Lexer(final CSVFormat format, final ExtendedBufferedReader reader) { this.reader = reader; this.delimiter = format.getDelimiter(); @@ -75,6 +71,94 @@ final class Lexer implements Closeable { } /** + * Closes resources. + * + * @throws IOException + * If an I/O error occurs + */ + @Override + public void close() throws IOException { + reader.close(); + } + + /** + * Returns the current character position + * + * @return the current character position + */ + long getCharacterPosition() { + return reader.getPosition(); + } + + /** + * Returns the current line number + * + * @return the current line number + */ + long getCurrentLineNumber() { + return reader.getCurrentLineNumber(); + } + + String getFirstEol(){ + return firstEol; + } + + boolean isClosed() { + return reader.isClosed(); + } + + boolean isCommentStart(final int ch) { + return ch == commentStart; + } + + boolean isDelimiter(final int ch) { + return ch == delimiter; + } + + /** + * @return true if the given character indicates end of file + */ + boolean isEndOfFile(final int ch) { + return ch == END_OF_STREAM; + } + + boolean isEscape(final int ch) { + return ch == escape; + } + + private boolean isMetaChar(final int ch) { + return ch == delimiter || + ch == escape || + ch == quoteChar || + ch == commentStart; + } + + boolean isQuoteChar(final int ch) { + return ch == quoteChar; + } + + /** + * Checks if the current character represents the start of a line: a CR, LF or is at the start of the file. + * + * @param ch the character to check + * @return true if the character is at the start of a line. + */ + boolean isStartOfLine(final int ch) { + return ch == LF || ch == CR || ch == UNDEFINED; + } + + /** + * @return true if the given char is a whitespace character + */ + boolean isWhitespace(final int ch) { + return !isDelimiter(ch) && Character.isWhitespace((char) ch); + } + + private char mapNullToDisabled(final Character c) { + return c == null ? DISABLED : c.charValue(); + } + + /** * Returns the next token. * <p> * A token corresponds to a term, a record change or an end-of-file indicator. @@ -171,59 +255,6 @@ final class Lexer implements Closeable { } /** - * Parses a simple token. - * <p/> - * Simple token are tokens which are not surrounded by encapsulators. A simple token might contain escaped - * delimiters (as \, or \;). The token is finished when one of the following conditions become true: - * <ul> - * <li>end of line has been reached (EORECORD)</li> - * <li>end of stream has been reached (EOF)</li> - * <li>an unescaped delimiter has been reached (TOKEN)</li> - * </ul> - * - * @param token - * the current token - * @param ch - * the current character - * @return the filled token - * @throws IOException - * on stream access error - */ - private Token parseSimpleToken(final Token token, int ch) throws IOException { - // Faster to use while(true)+break than while(token.type == INVALID) - while (true) { - if (readEndOfLine(ch)) { - token.type = EORECORD; - break; - } else if (isEndOfFile(ch)) { - token.type = EOF; - token.isReady = true; // There is data at EOF - break; - } else if (isDelimiter(ch)) { - token.type = TOKEN; - break; - } else if (isEscape(ch)) { - final int unescaped = readEscape(); - if (unescaped == END_OF_STREAM) { // unexpected char after escape - token.content.append((char) ch).append((char) reader.getLastChar()); - } else { - token.content.append((char) unescaped); - } - ch = reader.read(); // continue - } else { - token.content.append((char) ch); - ch = reader.read(); // continue - } - } - - if (ignoreSurroundingSpaces) { - trimTrailingSpaces(token.content); - } - - return token; - } - - /** * Parses an encapsulated token. * <p/> * Encapsulated tokens are surrounded by the given encapsulating-string. The encapsulator itself might be included @@ -294,26 +325,84 @@ final class Lexer implements Closeable { } } - private char mapNullToDisabled(final Character c) { - return c == null ? DISABLED : c.charValue(); - } - /** - * Returns the current line number + * Parses a simple token. + * <p/> + * Simple token are tokens which are not surrounded by encapsulators. A simple token might contain escaped + * delimiters (as \, or \;). The token is finished when one of the following conditions become true: + * <ul> + * <li>end of line has been reached (EORECORD)</li> + * <li>end of stream has been reached (EOF)</li> + * <li>an unescaped delimiter has been reached (TOKEN)</li> + * </ul> * - * @return the current line number + * @param token + * the current token + * @param ch + * the current character + * @return the filled token + * @throws IOException + * on stream access error */ - long getCurrentLineNumber() { - return reader.getCurrentLineNumber(); + private Token parseSimpleToken(final Token token, int ch) throws IOException { + // Faster to use while(true)+break than while(token.type == INVALID) + while (true) { + if (readEndOfLine(ch)) { + token.type = EORECORD; + break; + } else if (isEndOfFile(ch)) { + token.type = EOF; + token.isReady = true; // There is data at EOF + break; + } else if (isDelimiter(ch)) { + token.type = TOKEN; + break; + } else if (isEscape(ch)) { + final int unescaped = readEscape(); + if (unescaped == END_OF_STREAM) { // unexpected char after escape + token.content.append((char) ch).append((char) reader.getLastChar()); + } else { + token.content.append((char) unescaped); + } + ch = reader.read(); // continue + } else { + token.content.append((char) ch); + ch = reader.read(); // continue + } + } + + if (ignoreSurroundingSpaces) { + trimTrailingSpaces(token.content); + } + + return token; } /** - * Returns the current character position + * Greedily accepts \n, \r and \r\n This checker consumes silently the second control-character... * - * @return the current character position + * @return true if the given or next character is a line-terminator */ - long getCharacterPosition() { - return reader.getPosition(); + boolean readEndOfLine(int ch) throws IOException { + // check if we have \r\n... + if (ch == CR && reader.lookAhead() == LF) { + // note: does not change ch outside of this method! + ch = reader.read(); + // Save the EOL state + if (firstEol == null) { + this.firstEol = Constants.CRLF; + } + } + // save EOL state here. + if (firstEol == null) { + if (ch == LF) { + this.firstEol = LF_STRING; + } else if (ch == CR) { + this.firstEol = CR_STRING; + } + } + + return ch == LF || ch == CR; } // TODO escape handling needs more work @@ -369,93 +458,4 @@ final class Lexer implements Closeable { buffer.setLength(length); } } - - /** - * Greedily accepts \n, \r and \r\n This checker consumes silently the second control-character... - * - * @return true if the given or next character is a line-terminator - */ - boolean readEndOfLine(int ch) throws IOException { - // check if we have \r\n... - if (ch == CR && reader.lookAhead() == LF) { - // note: does not change ch outside of this method! - ch = reader.read(); - // Save the EOL state - if (firstEol == null) { - this.firstEol = Constants.CRLF; - } - } - // save EOL state here. - if (firstEol == null) { - if (ch == LF) { - this.firstEol = LF_STRING; - } else if (ch == CR) { - this.firstEol = CR_STRING; - } - } - - return ch == LF || ch == CR; - } - - boolean isClosed() { - return reader.isClosed(); - } - - /** - * @return true if the given char is a whitespace character - */ - boolean isWhitespace(final int ch) { - return !isDelimiter(ch) && Character.isWhitespace((char) ch); - } - - /** - * Checks if the current character represents the start of a line: a CR, LF or is at the start of the file. - * - * @param ch the character to check - * @return true if the character is at the start of a line. - */ - boolean isStartOfLine(final int ch) { - return ch == LF || ch == CR || ch == UNDEFINED; - } - - /** - * @return true if the given character indicates end of file - */ - boolean isEndOfFile(final int ch) { - return ch == END_OF_STREAM; - } - - boolean isDelimiter(final int ch) { - return ch == delimiter; - } - - boolean isEscape(final int ch) { - return ch == escape; - } - - boolean isQuoteChar(final int ch) { - return ch == quoteChar; - } - - boolean isCommentStart(final int ch) { - return ch == commentStart; - } - - private boolean isMetaChar(final int ch) { - return ch == delimiter || - ch == escape || - ch == quoteChar || - ch == commentStart; - } - - /** - * Closes resources. - * - * @throws IOException - * If an I/O error occurs - */ - @Override - public void close() throws IOException { - reader.close(); - } } diff --git a/src/main/java/org/apache/commons/csv/Token.java b/src/main/java/org/apache/commons/csv/Token.java index 861e097..dff7d01 100644 --- a/src/main/java/org/apache/commons/csv/Token.java +++ b/src/main/java/org/apache/commons/csv/Token.java @@ -26,9 +26,6 @@ import static org.apache.commons.csv.Token.Type.INVALID; */ final class Token { - /** length of the initial token (content-)buffer */ - private static final int INITIAL_TOKEN_LENGTH = 50; - enum Type { /** Token has no valid content, i.e. is in its initialized state. */ INVALID, @@ -46,6 +43,9 @@ final class Token { COMMENT } + /** length of the initial token (content-)buffer */ + private static final int INITIAL_TOKEN_LENGTH = 50; + /** Token type */ Token.Type type = INVALID;