PHOENIX-105 Remove commons-csv source

Remove the fork of the commons-csv source code and use the released commons-csv artifact.
Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/c3697326 Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/c3697326 Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/c3697326 Branch: refs/heads/master Commit: c36973263296d3eb4b6ffdbf1a43e9f3e4fe5930 Parents: 4774c63 Author: Gabriel Reid <gabri...@ngdata.com> Authored: Tue Sep 2 10:03:13 2014 +0200 Committer: Gabriel Reid <gabri...@ngdata.com> Committed: Tue Sep 2 13:00:42 2014 +0200 ---------------------------------------------------------------------- .../src/build/components-major-client.xml | 1 + .../components/all-common-dependencies.xml | 3 +- phoenix-core/pom.xml | 4 + .../java/org/apache/commons/csv/Assertions.java | 36 - .../java/org/apache/commons/csv/CSVFormat.java | 884 ------------------- .../java/org/apache/commons/csv/CSVParser.java | 470 ---------- .../java/org/apache/commons/csv/CSVPrinter.java | 429 --------- .../java/org/apache/commons/csv/CSVRecord.java | 225 ----- .../java/org/apache/commons/csv/Constants.java | 68 -- .../commons/csv/ExtendedBufferedReader.java | 178 ---- .../main/java/org/apache/commons/csv/Lexer.java | 431 --------- .../main/java/org/apache/commons/csv/Quote.java | 48 - .../main/java/org/apache/commons/csv/Token.java | 75 -- .../org/apache/commons/csv/package-info.java | 82 -- .../apache/phoenix/util/CSVCommonsLoader.java | 6 +- pom.xml | 7 +- 16 files changed, 16 insertions(+), 2931 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/phoenix/blob/c3697326/phoenix-assembly/src/build/components-major-client.xml ---------------------------------------------------------------------- diff --git a/phoenix-assembly/src/build/components-major-client.xml b/phoenix-assembly/src/build/components-major-client.xml index 08083da..f1f37c2 100644 --- a/phoenix-assembly/src/build/components-major-client.xml +++ 
b/phoenix-assembly/src/build/components-major-client.xml @@ -45,6 +45,7 @@ <include>commons-logging:commons-logging</include> <include>commons-lang:commons-lang</include> <include>commons-cli:commons-cli</include> + <include>org.apache.commons:commons-csv</include> <include>org.codehaus.jackson:jackson-mapper-asl</include> <include>org.codehaus.jackson:jackson-core-asl</include> <include>org.xerial.snappy:snappy-java</include> http://git-wip-us.apache.org/repos/asf/phoenix/blob/c3697326/phoenix-assembly/src/build/components/all-common-dependencies.xml ---------------------------------------------------------------------- diff --git a/phoenix-assembly/src/build/components/all-common-dependencies.xml b/phoenix-assembly/src/build/components/all-common-dependencies.xml index 7d943f6..df4a32f 100644 --- a/phoenix-assembly/src/build/components/all-common-dependencies.xml +++ b/phoenix-assembly/src/build/components/all-common-dependencies.xml @@ -32,6 +32,7 @@ <include>commons-io:commons-io</include> <include>commons-lang:commons-lang</include> <include>commons-logging:commons-logging</include> + <include>org.apache.commons:commons-csv</include> <include>com.google.guava:guava</include> <include>org.apache.hadoop:hadoop*</include> <include>com.google.protobuf:protobuf-java</include> @@ -59,4 +60,4 @@ </includes> </dependencySet> </dependencySets> -</component> \ No newline at end of file +</component> http://git-wip-us.apache.org/repos/asf/phoenix/blob/c3697326/phoenix-core/pom.xml ---------------------------------------------------------------------- diff --git a/phoenix-core/pom.xml b/phoenix-core/pom.xml index 73b27bf..d5774ea 100644 --- a/phoenix-core/pom.xml +++ b/phoenix-core/pom.xml @@ -321,6 +321,10 @@ <groupId>commons-collections</groupId> <artifactId>commons-collections</artifactId> </dependency> + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-csv</artifactId> + </dependency> </dependencies> <profiles> 
http://git-wip-us.apache.org/repos/asf/phoenix/blob/c3697326/phoenix-core/src/main/java/org/apache/commons/csv/Assertions.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/commons/csv/Assertions.java b/phoenix-core/src/main/java/org/apache/commons/csv/Assertions.java deleted file mode 100644 index 63c330a..0000000 --- a/phoenix-core/src/main/java/org/apache/commons/csv/Assertions.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.commons.csv; - -/** - * Utility class for input parameter validation - * - * @version $Id: Assertions.java 1559908 2014-01-21 02:44:30Z ggregory $ - */ -final class Assertions { - - private Assertions() { - // can not be instantiated - } - - public static void notNull(final Object parameter, final String parameterName) { - if (parameter == null) { - throw new IllegalArgumentException("Parameter '" + parameterName + "' must not be null!"); - } - } -} http://git-wip-us.apache.org/repos/asf/phoenix/blob/c3697326/phoenix-core/src/main/java/org/apache/commons/csv/CSVFormat.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/commons/csv/CSVFormat.java b/phoenix-core/src/main/java/org/apache/commons/csv/CSVFormat.java deleted file mode 100644 index 88c2a7f..0000000 --- a/phoenix-core/src/main/java/org/apache/commons/csv/CSVFormat.java +++ /dev/null @@ -1,884 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Constants.BACKSLASH; -import static org.apache.commons.csv.Constants.COMMA; -import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.CRLF; -import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR; -import static org.apache.commons.csv.Constants.LF; -import static org.apache.commons.csv.Constants.TAB; - -import java.io.IOException; -import java.io.Reader; -import java.io.Serializable; -import java.io.StringWriter; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; - -/** - * Specifies the format of a CSV file and parses input. - * - * <h4>Using predefined formats</h4> - * - * <p> - * You can use one of the predefined formats: - * </p> - * - * <ul> - * <li>{@link #DEFAULT}</li> - * <li>{@link #EXCEL}</li> - * <li>{@link #MYSQL}</li> - * <li>{@link #RFC4180}</li> - * <li>{@link #TDF}</li> - * </ul> - * - * <p> - * For example: - * </p> - * - * <pre> - * CSVParser parser = CSVFormat.EXCEL.parse(reader); - * </pre> - * - * <p> - * The {@link CSVRecord} provides static methods to parse other input types, for example: - * </p> - * - * <pre>CSVParser parser = CSVFormat.parseFile(file, CSVFormat.EXCEL);</pre> - * - * <h4>Defining formats</h4> - * - * <p> - * You can extend a format by calling the {@code with} methods. For example: - * </p> - * - * <pre> - * CSVFormat.EXCEL - * .withNullString("N/A") - * .withIgnoreSurroundingSpaces(true); - * </pre> - * - * <h4>Defining column names</h4> - * - * <p> - * To define the column names you want to use to access records, write: - * </p> - * - * <pre> - * CSVFormat.EXCEL.withHeader("Col1", "Col2", "Col3"); - * </pre> - * - * <p> - * Calling {@link #withHeader(String...)} let's you use the given names to address values in a {@link CSVRecord}, and - * assumes that your CSV source does not contain a first record that also defines column names. 
- * - * If it does, then you are overriding this metadata with your names and you should skip the first record by calling - * {@link #withSkipHeaderRecord(boolean)} with {@code true}. - * </p> - * - * <h4>Parsing</h4> - * - * <p> - * You can use a format directly to parse a reader. For example, to parse an Excel file with columns header, write: - * </p> - * - * <pre> - * Reader in = ...; - * CSVFormat.EXCEL.withHeader("Col1", "Col2", "Col3").parse(in); - * </pre> - * - * <p> - * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}. - * </p> - * - * <h4>Referencing columns safely</h4> - * - * <p> - * If your source contains a header record, you can simplify your code and safely reference columns, - * by using {@link #withHeader(String...)} with no arguments: - * </p> - * - * <pre> - * CSVFormat.EXCEL.withHeader(); - * </pre> - * - * <p> - * This causes the parser to read the first record and use its values as column names. - * - * Then, call one of the {@link CSVRecord} get method that takes a String column name argument: - * </p> - * - * <pre> - * String value = record.get("Col1"); - * </pre> - * - * <p> - * This makes your code impervious to changes in column order in the CSV file. - * </p> - * - * <h4>Notes</h4> - * - * <p> - * This class is immutable. - * </p> - * - * @version $Id: CSVFormat.java 1559908 2014-01-21 02:44:30Z ggregory $ - */ -public final class CSVFormat implements Serializable { - - private static final long serialVersionUID = 1L; - - private final char delimiter; - private final Character quoteChar; // null if quoting is disabled - private final Quote quotePolicy; - private final Character commentStart; // null if commenting is disabled - private final Character escape; // null if escaping is disabled - private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values? 
- private final boolean ignoreEmptyLines; - private final String recordSeparator; // for outputs - private final String nullString; // the string to be used for null values - private final String[] header; - private final boolean skipHeaderRecord; - - /** - * Standard comma separated format, as for {@link #RFC4180} but allowing empty lines. - * <h3>RFC 4180:</h3> - * <ul> - * <li>withDelimiter(',')</li> - * <li>withQuoteChar('"')</li> - * <li>withRecordSeparator(CRLF)</li> - * </ul> - * <h3>Additional:</h3> - * <ul> - * <li>withIgnoreEmptyLines(true)</li> - * </ul> - */ - public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, - false, true, CRLF, null, null, false); - - /** - * Comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. - * <h3>RFC 4180:</h3> - * <ul> - * <li>withDelimiter(',')</li> - * <li>withQuoteChar('"')</li> - * <li>withRecordSeparator(CRLF)</li> - * </ul> - */ - public static final CSVFormat RFC4180 = DEFAULT.withIgnoreEmptyLines(false); - - /** - * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is - * locale dependent, it might be necessary to customize this format to accommodate to your regional settings. - * <p/> - * For example for parsing or generating a CSV file on a French system the following format will be used: - * - * <pre> - * CSVFormat fmt = CSVFormat.newBuilder(EXCEL).withDelimiter(';'); - * </pre> - * Settings are: - * <ul> - * <li>withDelimiter(',')</li> - * <li>withQuoteChar('"')</li> - * <li>withRecordSeparator(CRLF)</li> - * </ul> - * Note: this is currently the same as RFC4180 - */ - public static final CSVFormat EXCEL = DEFAULT.withIgnoreEmptyLines(false); - - /** Tab-delimited format, with quote; leading and trailing spaces ignored. 
*/ - public static final CSVFormat TDF = - DEFAULT - .withDelimiter(TAB) - .withIgnoreSurroundingSpaces(true); - - /** - * Default MySQL format used by the <tt>SELECT INTO OUTFILE</tt> and <tt>LOAD DATA INFILE</tt> operations. This is - * a tab-delimited format with a LF character as the line separator. Values are not quoted and special characters - * are escaped with '\'. - * - * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html"> - * http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a> - */ - public static final CSVFormat MYSQL = - DEFAULT - .withDelimiter(TAB) - .withEscape(BACKSLASH) - .withIgnoreEmptyLines(false) - .withQuoteChar(null) - .withRecordSeparator(LF); - - /** - * Returns true if the given character is a line break character. - * - * @param c - * the character to check - * - * @return true if <code>c</code> is a line break character - */ - private static boolean isLineBreak(final char c) { - return c == LF || c == CR; - } - - /** - * Returns true if the given character is a line break character. - * - * @param c - * the character to check, may be null - * - * @return true if <code>c</code> is a line break character (and not null) - */ - private static boolean isLineBreak(final Character c) { - return c != null && isLineBreak(c.charValue()); - } - - /** - * Creates a new CSV format with the specified delimiter. - * - * @param delimiter - * the char used for value separation, must not be a line break character - * @return a new CSV format. - * @throws IllegalArgumentException if the delimiter is a line break character - */ - public static CSVFormat newFormat(final char delimiter) { - return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, false); - } - - /** - * Creates a customized CSV format. 
- * - * @param delimiter - * the char used for value separation, must not be a line break character - * @param quoteChar - * the Character used as value encapsulation marker, may be {@code null} to disable - * @param quotePolicy - * the quote policy - * @param commentStart - * the Character used for comment identification, may be {@code null} to disable - * @param escape - * the Character used to escape special characters in values, may be {@code null} to disable - * @param ignoreSurroundingSpaces - * <tt>true</tt> when whitespaces enclosing values should be ignored - * @param ignoreEmptyLines - * <tt>true</tt> when the parser should skip empty lines - * @param recordSeparator - * the line separator to use for output - * @param nullString - * the line separator to use for output - * @param header - * the header - * @param skipHeaderRecord TODO - * @throws IllegalArgumentException if the delimiter is a line break character - */ - // package protected to give access without needing a synthetic accessor - CSVFormat(final char delimiter, final Character quoteChar, - final Quote quotePolicy, final Character commentStart, - final Character escape, final boolean ignoreSurroundingSpaces, - final boolean ignoreEmptyLines, final String recordSeparator, - final String nullString, final String[] header, final boolean skipHeaderRecord) { - if (isLineBreak(delimiter)) { - throw new IllegalArgumentException("The delimiter cannot be a line break"); - } - this.delimiter = delimiter; - this.quoteChar = quoteChar; - this.quotePolicy = quotePolicy; - this.commentStart = commentStart; - this.escape = escape; - this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; - this.ignoreEmptyLines = ignoreEmptyLines; - this.recordSeparator = recordSeparator; - this.nullString = nullString; - this.header = header == null ? 
null : header.clone(); - this.skipHeaderRecord = skipHeaderRecord; - } - - @Override - public boolean equals(final Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (getClass() != obj.getClass()) { - return false; - } - - final CSVFormat other = (CSVFormat) obj; - if (delimiter != other.delimiter) { - return false; - } - if (quotePolicy != other.quotePolicy) { - return false; - } - if (quoteChar == null) { - if (other.quoteChar != null) { - return false; - } - } else if (!quoteChar.equals(other.quoteChar)) { - return false; - } - if (commentStart == null) { - if (other.commentStart != null) { - return false; - } - } else if (!commentStart.equals(other.commentStart)) { - return false; - } - if (escape == null) { - if (other.escape != null) { - return false; - } - } else if (!escape.equals(other.escape)) { - return false; - } - if (!Arrays.equals(header, other.header)) { - return false; - } - if (ignoreSurroundingSpaces != other.ignoreSurroundingSpaces) { - return false; - } - if (ignoreEmptyLines != other.ignoreEmptyLines) { - return false; - } - if (recordSeparator == null) { - if (other.recordSeparator != null) { - return false; - } - } else if (!recordSeparator.equals(other.recordSeparator)) { - return false; - } - return true; - } - - /** - * Formats the specified values. - * - * @param values - * the values to format - * @return the formatted values - */ - public String format(final Object... values) { - final StringWriter out = new StringWriter(); - try { - new CSVPrinter(out, this).printRecord(values); - return out.toString().trim(); - } catch (final IOException e) { - // should not happen because a StringWriter does not do IO. - throw new IllegalStateException(e); - } - } - - /** - * Returns the character marking the start of a line comment. 
- * - * @return the comment start marker, may be {@code null} - */ - public Character getCommentStart() { - return commentStart; - } - - /** - * Returns the character delimiting the values (typically ';', ',' or '\t'). - * - * @return the delimiter character - */ - public char getDelimiter() { - return delimiter; - } - - /** - * Returns the escape character. - * - * @return the escape character, may be {@code null} - */ - public Character getEscape() { - return escape; - } - - /** - * Returns a copy of the header array. - * - * @return a copy of the header array - */ - public String[] getHeader() { - return header != null ? header.clone() : null; - } - - /** - * Specifies whether empty lines between records are ignored when parsing input. - * - * @return <tt>true</tt> if empty lines between records are ignored, <tt>false</tt> if they are turned into empty - * records. - */ - public boolean getIgnoreEmptyLines() { - return ignoreEmptyLines; - } - - /** - * Specifies whether spaces around values are ignored when parsing input. - * - * @return <tt>true</tt> if spaces around values are ignored, <tt>false</tt> if they are treated as part of the - * value. - */ - public boolean getIgnoreSurroundingSpaces() { - return ignoreSurroundingSpaces; - } - - /** - * Gets the String to convert to and from {@code null}. - * <ul> - * <li> - * <strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading - * records. - * </li> - * <li> - * <strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li> - * </ul> - * - * @return the String to convert to and from {@code null}. No substitution occurs if {@code null} - */ - public String getNullString() { - return nullString; - } - - /** - * Returns the character used to encapsulate values containing special characters. 
- * - * @return the quoteChar character, may be {@code null} - */ - public Character getQuoteChar() { - return quoteChar; - } - - /** - * Returns the quote policy output fields. - * - * @return the quote policy - */ - public Quote getQuotePolicy() { - return quotePolicy; - } - - /** - * Returns the line separator delimiting output records. - * - * @return the line separator - */ - public String getRecordSeparator() { - return recordSeparator; - } - - /** - * Returns whether to skip the header record. - * - * @return whether to skip the header record. - */ - public boolean getSkipHeaderRecord() { - return skipHeaderRecord; - } - - @Override - public int hashCode() - { - final int prime = 31; - int result = 1; - - result = prime * result + delimiter; - result = prime * result + ((quotePolicy == null) ? 0 : quotePolicy.hashCode()); - result = prime * result + ((quoteChar == null) ? 0 : quoteChar.hashCode()); - result = prime * result + ((commentStart == null) ? 0 : commentStart.hashCode()); - result = prime * result + ((escape == null) ? 0 : escape.hashCode()); - result = prime * result + (ignoreSurroundingSpaces ? 1231 : 1237); - result = prime * result + (ignoreEmptyLines ? 1231 : 1237); - result = prime * result + ((recordSeparator == null) ? 0 : recordSeparator.hashCode()); - result = prime * result + Arrays.hashCode(header); - return result; - } - - /** - * Specifies whether comments are supported by this format. - * - * Note that the comment introducer character is only recognized at the start of a line. - * - * @return <tt>true</tt> is comments are supported, <tt>false</tt> otherwise - */ - public boolean isCommentingEnabled() { - return commentStart != null; - } - - /** - * Returns whether escape are being processed. - * - * @return {@code true} if escapes are processed - */ - public boolean isEscaping() { - return escape != null; - } - - /** - * Returns whether a nullString has been defined. 
- * - * @return {@code true} if a nullString is defined - */ - public boolean isNullHandling() { - return nullString != null; - } - - /** - * Returns whether a quoteChar has been defined. - * - * @return {@code true} if a quoteChar is defined - */ - public boolean isQuoting() { - return quoteChar != null; - } - - /** - * Parses the specified content. - * - * <p> - * See also the various static parse methods on {@link CSVParser}. - * </p> - * - * @param in - * the input stream - * @return a parser over a stream of {@link CSVRecord}s. - * @throws IOException - * If an I/O error occurs - */ - public CSVParser parse(final Reader in) throws IOException { - return new CSVParser(in, this); - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder(); - sb.append("Delimiter=<").append(delimiter).append('>'); - if (isEscaping()) { - sb.append(' '); - sb.append("Escape=<").append(escape).append('>'); - } - if (isQuoting()) { - sb.append(' '); - sb.append("QuoteChar=<").append(quoteChar).append('>'); - } - if (isCommentingEnabled()) { - sb.append(' '); - sb.append("CommentStart=<").append(commentStart).append('>'); - } - if (isNullHandling()) { - sb.append(' '); - sb.append("NullString=<").append(nullString).append('>'); - } - if(recordSeparator != null) { - sb.append(' '); - sb.append("RecordSeparator=<").append(recordSeparator).append('>'); - } - if (getIgnoreEmptyLines()) { - sb.append(" EmptyLines:ignored"); - } - if (getIgnoreSurroundingSpaces()) { - sb.append(" SurroundingSpaces:ignored"); - } - sb.append(" SkipHeaderRecord:").append(skipHeaderRecord); - if (header != null) { - sb.append(' '); - sb.append("Header:").append(Arrays.toString(header)); - } - return sb.toString(); - } - - /** - * Verifies the consistency of the parameters and throws an IllegalStateException if necessary. 
- * - * @throws IllegalStateException - */ - void validate() throws IllegalStateException { - if (quoteChar != null && delimiter == quoteChar.charValue()) { - throw new IllegalStateException( - "The quoteChar character and the delimiter cannot be the same ('" + quoteChar + "')"); - } - - if (escape != null && delimiter == escape.charValue()) { - throw new IllegalStateException( - "The escape character and the delimiter cannot be the same ('" + escape + "')"); - } - - if (commentStart != null && delimiter == commentStart.charValue()) { - throw new IllegalStateException( - "The comment start character and the delimiter cannot be the same ('" + commentStart + "')"); - } - - if (quoteChar != null && quoteChar.equals(commentStart)) { - throw new IllegalStateException( - "The comment start character and the quoteChar cannot be the same ('" + commentStart + "')"); - } - - if (escape != null && escape.equals(commentStart)) { - throw new IllegalStateException( - "The comment start and the escape character cannot be the same ('" + commentStart + "')"); - } - - if (escape == null && quotePolicy == Quote.NONE) { - throw new IllegalStateException("No quotes mode set but no escape character is set"); - } - - if (header != null) { - final Set<String> set = new HashSet<String>(header.length); - set.addAll(Arrays.asList(header)); - if (set.size() != header.length) { - throw new IllegalStateException("The header contains duplicate names: " + Arrays.toString(header)); - } - } - } - - /** - * Sets the comment start marker of the format to the specified character. - * - * Note that the comment start character is only recognized at the start of a line. 
- * - * @param commentStart - * the comment start marker - * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker - * @throws IllegalArgumentException - * thrown if the specified character is a line break - */ - public CSVFormat withCommentStart(final char commentStart) { - return withCommentStart(Character.valueOf(commentStart)); - } - - /** - * Sets the comment start marker of the format to the specified character. - * - * Note that the comment start character is only recognized at the start of a line. - * - * @param commentStart - * the comment start marker, use {@code null} to disable - * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker - * @throws IllegalArgumentException - * thrown if the specified character is a line break - */ - public CSVFormat withCommentStart(final Character commentStart) { - if (isLineBreak(commentStart)) { - throw new IllegalArgumentException("The comment start character cannot be a line break"); - } - return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord); - } - - /** - * Sets the delimiter of the format to the specified character. - * - * @param delimiter - * the delimiter character - * @return A new CSVFormat that is equal to this with the specified character as delimiter - * @throws IllegalArgumentException - * thrown if the specified character is a line break - */ - public CSVFormat withDelimiter(final char delimiter) { - if (isLineBreak(delimiter)) { - throw new IllegalArgumentException("The delimiter cannot be a line break"); - } - return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord); - } - - /** - * Sets the escape character of the format to the specified character. 
- * - * @param escape - * the escape character - * @return A new CSVFormat that is equal to his but with the specified character as the escape character - * @throws IllegalArgumentException - * thrown if the specified character is a line break - */ - public CSVFormat withEscape(final char escape) { - return withEscape(Character.valueOf(escape)); - } - - /** - * Sets the escape character of the format to the specified character. - * - * @param escape - * the escape character, use {@code null} to disable - * @return A new CSVFormat that is equal to this but with the specified character as the escape character - * @throws IllegalArgumentException - * thrown if the specified character is a line break - */ - public CSVFormat withEscape(final Character escape) { - if (isLineBreak(escape)) { - throw new IllegalArgumentException("The escape character cannot be a line break"); - } - return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord); - } - - /** - * Sets the header of the format. The header can either be parsed automatically from the input file with: - * - * <pre> - * CSVFormat format = aformat.withHeader();</pre> - * - * or specified manually with: - * - * <pre> - * CSVFormat format = aformat.withHeader("name", "email", "phone");</pre> - * - * @param header - * the header, <tt>null</tt> if disabled, empty if parsed automatically, user specified otherwise. - * - * @return A new CSVFormat that is equal to this but with the specified header - * @see #withSkipHeaderRecord(boolean) - */ - public CSVFormat withHeader(final String... header) { - return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord); - } - - /** - * Sets the empty line skipping behavior of the format. 
- * - * @param ignoreEmptyLines - * the empty line skipping behavior, <tt>true</tt> to ignore the empty lines between the records, - * <tt>false</tt> to translate empty lines to empty records. - * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. - */ - public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) { - return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord); - } - - /** - * Sets the trimming behavior of the format. - * - * @param ignoreSurroundingSpaces - * the trimming behavior, <tt>true</tt> to remove the surrounding spaces, <tt>false</tt> to leave the - * spaces as is. - * @return A new CSVFormat that is equal to this but with the specified trimming behavior. - */ - public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { - return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord); - } - - /** - * Performs conversions to and from null for strings on input and output. - * <ul> - * <li> - * <strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading - * records.</li> - * <li> - * <strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li> - * </ul> - * - * @param nullString - * the String to convert to and from {@code null}. No substitution occurs if {@code null} - * - * @return A new CSVFormat that is equal to this but with the specified null conversion string. 
- */ - public CSVFormat withNullString(final String nullString) { - return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord); - } - - /** - * Sets the quoteChar of the format to the specified character. - * - * @param quoteChar - * the quoteChar character - * @return A new CSVFormat that is equal to this but with the specified character as quoteChar - * @throws IllegalArgumentException - * thrown if the specified character is a line break - */ - public CSVFormat withQuoteChar(final char quoteChar) { - return withQuoteChar(Character.valueOf(quoteChar)); - } - - /** - * Sets the quoteChar of the format to the specified character. - * - * @param quoteChar - * the quoteChar character, use {@code null} to disable - * @return A new CSVFormat that is equal to this but with the specified character as quoteChar - * @throws IllegalArgumentException - * thrown if the specified character is a line break - */ - public CSVFormat withQuoteChar(final Character quoteChar) { - if (isLineBreak(quoteChar)) { - throw new IllegalArgumentException("The quoteChar cannot be a line break"); - } - return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord); - } - - /** - * Sets the output quote policy of the format to the specified value. - * - * @param quotePolicy - * the quote policy to use for output. - * - * @return A new CSVFormat that is equal to this but with the specified quote policy - */ - public CSVFormat withQuotePolicy(final Quote quotePolicy) { - return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord); - } - - /** - * Sets the record separator of the format to the specified character. 
- * - * @param recordSeparator - * the record separator to use for output. - * - * @return A new CSVFormat that is equal to this but with the the specified output record separator - */ - public CSVFormat withRecordSeparator(final char recordSeparator) { - return withRecordSeparator(String.valueOf(recordSeparator)); - } - - /** - * Sets the record separator of the format to the specified String. - * - * @param recordSeparator - * the record separator to use for output. - * - * @return A new CSVFormat that is equal to this but with the the specified output record separator - */ - public CSVFormat withRecordSeparator(final String recordSeparator) { - return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord); - } - - /** - * Sets whether to skip the header record. - * - * @param skipHeaderRecord - * whether to skip the header record. - * - * @return A new CSVFormat that is equal to this but with the the specified skipHeaderRecord setting. - * @see #withHeader(String...) - */ - public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) { - return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord); - } -} http://git-wip-us.apache.org/repos/asf/phoenix/blob/c3697326/phoenix-core/src/main/java/org/apache/commons/csv/CSVParser.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/commons/csv/CSVParser.java b/phoenix-core/src/main/java/org/apache/commons/csv/CSVParser.java deleted file mode 100644 index 19c7f40..0000000 --- a/phoenix-core/src/main/java/org/apache/commons/csv/CSVParser.java +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Token.Type.TOKEN; - -import java.io.Closeable; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.Reader; -import java.io.StringReader; -import java.net.URL; -import java.nio.charset.Charset; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; - -/** - * Parses CSV files according to the specified format. - * - * Because CSV appears in many different dialects, the parser supports many formats by allowing the - * specification of a {@link CSVFormat}. - * - * The parser works record wise. It is not possible to go back, once a record has been parsed from the input stream. 
- * - * <h4>Creating instances</h4> - * There are several static factory methods that can be used to create instances for various types of resources: - * <p> - * <ul> - * <li>{@link #parse(java.io.File, CSVFormat)}</li> - * <li>{@link #parse(String, CSVFormat)}</li> - * <li>{@link #parse(java.net.URL, java.nio.charset.Charset, CSVFormat)}</li> - * </ul> - * </p> - * <p> - * Alternatively parsers can also be created by passing a {@link Reader} directly to the sole constructor. - * - * For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut: - * </p> - * <pre> - * for(CSVRecord record : CSVFormat.EXCEL.parse(in)) { - * ... - * } - * </pre> - * - * <h4>Parsing record wise</h4> - * <p> - * To parse a CSV input from a file, you write: - * </p> - * - * <pre> - * File csvData = new File("/path/to/csv"); - * CSVParser parser = CSVParser.parse(csvData, CSVFormat.RFC4180); - * for (CSVRecord csvRecord : parser) { - * ... - * } - * </pre> - * - * <p> - * This will read the parse the contents of the file using the - * <a href="http://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a> format. - * </p> - * - * <p> - * To parse CSV input in a format like Excel, you write: - * </p> - * - * <pre> - * CSVParser parser = CSVParser.parse(csvData, CSVFormat.EXCEL); - * for (CSVRecord csvRecord : parser) { - * ... - * } - * </pre> - * - * <p> - * If the predefined formats don't match the format at hands, custom formats can be defined. More information about - * customising CSVFormats is available in {@link CSVFormat CSVFormat JavaDoc}. - * </p> - * - * <h4>Parsing into memory</h4> - * <p> - * If parsing record wise is not desired, the contents of the input can be read completely into memory. 
- * </p> - * - * <pre> - * Reader in = new StringReader("a;b\nc;d"); - * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL); - * List<CSVRecord> list = parser.getRecords(); - * </pre> - * - * <p> - * There are two constraints that have to be kept in mind: - * </p> - * - * <p> - * <ol> - * <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from - * the input, those records will not end up in the in memory representation of your CSV data.</li> - * <li>Parsing into memory may consume a lot of system resources depending on the input. For example if you're - * parsing a 150MB file of CSV data the contents will be read completely into memory.</li> - * </ol> - * </p> - * - * <h4>Notes</h4> - * <p> - * Internal parser state is completely covered by the format and the reader-state. - * </p> - * - * @version $Id: CSVParser.java 1559908 2014-01-21 02:44:30Z ggregory $ - * - * @see <a href="package-summary.html">package documentation for more details</a> - */ -public final class CSVParser implements Iterable<CSVRecord>, Closeable { - - /** - * Creates a parser for the given {@link File}. - * - * @param file - * a CSV file. Must not be null. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @return a new parser - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either file or format are null. - * @throws IOException - * If an I/O error occurs - */ - public static CSVParser parse(final File file, final CSVFormat format) throws IOException { - Assertions.notNull(file, "file"); - Assertions.notNull(format, "format"); - - return new CSVParser(new FileReader(file), format); - } - - /** - * Creates a parser for the given {@link String}. - * - * @param string - * a CSV string. Must not be null. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. 
- * @return a new parser - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either string or format are null. - * @throws IOException - * If an I/O error occurs - */ - public static CSVParser parse(final String string, final CSVFormat format) throws IOException { - Assertions.notNull(string, "string"); - Assertions.notNull(format, "format"); - - return new CSVParser(new StringReader(string), format); - } - - /** - * Creates a parser for the given URL. - * - * <p> - * If you do not read all records from the given {@code url}, you should call {@link #close()} on the parser, unless - * you close the {@code url}. - * </p> - * - * @param url - * a URL. Must not be null. - * @param charset - * the charset for the resource. Must not be null. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @return a new parser - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either url, charset or format are null. - * @throws IOException - * If an I/O error occurs - */ - public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException { - Assertions.notNull(url, "url"); - Assertions.notNull(charset, "charset"); - Assertions.notNull(format, "format"); - - return new CSVParser(new InputStreamReader(url.openStream(), - charset == null ? Charset.forName("UTF-8") : charset), format); - } - - // the following objects are shared to reduce garbage - - private final CSVFormat format; - - /** A mapping of column names to column indices */ - private final Map<String, Integer> headerMap; - - private final Lexer lexer; - - /** A record buffer for getRecord(). Grows as necessary and is reused. 
*/ - private final List<String> record = new ArrayList<String>(); - - private long recordNumber; - - private final Token reusableToken = new Token(); - - /** - * Customized CSV parser using the given {@link CSVFormat} - * - * <p> - * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, - * unless you close the {@code reader}. - * </p> - * - * @param reader - * a Reader containing CSV-formatted input. Must not be null. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either reader or format are null. - * @throws IOException - * If an I/O error occurs - */ - public CSVParser(final Reader reader, final CSVFormat format) throws IOException { - Assertions.notNull(reader, "reader"); - Assertions.notNull(format, "format"); - - format.validate(); - this.format = format; - this.lexer = new Lexer(format, new ExtendedBufferedReader(reader)); - this.headerMap = this.initializeHeader(); - } - - private void addRecordValue() { - final String input = this.reusableToken.content.toString(); - final String nullString = this.format.getNullString(); - if (nullString == null) { - this.record.add(input); - } else { - this.record.add(input.equalsIgnoreCase(nullString) ? null : input); - } - } - - /** - * Closes resources. - * - * @throws IOException - * If an I/O error occurs - */ - @Override - public void close() throws IOException { - if (this.lexer != null) { - this.lexer.close(); - } - } - - /** - * Returns the current line number in the input stream. - * <p/> - * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to the record number. - * - * @return current line number - */ - public long getCurrentLineNumber() { - return this.lexer.getCurrentLineNumber(); - } - - /** - * Returns a copy of the header map that iterates in column order. 
- * <p> - * The map keys are column names. The map values are 0-based indices. - * </p> - * @return a copy of the header map that iterates in column order. - */ - public Map<String, Integer> getHeaderMap() { - return this.headerMap == null ? null : new LinkedHashMap<String, Integer>(this.headerMap); - } - - /** - * Returns the current record number in the input stream. - * <p/> - * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to the line number. - * - * @return current line number - */ - public long getRecordNumber() { - return this.recordNumber; - } - - /** - * Parses the CSV input according to the given format and returns the content as a list of - * {@link CSVRecord CSVRecords}. - * <p/> - * The returned content starts at the current parse-position in the stream. - * - * @return list of {@link CSVRecord CSVRecords}, may be empty - * @throws IOException - * on parse error or input read-failure - */ - public List<CSVRecord> getRecords() throws IOException { - final List<CSVRecord> records = new ArrayList<CSVRecord>(); - CSVRecord rec; - while ((rec = this.nextRecord()) != null) { - records.add(rec); - } - return records; - } - - /** - * Initializes the name to index mapping if the format defines a header. - * - * @return null if the format has no header. 
- */ - private Map<String, Integer> initializeHeader() throws IOException { - Map<String, Integer> hdrMap = null; - final String[] formatHeader = this.format.getHeader(); - if (formatHeader != null) { - hdrMap = new LinkedHashMap<String, Integer>(); - - String[] header = null; - if (formatHeader.length == 0) { - // read the header from the first line of the file - final CSVRecord nextRecord = this.nextRecord(); - if (nextRecord != null) { - header = nextRecord.values(); - } - } else { - if (this.format.getSkipHeaderRecord()) { - this.nextRecord(); - } - header = formatHeader; - } - - // build the name to index mappings - if (header != null) { - for (int i = 0; i < header.length; i++) { - hdrMap.put(header[i], Integer.valueOf(i)); - } - } - } - return hdrMap; - } - - public boolean isClosed() { - return this.lexer.isClosed(); - } - - /** - * Returns an iterator on the records. - * - * <p>IOExceptions occurring during the iteration are wrapped in a - * RuntimeException. - * If the parser is closed a call to {@code next()} will throw a - * NoSuchElementException.</p> - */ - @Override - public Iterator<CSVRecord> iterator() { - return new Iterator<CSVRecord>() { - private CSVRecord current; - - private CSVRecord getNextRecord() { - try { - return CSVParser.this.nextRecord(); - } catch (final IOException e) { - // TODO: This is not great, throw an ISE instead? 
- throw new RuntimeException(e); - } - } - - @Override - public boolean hasNext() { - if (CSVParser.this.isClosed()) { - return false; - } - if (this.current == null) { - this.current = this.getNextRecord(); - } - - return this.current != null; - } - - @Override - public CSVRecord next() { - if (CSVParser.this.isClosed()) { - throw new NoSuchElementException("CSVParser has been closed"); - } - CSVRecord next = this.current; - this.current = null; - - if (next == null) { - // hasNext() wasn't called before - next = this.getNextRecord(); - if (next == null) { - throw new NoSuchElementException("No more CSV records available"); - } - } - - return next; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - }; - } - - /** - * Parses the next record from the current point in the stream. - * - * @return the record as an array of values, or <tt>null</tt> if the end of the stream has been reached - * @throws IOException - * on parse error or input read-failure - */ - CSVRecord nextRecord() throws IOException { - CSVRecord result = null; - this.record.clear(); - StringBuilder sb = null; - do { - this.reusableToken.reset(); - this.lexer.nextToken(this.reusableToken); - switch (this.reusableToken.type) { - case TOKEN: - this.addRecordValue(); - break; - case EORECORD: - this.addRecordValue(); - break; - case EOF: - if (this.reusableToken.isReady) { - this.addRecordValue(); - } - break; - case INVALID: - throw new IOException("(line " + this.getCurrentLineNumber() + ") invalid parse sequence"); - case COMMENT: // Ignored currently - if (sb == null) { // first comment for this record - sb = new StringBuilder(); - } else { - sb.append(Constants.LF); - } - sb.append(this.reusableToken.content); - this.reusableToken.type = TOKEN; // Read another token - break; - } - } while (this.reusableToken.type == TOKEN); - - if (!this.record.isEmpty()) { - this.recordNumber++; - final String comment = sb == null ? 
null : sb.toString(); - result = new CSVRecord(this.record.toArray(new String[this.record.size()]), this.headerMap, comment, - this.recordNumber); - } - return result; - } - -} http://git-wip-us.apache.org/repos/asf/phoenix/blob/c3697326/phoenix-core/src/main/java/org/apache/commons/csv/CSVPrinter.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/commons/csv/CSVPrinter.java b/phoenix-core/src/main/java/org/apache/commons/csv/CSVPrinter.java deleted file mode 100644 index 542c43f..0000000 --- a/phoenix-core/src/main/java/org/apache/commons/csv/CSVPrinter.java +++ /dev/null @@ -1,429 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Constants.COMMENT; -import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.LF; -import static org.apache.commons.csv.Constants.SP; - -import java.io.Closeable; -import java.io.Flushable; -import java.io.IOException; -import java.sql.ResultSet; -import java.sql.SQLException; - -/** - * Prints values in a CSV format. 
- * - * @version $Id: CSVPrinter.java 1560384 2014-01-22 15:27:35Z ggregory $ - */ -public final class CSVPrinter implements Flushable, Closeable { - - /** The place that the values get written. */ - private final Appendable out; - private final CSVFormat format; - - /** True if we just began a new record. */ - private boolean newRecord = true; - - /** - * Creates a printer that will print values to the given stream following the CSVFormat. - * <p/> - * Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats - * (encapsulation and escaping with a different character) are not supported. - * - * @param out - * stream to which to print. Must not be null. - * @param format - * the CSV format. Must not be null. - * @throws IllegalArgumentException - * thrown if the parameters of the format are inconsistent or if either out or format are null. - */ - public CSVPrinter(final Appendable out, final CSVFormat format) { - Assertions.notNull(out, "out"); - Assertions.notNull(format, "format"); - - this.out = out; - this.format = format; - this.format.validate(); - } - - // ====================================================== - // printing implementation - // ====================================================== - - @Override - public void close() throws IOException { - if (out instanceof Closeable) { - ((Closeable) out).close(); - } - } - - /** - * Flushes the underlying stream. - * - * @throws IOException - * If an I/O error occurs - */ - @Override - public void flush() throws IOException { - if (out instanceof Flushable) { - ((Flushable) out).flush(); - } - } - - /** - * Prints the string as the next value on the line. The value will be escaped or encapsulated as needed. - * - * @param value - * value to be output. 
- * @throws IOException - * If an I/O error occurs - */ - public void print(final Object value) throws IOException { - // null values are considered empty - String strValue; - if (value == null) { - final String nullString = format.getNullString(); - strValue = nullString == null ? Constants.EMPTY : nullString; - } else { - strValue = value.toString(); - } - this.print(value, strValue, 0, strValue.length()); - } - - private void print(final Object object, final CharSequence value, - final int offset, final int len) throws IOException { - if (!newRecord) { - out.append(format.getDelimiter()); - } - if (format.isQuoting()) { - // the original object is needed so can check for Number - printAndQuote(object, value, offset, len); - } else if (format.isEscaping()) { - printAndEscape(value, offset, len); - } else { - out.append(value, offset, offset + len); - } - newRecord = false; - } - - /* - * Note: must only be called if escaping is enabled, otherwise will generate NPE - */ - private void printAndEscape(final CharSequence value, final int offset, final int len) throws IOException { - int start = offset; - int pos = offset; - final int end = offset + len; - - final char delim = format.getDelimiter(); - final char escape = format.getEscape().charValue(); - - while (pos < end) { - char c = value.charAt(pos); - if (c == CR || c == LF || c == delim || c == escape) { - // write out segment up until this char - if (pos > start) { - out.append(value, start, pos); - } - if (c == LF) { - c = 'n'; - } else if (c == CR) { - c = 'r'; - } - - out.append(escape); - out.append(c); - - start = pos + 1; // start on the current char after this one - } - - pos++; - } - - // write last segment - if (pos > start) { - out.append(value, start, pos); - } - } - - /* - * Note: must only be called if quoting is enabled, otherwise will generate NPE - */ - // the original object is needed so can check for Number - private void printAndQuote(final Object object, final CharSequence value, - final 
int offset, final int len) throws IOException { - boolean quote = false; - int start = offset; - int pos = offset; - final int end = offset + len; - - final char delimChar = format.getDelimiter(); - final char quoteChar = format.getQuoteChar().charValue(); - - Quote quotePolicy = format.getQuotePolicy(); - if (quotePolicy == null) { - quotePolicy = Quote.MINIMAL; - } - switch (quotePolicy) { - case ALL: - quote = true; - break; - case NON_NUMERIC: - quote = !(object instanceof Number); - break; - case NONE: - // Use the existing escaping code - printAndEscape(value, offset, len); - return; - case MINIMAL: - if (len <= 0) { - // always quote an empty token that is the first - // on the line, as it may be the only thing on the - // line. If it were not quoted in that case, - // an empty line has no tokens. - if (newRecord) { - quote = true; - } - } else { - char c = value.charAt(pos); - - // Hmmm, where did this rule come from? - if (newRecord && (c < '0' || (c > '9' && c < 'A') || (c > 'Z' && c < 'a') || (c > 'z'))) { - quote = true; - // } else if (c == ' ' || c == '\f' || c == '\t') { - } else if (c <= COMMENT) { - // Some other chars at the start of a value caused the parser to fail, so for now - // encapsulate if we start in anything less than '#'. We are being conservative - // by including the default comment char too. 
- quote = true; - } else { - while (pos < end) { - c = value.charAt(pos); - if (c == LF || c == CR || c == quoteChar || c == delimChar) { - quote = true; - break; - } - pos++; - } - - if (!quote) { - pos = end - 1; - c = value.charAt(pos); - // if (c == ' ' || c == '\f' || c == '\t') { - // Some other chars at the end caused the parser to fail, so for now - // encapsulate if we end in anything less than ' ' - if (c <= SP) { - quote = true; - } - } - } - } - - if (!quote) { - // no encapsulation needed - write out the original value - out.append(value, start, end); - return; - } - break; - } - - if (!quote) { - // no encapsulation needed - write out the original value - out.append(value, start, end); - return; - } - - // we hit something that needed encapsulation - out.append(quoteChar); - - // Pick up where we left off: pos should be positioned on the first character that caused - // the need for encapsulation. - while (pos < end) { - final char c = value.charAt(pos); - if (c == quoteChar) { - // write out the chunk up until this point - - // add 1 to the length to write out the encapsulator also - out.append(value, start, pos + 1); - // put the next starting position on the encapsulator so we will - // write it out again with the next string (effectively doubling it) - start = pos; - } - pos++; - } - - // write the last segment - out.append(value, start, pos); - out.append(quoteChar); - } - - /** - * Prints a comment on a new line among the delimiter separated values. Comments will always begin on a new line - * and occupy a least one full line. The character specified to start comments and a space will be inserted at the - * beginning of each new line in the comment. - * <p/> - * If comments are disabled in the current CSV format this method does nothing. 
- * - * @param comment - * the comment to output - * @throws IOException - * If an I/O error occurs - */ - public void printComment(final String comment) throws IOException { - if (!format.isCommentingEnabled()) { - return; - } - if (!newRecord) { - println(); - } - out.append(format.getCommentStart().charValue()); - out.append(SP); - for (int i = 0; i < comment.length(); i++) { - final char c = comment.charAt(i); - switch (c) { - case CR: - if (i + 1 < comment.length() && comment.charAt(i + 1) == LF) { - i++; - } - //$FALL-THROUGH$ break intentionally excluded. - case LF: - println(); - out.append(format.getCommentStart().charValue()); - out.append(SP); - break; - default: - out.append(c); - break; - } - } - println(); - } - - /** - * Outputs the record separator. - * - * @throws IOException - * If an I/O error occurs - */ - public void println() throws IOException { - out.append(format.getRecordSeparator()); - newRecord = true; - } - - /** - * Prints a single line of delimiter separated values. The values will be quoted if needed. Quotes and newLine - * characters will be escaped. - * - * @param values - * values to output. - * @throws IOException - * If an I/O error occurs - */ - public void printRecord(final Iterable<?> values) throws IOException { - for (final Object value : values) { - print(value); - } - println(); - } - - /** - * Prints a single line of delimiter separated values. The values will be quoted if needed. Quotes and newLine - * characters will be escaped. - * - * @param values - * values to output. - * @throws IOException - * If an I/O error occurs - */ - public void printRecord(final Object... values) throws IOException { - for (final Object value : values) { - print(value); - } - println(); - } - - /** - * Prints all the objects in the given collection. - * - * @param values - * the values to print. 
- * @throws IOException - * If an I/O error occurs - */ - public void printRecords(final Iterable<?> values) throws IOException { - for (final Object value : values) { - if (value instanceof Object[]) { - this.printRecord((Object[]) value); - } else if (value instanceof Iterable) { - this.printRecord((Iterable<?>) value); - } else { - this.printRecord(value); - } - } - } - - /** - * Prints all the objects in the given array. - * - * @param values - * the values to print. - * @throws IOException - * If an I/O error occurs - */ - public void printRecords(final Object[] values) throws IOException { - for (final Object value : values) { - if (value instanceof Object[]) { - this.printRecord((Object[]) value); - } else if (value instanceof Iterable) { - this.printRecord((Iterable<?>) value); - } else { - this.printRecord(value); - } - } - } - - /** - * Prints all the objects in the given JDBC result set. - * - * @param resultSet result set - * the values to print. - * @throws IOException - * If an I/O error occurs - * @throws SQLException if a database access error occurs - */ - public void printRecords(final ResultSet resultSet) throws SQLException, IOException { - final int columnCount = resultSet.getMetaData().getColumnCount(); - while (resultSet.next()) { - for (int i = 1; i <= columnCount; i++) { - print(resultSet.getString(i)); - } - println(); - } - } - - /** - * Gets the target Appendable. - * - * @return the target Appendable. 
- */ - public Appendable getOut() { - return this.out; - } -} http://git-wip-us.apache.org/repos/asf/phoenix/blob/c3697326/phoenix-core/src/main/java/org/apache/commons/csv/CSVRecord.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/commons/csv/CSVRecord.java b/phoenix-core/src/main/java/org/apache/commons/csv/CSVRecord.java deleted file mode 100644 index 3b265f4..0000000 --- a/phoenix-core/src/main/java/org/apache/commons/csv/CSVRecord.java +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import java.io.Serializable; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -/** - * A CSV record parsed from a CSV file. 
- * - * @version $Id: CSVRecord.java 1560399 2014-01-22 16:07:23Z ggregory $ - */ -public final class CSVRecord implements Serializable, Iterable<String> { - - private static final String[] EMPTY_STRING_ARRAY = new String[0]; - - private static final long serialVersionUID = 1L; - - /** The accumulated comments (if any) */ - private final String comment; - - /** The column name to index mapping. */ - private final Map<String, Integer> mapping; - - /** The record number. */ - private final long recordNumber; - - /** The values of the record */ - private final String[] values; - - CSVRecord(final String[] values, final Map<String, Integer> mapping, - final String comment, final long recordNumber) { - this.recordNumber = recordNumber; - this.values = values != null ? values : EMPTY_STRING_ARRAY; - this.mapping = mapping; - this.comment = comment; - } - - /** - * Returns a value by {@link Enum}. - * - * @param e - * an enum - * @return the String at the given enum String - */ - public String get(final Enum<?> e) { - return get(e.toString()); - } - - /** - * Returns a value by index. - * - * @param i - * a column index (0-based) - * @return the String at the given index - */ - public String get(final int i) { - return values[i]; - } - - /** - * Returns a value by name. - * - * @param name - * the name of the column to be retrieved. - * @return the column value, maybe null depending on {@link CSVFormat#getNullString()}. 
- * @throws IllegalStateException - * if no header mapping was provided - * @throws IllegalArgumentException - * if {@code name} is not mapped or if the record is inconsistent - * @see #isConsistent() - * @see CSVFormat#withNullString(String) - */ - public String get(final String name) { - if (mapping == null) { - throw new IllegalStateException( - "No header mapping was specified, the record values can't be accessed by name"); - } - final Integer index = mapping.get(name); - if (index == null) { - throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name, - mapping.keySet())); - } - try { - return values[index.intValue()]; - } catch (final ArrayIndexOutOfBoundsException e) { - throw new IllegalArgumentException(String.format( - "Index for header '%s' is %d but CSVRecord only has %d values!", name, index, - Integer.valueOf(values.length))); - } - } - - /** - * Returns the comment for this record, if any. - * - * @return the comment for this record, or null if no comment for this - * record is available. - */ - public String getComment() { - return comment; - } - - /** - * Returns the number of this record in the parsed CSV file. - * - * @return the number of this record. - */ - public long getRecordNumber() { - return recordNumber; - } - - /** - * Returns true if this record is consistent, false if not. Currently, the only check is matching the record size to - * the header size. Some programs can export files that fails this test but still produce parsable files. - * - * @return true of this record is valid, false if not - */ - public boolean isConsistent() { - return mapping == null ? true : mapping.size() == values.length; - } - - /** - * Checks whether a given column is mapped, i.e. its name has been defined to the parser. - * - * @param name - * the name of the column to be retrieved. - * @return whether a given column is mapped. - */ - public boolean isMapped(final String name) { - return mapping != null ? 
mapping.containsKey(name) : false; - } - - /** - * Checks whether a given columns is mapped and has a value. - * - * @param name - * the name of the column to be retrieved. - * @return whether a given columns is mapped and has a value - */ - public boolean isSet(final String name) { - return isMapped(name) && mapping.get(name).intValue() < values.length; - } - - /** - * Returns an iterator over the values of this record. - * - * @return an iterator over the values of this record. - */ - @Override - public Iterator<String> iterator() { - return toList().iterator(); - } - - /** - * Puts all values of this record into the given Map. - * - * @param map The Map to populate. - * @return the given map. - */ - <M extends Map<String, String>> M putIn(final M map) { - for (final Entry<String, Integer> entry : mapping.entrySet()) { - map.put(entry.getKey(), values[entry.getValue().intValue()]); - } - return map; - } - - /** - * Returns the number of values in this record. - * - * @return the number of values. - */ - public int size() { - return values.length; - } - - /** - * Converts the values to a List. - * - * TODO: Maybe make this public? - * @return a new List - */ - private List<String> toList() { - return Arrays.asList(values); - } - - /** - * Copies this record into a new Map. The new map is not connect - * - * @return A new Map. The map is empty if the record has no headers. 
- */ - public Map<String, String> toMap() { - return putIn(new HashMap<String, String>(values.length)); - } - - @Override - public String toString() { - return Arrays.toString(values); - } - - String[] values() { - return values; - } - - -} http://git-wip-us.apache.org/repos/asf/phoenix/blob/c3697326/phoenix-core/src/main/java/org/apache/commons/csv/Constants.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/commons/csv/Constants.java b/phoenix-core/src/main/java/org/apache/commons/csv/Constants.java deleted file mode 100644 index 9817158..0000000 --- a/phoenix-core/src/main/java/org/apache/commons/csv/Constants.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -/** - * Constants for this package. - * - * @version $Id: Constants.java 1509069 2013-08-01 02:04:27Z ggregory $ - */ -final class Constants { - - static final char BACKSPACE = '\b'; - static final char COMMA = ','; - - /** - * Starts a comment, the remainder of the line is the comment. 
- */ - static final char COMMENT = '#'; - - static final char CR = '\r'; - static final Character DOUBLE_QUOTE_CHAR = Character.valueOf('"'); - static final char BACKSLASH = '\\'; - static final char FF = '\f'; - static final char LF = '\n'; - static final char SP = ' '; - static final char TAB = '\t'; - static final String EMPTY = ""; - - /** The end of stream symbol */ - static final int END_OF_STREAM = -1; - - /** Undefined state for the lookahead char */ - static final int UNDEFINED = -2; - - /** According to RFC 4180, line breaks are delimited by CRLF */ - static final String CRLF = "\r\n"; - - /** - * Unicode line separator. - */ - static final String LINE_SEPARATOR = "\u2028"; - - /** - * Unicode paragraph separator. - */ - static final String PARAGRAPH_SEPARATOR = "\u2029"; - - /** - * Unicode next line. - */ - static final String NEXT_LINE = "\u0085"; - -} http://git-wip-us.apache.org/repos/asf/phoenix/blob/c3697326/phoenix-core/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/phoenix-core/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java deleted file mode 100644 index c50d339..0000000 --- a/phoenix-core/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.END_OF_STREAM; -import static org.apache.commons.csv.Constants.LF; -import static org.apache.commons.csv.Constants.UNDEFINED; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.Reader; - -/** - * A special buffered reader which supports sophisticated read access. - * <p> - * In particular the reader supports a look-ahead option, which allows you to see the next char returned by - * {@link #read()}. - * - * @version $Id: ExtendedBufferedReader.java 1512625 2013-08-10 11:07:15Z britter $ - */ -final class ExtendedBufferedReader extends BufferedReader { - - /** The last char returned */ - private int lastChar = UNDEFINED; - - /** The count of EOLs (CR/LF/CRLF) seen so far */ - private long eolCounter = 0; - - private boolean closed; - - /** - * Created extended buffered reader using default buffer-size - */ - ExtendedBufferedReader(final Reader reader) { - super(reader); - } - - @Override - public int read() throws IOException { - final int current = super.read(); - if (current == CR || (current == LF && lastChar != CR)) { - eolCounter++; - } - lastChar = current; - return lastChar; - } - - /** - * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by - * any of the read methods. This will not include a character read using the {@link #lookAhead()} method. 
If no - * character has been read then this will return {@link Constants#UNDEFINED}. If the end of the stream was reached - * on the last read then this will return {@link Constants#END_OF_STREAM}. - * - * @return the last character that was read - */ - int getLastChar() { - return lastChar; - } - - @Override - public int read(final char[] buf, final int offset, final int length) throws IOException { - if (length == 0) { - return 0; - } - - final int len = super.read(buf, offset, length); - - if (len > 0) { - - for (int i = offset; i < offset + len; i++) { - final char ch = buf[i]; - if (ch == LF) { - if (CR != (i > 0 ? buf[i - 1] : lastChar)) { - eolCounter++; - } - } else if (ch == CR) { - eolCounter++; - } - } - - lastChar = buf[offset + len - 1]; - - } else if (len == -1) { - lastChar = END_OF_STREAM; - } - - return len; - } - - /** - * Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called - * when processing a comment, otherwise information can be lost. - * <p> - * Increments {@link #eolCounter} - * <p> - * Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise to LF - * - * @return the line that was read, or null if reached EOF. - */ - @Override - public String readLine() throws IOException { - final String line = super.readLine(); - - if (line != null) { - lastChar = LF; // needed for detecting start of line - eolCounter++; - } else { - lastChar = END_OF_STREAM; - } - - return line; - } - - /** - * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will - * still return this value. Does not affect line number or last character. 
- * - * @return the next character - * - * @throws IOException - * if there is an error in reading - */ - int lookAhead() throws IOException { - super.mark(1); - final int c = super.read(); - super.reset(); - - return c; - } - - /** - * Returns the current line number - * - * @return the current line number - */ - long getCurrentLineNumber() { - // Check if we are at EOL or EOF or just starting - if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) { - return eolCounter; // counter is accurate - } - return eolCounter + 1; // Allow for counter being incremented only at EOL - } - - public boolean isClosed() { - return closed; - } - - /** - * Closes the stream. - * - * @throws IOException - * If an I/O error occurs - */ - @Override - public void close() throws IOException { - // Set ivars before calling super close() in case close() throws an IOException. - closed = true; - lastChar = END_OF_STREAM; - super.close(); - } - -}