Repository: commons-text Updated Branches: refs/heads/master 995c44b71 -> 6d8b511f2
[TEXT-116] Add a StrTokenizer replacement based on the StringMatcher interface: StringTokenizer. Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/6d8b511f Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/6d8b511f Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/6d8b511f Branch: refs/heads/master Commit: 6d8b511f2081117a3c07a5e54392b1948df79248 Parents: 995c44b Author: Gary Gregory <garydgreg...@gmail.com> Authored: Mon Feb 12 11:47:03 2018 -0700 Committer: Gary Gregory <garydgreg...@gmail.com> Committed: Mon Feb 12 11:47:03 2018 -0700 ---------------------------------------------------------------------- src/changes/changes.xml | 1 + .../org/apache/commons/text/StrTokenizer.java | 2 + .../apache/commons/text/StringTokenizer.java | 1176 ++++++++++++++++++ .../apache/commons/text/TextStringBuilder.java | 14 +- .../text/StrBuilderAppendInsertTest.java | 3 + .../org/apache/commons/text/StrBuilderTest.java | 3 + .../org/apache/commons/text/StrLookupTest.java | 3 + .../org/apache/commons/text/StrMatcherTest.java | 3 + .../apache/commons/text/StrSubstitutorTest.java | 3 + .../apache/commons/text/StrTokenizerTest.java | 3 + .../commons/text/StringTokenizerTest.java | 962 ++++++++++++++ .../commons/text/TextStringBuilderTest.java | 2 +- 12 files changed, 2167 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/changes/changes.xml ---------------------------------------------------------------------- diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 59a17cd..584c343 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -51,6 +51,7 @@ The <action> type attribute can be add,update,fix,remove. 
<action issue="TEXT-113" type="add" dev="ggregory">Add an interpolator string lookup</action> <action issue="TEXT-114" type="add" dev="ggregory">Add a StrSubstitutor replacement based on interfaces: StringSubstitutor</action> <action issue="TEXT-115" type="add" dev="ggregory">Add a StrBuilder replacement based on the StringMatcher interface: TextStringBuilder</action> + <action issue="TEXT-116" type="add" dev="ggregory">Add a StrTokenizer replacement based on the StringMatcher interface: StringTokenizer</action> </release> <release version="1.2" date="2017-12-12" description="Release 1.2"> http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/main/java/org/apache/commons/text/StrTokenizer.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/StrTokenizer.java b/src/main/java/org/apache/commons/text/StrTokenizer.java index c07ce31..3ae662d 100644 --- a/src/main/java/org/apache/commons/text/StrTokenizer.java +++ b/src/main/java/org/apache/commons/text/StrTokenizer.java @@ -80,7 +80,9 @@ import java.util.NoSuchElementException; * </table> * * @since 1.0 + * @deprecated Use {@link StringTokenizer}. This class will be removed in 2.0. */ +@Deprecated public class StrTokenizer implements ListIterator<String>, Cloneable { /** Comma separated values tokenizer internal variable. */ http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/main/java/org/apache/commons/text/StringTokenizer.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/StringTokenizer.java b/src/main/java/org/apache/commons/text/StringTokenizer.java new file mode 100644 index 0000000..aaea4fb --- /dev/null +++ b/src/main/java/org/apache/commons/text/StringTokenizer.java @@ -0,0 +1,1176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.ListIterator; +import java.util.NoSuchElementException; + +import org.apache.commons.text.matcher.StringMatcher; +import org.apache.commons.text.matcher.StringMatcherFactory; + +/** + * Tokenizes a string based on delimiters (separators) and supporting quoting and ignored character concepts. + * <p> + * This class can split a String into many smaller strings. It aims to do a similar job to + * {@link java.util.StringTokenizer StringTokenizer}, however it offers much more control and flexibility including + * implementing the <code>ListIterator</code> interface. By default, it is set up like <code>StringTokenizer</code>. + * <p> + * The input String is split into a number of <i>tokens</i>. Each token is separated from the next String by a + * <i>delimiter</i>. One or more delimiter characters must be specified. + * <p> + * Each token may be surrounded by quotes. The <i>quote</i> matcher specifies the quote character(s). A quote may be + * escaped within a quoted section by duplicating itself. + * <p> + * Between each token and the delimiter are potentially characters that need trimming. 
The <i>trimmer</i> matcher + * specifies these characters. One usage might be to trim whitespace characters. + * <p> + * At any point outside the quotes there might potentially be invalid characters. The <i>ignored</i> matcher specifies + * these characters to be removed. One usage might be to remove new line characters. + * <p> + * Empty tokens may be removed or returned as null. + * + * <pre> + * "a,b,c" - Three tokens "a","b","c" (comma delimiter) + * " a, b , c " - Three tokens "a","b","c" (default CSV processing trims whitespace) + * "a, ", b ,", c" - Three tokens "a, " , " b ", ", c" (quoted text untouched) + * </pre> + * <p> + * + * This tokenizer has the following properties and options: + * + * <table summary="Tokenizer Properties"> + * <tr> + * <th>Property</th> + * <th>Type</th> + * <th>Default</th> + * </tr> + * <tr> + * <td>delim</td> + * <td>CharSetMatcher</td> + * <td>{ \t\n\r\f}</td> + * </tr> + * <tr> + * <td>quote</td> + * <td>NoneMatcher</td> + * <td>{}</td> + * </tr> + * <tr> + * <td>ignore</td> + * <td>NoneMatcher</td> + * <td>{}</td> + * </tr> + * <tr> + * <td>emptyTokenAsNull</td> + * <td>boolean</td> + * <td>false</td> + * </tr> + * <tr> + * <td>ignoreEmptyTokens</td> + * <td>boolean</td> + * <td>true</td> + * </tr> + * </table> + * + * @since 1.3 + */ +public class StringTokenizer implements ListIterator<String>, Cloneable { + + /** Comma separated values tokenizer internal variable. */ + private static final StringTokenizer CSV_TOKENIZER_PROTOTYPE; + /** Tab separated values tokenizer internal variable. 
*/ + private static final StringTokenizer TSV_TOKENIZER_PROTOTYPE; + static { + CSV_TOKENIZER_PROTOTYPE = new StringTokenizer(); + CSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StringMatcherFactory.INSTANCE.commaMatcher()); + CSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StringMatcherFactory.INSTANCE.doubleQuoteMatcher()); + CSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher()); + CSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); + CSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false); + CSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false); + + TSV_TOKENIZER_PROTOTYPE = new StringTokenizer(); + TSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StringMatcherFactory.INSTANCE.tabMatcher()); + TSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StringMatcherFactory.INSTANCE.doubleQuoteMatcher()); + TSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher()); + TSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); + TSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false); + TSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false); + } + + /** The text to work on. */ + private char[] chars; + /** The parsed tokens. */ + private String[] tokens; + /** The current iteration position. */ + private int tokenPos; + + /** The delimiter matcher. */ + private StringMatcher delimMatcher = StringMatcherFactory.INSTANCE.splitMatcher(); + /** The quote matcher. */ + private StringMatcher quoteMatcher = StringMatcherFactory.INSTANCE.noneMatcher(); + /** The ignored matcher. */ + private StringMatcher ignoredMatcher = StringMatcherFactory.INSTANCE.noneMatcher(); + /** The trimmer matcher. */ + private StringMatcher trimmerMatcher = StringMatcherFactory.INSTANCE.noneMatcher(); + + /** Whether to return empty tokens as null. */ + private boolean emptyAsNull = false; + /** Whether to ignore empty tokens. 
*/ + private boolean ignoreEmptyTokens = true; + + // ----------------------------------------------------------------------- + + /** + * Returns a clone of <code>CSV_TOKENIZER_PROTOTYPE</code>. + * + * @return a clone of <code>CSV_TOKENIZER_PROTOTYPE</code>. + */ + private static StringTokenizer getCSVClone() { + return (StringTokenizer) CSV_TOKENIZER_PROTOTYPE.clone(); + } + + /** + * Gets a new tokenizer instance which parses Comma Separated Value strings, with no input text set yet. + * The default for CSV processing is to trim whitespace from both ends (which can be overridden with the + * setTrimmerMatcher method). + * <p> + * You must call a "reset" method to set the string which you want to parse. + * + * @return a new tokenizer instance which parses Comma Separated Value strings + */ + public static StringTokenizer getCSVInstance() { + return getCSVClone(); + } + + /** + * Gets a new tokenizer instance which parses Comma Separated Value strings, initializing it with the given input. + * The default for CSV processing is to trim whitespace from both ends (which can be overridden with the + * setTrimmerMatcher method). + * + * @param input + * the text to parse + * @return a new tokenizer instance which parses Comma Separated Value strings + */ + public static StringTokenizer getCSVInstance(final String input) { + final StringTokenizer tok = getCSVClone(); + tok.reset(input); + return tok; + } + + /** + * Gets a new tokenizer instance which parses Comma Separated Value strings, initializing it with the given input. + * The default for CSV processing is to trim whitespace from both ends (which can be overridden with the + * setTrimmerMatcher method).
+ * + * @param input + * the text to parse + * @return a new tokenizer instance which parses Comma Separated Value strings + */ + public static StringTokenizer getCSVInstance(final char[] input) { + final StringTokenizer tok = getCSVClone(); + tok.reset(input); + return tok; + } + + /** + * Returns a clone of <code>TSV_TOKENIZER_PROTOTYPE</code>. + * + * @return a clone of <code>TSV_TOKENIZER_PROTOTYPE</code>. + */ + private static StringTokenizer getTSVClone() { + return (StringTokenizer) TSV_TOKENIZER_PROTOTYPE.clone(); + } + + /** + * Gets a new tokenizer instance which parses Tab Separated Value strings. The default for TSV processing is to + * trim whitespace from both ends (which can be overridden with the setTrimmerMatcher method). + * <p> + * You must call a "reset" method to set the string which you want to parse. + * + * @return a new tokenizer instance which parses Tab Separated Value strings. + */ + public static StringTokenizer getTSVInstance() { + return getTSVClone(); + } + + /** + * Gets a new tokenizer instance which parses Tab Separated Value strings. The default for TSV processing is to + * trim whitespace from both ends (which can be overridden with the setTrimmerMatcher method). + * + * @param input + * the string to parse + * @return a new tokenizer instance which parses Tab Separated Value strings. + */ + public static StringTokenizer getTSVInstance(final String input) { + final StringTokenizer tok = getTSVClone(); + tok.reset(input); + return tok; + } + + /** + * Gets a new tokenizer instance which parses Tab Separated Value strings. The default for TSV processing is to + * trim whitespace from both ends (which can be overridden with the setTrimmerMatcher method). + * + * @param input + * the string to parse + * @return a new tokenizer instance which parses Tab Separated Value strings.
+ */ + public static StringTokenizer getTSVInstance(final char[] input) { + final StringTokenizer tok = getTSVClone(); + tok.reset(input); + return tok; + } + + // ----------------------------------------------------------------------- + /** + * Constructs a tokenizer splitting on space, tab, newline and form feed as per StringTokenizer, but with no text to + * tokenize. + * <p> + * This constructor is normally used with {@link #reset(String)}. + */ + public StringTokenizer() { + super(); + this.chars = null; + } + + /** + * Constructs a tokenizer splitting on space, tab, newline and form feed as per StringTokenizer. + * + * @param input + * the string which is to be parsed + */ + public StringTokenizer(final String input) { + super(); + if (input != null) { + chars = input.toCharArray(); + } else { + chars = null; + } + } + + /** + * Constructs a tokenizer splitting on the specified delimiter character. + * + * @param input + * the string which is to be parsed + * @param delim + * the field delimiter character + */ + public StringTokenizer(final String input, final char delim) { + this(input); + setDelimiterChar(delim); + } + + /** + * Constructs a tokenizer splitting on the specified delimiter string. + * + * @param input + * the string which is to be parsed + * @param delim + * the field delimiter string + */ + public StringTokenizer(final String input, final String delim) { + this(input); + setDelimiterString(delim); + } + + /** + * Constructs a tokenizer splitting using the specified delimiter matcher. + * + * @param input + * the string which is to be parsed + * @param delim + * the field delimiter matcher + */ + public StringTokenizer(final String input, final StringMatcher delim) { + this(input); + setDelimiterMatcher(delim); + } + + /** + * Constructs a tokenizer splitting on the specified delimiter character and handling quotes using the specified + * quote character. 
+ * + * @param input + * the string which is to be parsed + * @param delim + * the field delimiter character + * @param quote + * the field quoted string character + */ + public StringTokenizer(final String input, final char delim, final char quote) { + this(input, delim); + setQuoteChar(quote); + } + + /** + * Constructs a tokenizer splitting using the specified delimiter matcher and handling quotes using the specified + * quote matcher. + * + * @param input + * the string which is to be parsed + * @param delim + * the field delimiter matcher + * @param quote + * the field quoted string matcher + */ + public StringTokenizer(final String input, final StringMatcher delim, final StringMatcher quote) { + this(input, delim); + setQuoteMatcher(quote); + } + + /** + * Constructs a tokenizer splitting on space, tab, newline and form feed as per StringTokenizer. + * + * @param input + * the character array which is to be parsed; it is cloned + */ + public StringTokenizer(final char[] input) { + super(); + if (input == null) { + this.chars = null; + } else { + this.chars = input.clone(); + } + } + + /** + * Constructs a tokenizer splitting on the specified character. + * + * @param input + * the character array which is to be parsed; it is cloned + * @param delim + * the field delimiter character + */ + public StringTokenizer(final char[] input, final char delim) { + this(input); + setDelimiterChar(delim); + } + + /** + * Constructs a tokenizer splitting on the specified string. + * + * @param input + * the character array which is to be parsed; it is cloned + * @param delim + * the field delimiter string + */ + public StringTokenizer(final char[] input, final String delim) { + this(input); + setDelimiterString(delim); + } + + /** + * Constructs a tokenizer splitting using the specified delimiter matcher.
+ * + * @param input + * the character array which is to be parsed; it is cloned + * @param delim + * the field delimiter matcher + */ + public StringTokenizer(final char[] input, final StringMatcher delim) { + this(input); + setDelimiterMatcher(delim); + } + + /** + * Constructs a tokenizer splitting on the specified delimiter character and handling quotes using the specified + * quote character. + * + * @param input + * the character array which is to be parsed; it is cloned + * @param delim + * the field delimiter character + * @param quote + * the field quoted string character + */ + public StringTokenizer(final char[] input, final char delim, final char quote) { + this(input, delim); + setQuoteChar(quote); + } + + /** + * Constructs a tokenizer splitting using the specified delimiter matcher and handling quotes using the specified + * quote matcher. + * + * @param input + * the character array which is to be parsed; it is cloned + * @param delim + * the field delimiter matcher + * @param quote + * the field quoted string matcher + */ + public StringTokenizer(final char[] input, final StringMatcher delim, final StringMatcher quote) { + this(input, delim); + setQuoteMatcher(quote); + } + + // API + // ----------------------------------------------------------------------- + /** + * Gets the number of tokens found in the String. + * + * @return the number of matched tokens + */ + public int size() { + checkTokenized(); + return tokens.length; + } + + /** + * Gets the next token from the String. Equivalent to {@link #next()} except it returns null rather than throwing + * {@link NoSuchElementException} when no tokens remain. + * + * @return the next sequential token, or null when no more tokens are found + */ + public String nextToken() { + if (hasNext()) { + return tokens[tokenPos++]; + } + return null; + } + + /** + * Gets the previous token from the String.
+ * + * @return the previous sequential token, or null when no more tokens are found + */ + public String previousToken() { + if (hasPrevious()) { + return tokens[--tokenPos]; + } + return null; + } + + /** + * Gets a copy of the full token list as an independent modifiable array. + * + * @return the tokens as a String array + */ + public String[] getTokenArray() { + checkTokenized(); + return tokens.clone(); + } + + /** + * Gets a copy of the full token list as an independent modifiable list. + * + * @return the tokens as a String list + */ + public List<String> getTokenList() { + checkTokenized(); + final List<String> list = new ArrayList<>(tokens.length); + Collections.addAll(list, tokens); + + return list; + } + + /** + * Resets this tokenizer, forgetting all parsing and iteration already completed. + * <p> + * This method allows the same tokenizer to be reused for the same String. + * + * @return this, to enable chaining + */ + public StringTokenizer reset() { + tokenPos = 0; + tokens = null; + return this; + } + + /** + * Resets this tokenizer, giving it a new input string to parse. In this manner you can re-use a tokenizer with the + * same settings on multiple input lines. + * + * @param input + * the new string to tokenize, null sets no text to parse + * @return this, to enable chaining + */ + public StringTokenizer reset(final String input) { + reset(); + if (input != null) { + this.chars = input.toCharArray(); + } else { + this.chars = null; + } + return this; + } + + /** + * Resets this tokenizer, giving it a new input character array to parse. In this manner you can re-use a tokenizer with the + * same settings on multiple input lines.
+ * + * @param input + * the new character array to tokenize (it is cloned), null sets no text to parse + * @return this, to enable chaining + */ + public StringTokenizer reset(final char[] input) { + reset(); + if (input != null) { + this.chars = input.clone(); + } else { + this.chars = null; + } + return this; + } + + // ListIterator + // ----------------------------------------------------------------------- + /** + * Checks whether there are any more tokens. + * + * @return true if there are more tokens + */ + @Override + public boolean hasNext() { + checkTokenized(); + return tokenPos < tokens.length; + } + + /** + * Gets the next token. + * + * @return the next String token + * @throws NoSuchElementException + * if there are no more elements + */ + @Override + public String next() { + if (hasNext()) { + return tokens[tokenPos++]; + } + throw new NoSuchElementException(); + } + + /** + * Gets the index of the next token to return. + * + * @return the next token index + */ + @Override + public int nextIndex() { + return tokenPos; + } + + /** + * Checks whether there are any previous tokens that can be iterated to. + * + * @return true if there are previous tokens + */ + @Override + public boolean hasPrevious() { + checkTokenized(); + return tokenPos > 0; + } + + /** + * Gets the token previous to the last returned token. + * + * @return the previous token + */ + @Override + public String previous() { + if (hasPrevious()) { + return tokens[--tokenPos]; + } + throw new NoSuchElementException(); + } + + /** + * Gets the index of the previous token. + * + * @return the previous token index + */ + @Override + public int previousIndex() { + return tokenPos - 1; + } + + /** + * Unsupported ListIterator operation. + * + * @throws UnsupportedOperationException + * always + */ + @Override + public void remove() { + throw new UnsupportedOperationException("remove() is unsupported"); + } + + /** + * Unsupported ListIterator operation.
+ * + * @param obj + * this parameter is ignored. + * @throws UnsupportedOperationException + * always + */ + @Override + public void set(final String obj) { + throw new UnsupportedOperationException("set() is unsupported"); + } + + /** + * Unsupported ListIterator operation. + * + * @param obj + * this parameter is ignored. + * @throws UnsupportedOperationException + * always + */ + @Override + public void add(final String obj) { + throw new UnsupportedOperationException("add() is unsupported"); + } + + // Implementation + // ----------------------------------------------------------------------- + /** + * Checks if tokenization has been done, and if not, performs it. + */ + private void checkTokenized() { + if (tokens == null) { + if (chars == null) { + // still call tokenize as subclass may do some work + final List<String> split = tokenize(null, 0, 0); + tokens = split.toArray(new String[split.size()]); + } else { + final List<String> split = tokenize(chars, 0, chars.length); + tokens = split.toArray(new String[split.size()]); + } + } + } + + /** + * Internal method that performs the tokenization. + * <p> + * Most users of this class do not need to call this method. This method will be called automatically by other + * (public) methods when required. + * <p> + * This method exists to allow subclasses to add code before or after the tokenization. For example, a subclass + * could alter the character array, offset or count to be parsed, or call the tokenizer multiple times on multiple + * strings. It is also possible to filter the results. + * <p> + * <code>StringTokenizer</code> will always pass a zero offset and a count equal to the length of the array to this + * method, however a subclass may pass other values, or even an entirely different array.
+ * + * @param srcChars + * the character array being tokenized, may be null + * @param offset + * the start position within the character array, must be valid + * @param count + * the number of characters to tokenize, must be valid + * @return the modifiable list of String tokens, unmodifiable if null array or zero count + */ + protected List<String> tokenize(final char[] srcChars, final int offset, final int count) { + if (srcChars == null || count == 0) { + return Collections.emptyList(); + } + final TextStringBuilder buf = new TextStringBuilder(); + final List<String> tokenList = new ArrayList<>(); + int pos = offset; + + // loop around the entire buffer + while (pos >= 0 && pos < count) { + // find next token + pos = readNextToken(srcChars, pos, count, buf, tokenList); + + // handle case where end of string is a delimiter + if (pos >= count) { + addToken(tokenList, ""); + } + } + return tokenList; + } + + /** + * Adds a token to a list, paying attention to the parameters we've set. + * + * @param list + * the list to add to + * @param tok + * the token to add + */ + private void addToken(final List<String> list, String tok) { + if (tok == null || tok.length() == 0) { + if (isIgnoreEmptyTokens()) { + return; + } + if (isEmptyTokenAsNull()) { + tok = null; + } + } + list.add(tok); + } + + /** + * Reads character by character through the String to get the next token. 
+ * + * @param srcChars + * the character array being tokenized + * @param start + * the first character of field + * @param len + * the length of the character array being tokenized + * @param workArea + * a temporary work area + * @param tokenList + * the list of parsed tokens + * @return the starting position of the next field (the character immediately after the delimiter), or -1 if end of + * string found + */ + private int readNextToken(final char[] srcChars, int start, final int len, final TextStringBuilder workArea, + final List<String> tokenList) { + // skip all leading whitespace, unless it is the + // field delimiter or the quote character + while (start < len) { + final int removeLen = Math.max(getIgnoredMatcher().isMatch(srcChars, start, start, len), + getTrimmerMatcher().isMatch(srcChars, start, start, len)); + if (removeLen == 0 || getDelimiterMatcher().isMatch(srcChars, start, start, len) > 0 + || getQuoteMatcher().isMatch(srcChars, start, start, len) > 0) { + break; + } + start += removeLen; + } + + // handle reaching end + if (start >= len) { + addToken(tokenList, ""); + return -1; + } + + // handle empty token + final int delimLen = getDelimiterMatcher().isMatch(srcChars, start, start, len); + if (delimLen > 0) { + addToken(tokenList, ""); + return start + delimLen; + } + + // handle found token + final int quoteLen = getQuoteMatcher().isMatch(srcChars, start, start, len); + if (quoteLen > 0) { + return readWithQuotes(srcChars, start + quoteLen, len, workArea, tokenList, start, quoteLen); + } + return readWithQuotes(srcChars, start, len, workArea, tokenList, 0, 0); + } + + /** + * Reads a possibly quoted string token. 
+ * + * @param srcChars + * the character array being tokenized + * @param start + * the first character of field + * @param len + * the length of the character array being tokenized + * @param workArea + * a temporary work area + * @param tokenList + * the list of parsed tokens + * @param quoteStart + * the start position of the matched quote, 0 if no quoting + * @param quoteLen + * the length of the matched quote, 0 if no quoting + * @return the starting position of the next field (the character immediately after the delimiter), or -1 if the end of + * the string is found + */ + private int readWithQuotes(final char[] srcChars, final int start, final int len, final TextStringBuilder workArea, + final List<String> tokenList, final int quoteStart, final int quoteLen) { + // Loop until we've found the end of the quoted + // string or the end of the input + workArea.clear(); + int pos = start; + boolean quoting = quoteLen > 0; + int trimStart = 0; + + while (pos < len) { + // quoting mode can occur several times throughout a string + // we must switch between quoting and non-quoting until we + // encounter a non-quoted delimiter, or end of string + if (quoting) { + // In quoting mode + + // If we've found a quote character, see if it's + // followed by a second quote. If so, then we need + // to actually put the quote character into the token + // rather than end the token.
+ if (isQuote(srcChars, pos, len, quoteStart, quoteLen)) { + if (isQuote(srcChars, pos + quoteLen, len, quoteStart, quoteLen)) { + // matched pair of quotes, thus an escaped quote + workArea.append(srcChars, pos, quoteLen); + pos += quoteLen * 2; + trimStart = workArea.size(); + continue; + } + + // end of quoting + quoting = false; + pos += quoteLen; + continue; + } + + // copy regular character from inside quotes + workArea.append(srcChars[pos++]); + trimStart = workArea.size(); + + } else { + // Not in quoting mode + + // check for delimiter, and thus end of token + final int delimLen = getDelimiterMatcher().isMatch(srcChars, pos, start, len); + if (delimLen > 0) { + // return condition when end of token found + addToken(tokenList, workArea.substring(0, trimStart)); + return pos + delimLen; + } + + // check for quote, and thus back into quoting mode + if (quoteLen > 0 && isQuote(srcChars, pos, len, quoteStart, quoteLen)) { + quoting = true; + pos += quoteLen; + continue; + } + + // check for ignored (outside quotes), and ignore + final int ignoredLen = getIgnoredMatcher().isMatch(srcChars, pos, start, len); + if (ignoredLen > 0) { + pos += ignoredLen; + continue; + } + + // check for trimmed character + // don't yet know if its at the end, so copy to workArea + // use trimStart to keep track of trim at the end + final int trimmedLen = getTrimmerMatcher().isMatch(srcChars, pos, start, len); + if (trimmedLen > 0) { + workArea.append(srcChars, pos, trimmedLen); + pos += trimmedLen; + continue; + } + + // copy regular character from outside quotes + workArea.append(srcChars[pos++]); + trimStart = workArea.size(); + } + } + + // return condition when end of string found + addToken(tokenList, workArea.substring(0, trimStart)); + return -1; + } + + /** + * Checks if the characters at the index specified match the quote already matched in readNextToken(). 
+ * + * @param srcChars + * the character array being tokenized + * @param pos + * the position to check for a quote + * @param len + * the length of the character array being tokenized + * @param quoteStart + * the start position of the matched quote, 0 if no quoting + * @param quoteLen + * the length of the matched quote, 0 if no quoting + * @return true if a quote is matched + */ + private boolean isQuote(final char[] srcChars, final int pos, final int len, final int quoteStart, + final int quoteLen) { + for (int i = 0; i < quoteLen; i++) { + if (pos + i >= len || srcChars[pos + i] != srcChars[quoteStart + i]) { + return false; + } + } + return true; + } + + // Delimiter + // ----------------------------------------------------------------------- + /** + * Gets the field delimiter matcher. + * + * @return the delimiter matcher in use + */ + public StringMatcher getDelimiterMatcher() { + return this.delimMatcher; + } + + /** + * Sets the field delimiter matcher. + * <p> + * The delimiter is used to separate one token from another. + * + * @param delim + * the delimiter matcher to use + * @return this, to enable chaining + */ + public StringTokenizer setDelimiterMatcher(final StringMatcher delim) { + if (delim == null) { + this.delimMatcher = StringMatcherFactory.INSTANCE.noneMatcher(); + } else { + this.delimMatcher = delim; + } + return this; + } + + /** + * Sets the field delimiter character. + * + * @param delim + * the delimiter character to use + * @return this, to enable chaining + */ + public StringTokenizer setDelimiterChar(final char delim) { + return setDelimiterMatcher(StringMatcherFactory.INSTANCE.charMatcher(delim)); + } + + /** + * Sets the field delimiter string. 
+ * + * @param delim + * the delimiter string to use + * @return this, to enable chaining + */ + public StringTokenizer setDelimiterString(final String delim) { + return setDelimiterMatcher(StringMatcherFactory.INSTANCE.stringMatcher(delim)); + } + + // Quote + // ----------------------------------------------------------------------- + /** + * Gets the quote matcher currently in use. + * <p> + * The quote character is used to wrap data between the tokens. This enables delimiters to be entered as data. By + * default no quote is recognized (a none matcher is used). + * + * @return the quote matcher in use + */ + public StringMatcher getQuoteMatcher() { + return quoteMatcher; + } + + /** + * Sets the quote matcher to use. + * <p> + * The quote character is used to wrap data between the tokens. This enables delimiters to be entered as data. + * + * @param quote + * the quote matcher to use, null ignored + * @return this, to enable chaining + */ + public StringTokenizer setQuoteMatcher(final StringMatcher quote) { + if (quote != null) { + this.quoteMatcher = quote; + } + return this; + } + + /** + * Sets the quote character to use. + * <p> + * The quote character is used to wrap data between the tokens. This enables delimiters to be entered as data. + * + * @param quote + * the quote character to use + * @return this, to enable chaining + */ + public StringTokenizer setQuoteChar(final char quote) { + return setQuoteMatcher(StringMatcherFactory.INSTANCE.charMatcher(quote)); + } + + // Ignored + // ----------------------------------------------------------------------- + /** + * Gets the ignored character matcher. + * <p> + * These characters are ignored when parsing the String, unless they are within a quoted region. The default value + * is not to ignore anything. + * + * @return the ignored matcher in use + */ + public StringMatcher getIgnoredMatcher() { + return ignoredMatcher; + } + + /** + * Sets the matcher for characters to ignore.
+ * <p> + * These characters are ignored when parsing the String, unless they are within a quoted region. + * + * @param ignored + * the ignored matcher to use, null ignored + * @return this, to enable chaining + */ + public StringTokenizer setIgnoredMatcher(final StringMatcher ignored) { + if (ignored != null) { + this.ignoredMatcher = ignored; + } + return this; + } + + /** + * Set the character to ignore. + * <p> + * This character is ignored when parsing the String, unless it is within a quoted region. + * + * @param ignored + * the ignored character to use + * @return this, to enable chaining + */ + public StringTokenizer setIgnoredChar(final char ignored) { + return setIgnoredMatcher(StringMatcherFactory.INSTANCE.charMatcher(ignored)); + } + + // Trimmer + // ----------------------------------------------------------------------- + /** + * Gets the trimmer character matcher. + * <p> + * These characters are trimmed off on each side of the delimiter until the token or quote is found. The default + * value is not to trim anything. + * + * @return the trimmer matcher in use + */ + public StringMatcher getTrimmerMatcher() { + return trimmerMatcher; + } + + /** + * Sets the matcher for characters to trim. + * <p> + * These characters are trimmed off on each side of the delimiter until the token or quote is found. + * + * @param trimmer + * the trimmer matcher to use, null ignored + * @return this, to enable chaining + */ + public StringTokenizer setTrimmerMatcher(final StringMatcher trimmer) { + if (trimmer != null) { + this.trimmerMatcher = trimmer; + } + return this; + } + + // ----------------------------------------------------------------------- + /** + * Gets whether the tokenizer currently returns empty tokens as null. The default for this property is false. 
+ * + * @return true if empty tokens are returned as null + */ + public boolean isEmptyTokenAsNull() { + return this.emptyAsNull; + } + + /** + * Sets whether the tokenizer should return empty tokens as null. The default for this property is false. + * + * @param emptyAsNull + * whether empty tokens are returned as null + * @return this, to enable chaining + */ + public StringTokenizer setEmptyTokenAsNull(final boolean emptyAsNull) { + this.emptyAsNull = emptyAsNull; + return this; + } + + // ----------------------------------------------------------------------- + /** + * Gets whether the tokenizer currently ignores empty tokens. The default for this property is true. + * + * @return true if empty tokens are not returned + */ + public boolean isIgnoreEmptyTokens() { + return ignoreEmptyTokens; + } + + /** + * Sets whether the tokenizer should ignore and not return empty tokens. The default for this property is true. + * + * @param ignoreEmptyTokens + * whether empty tokens are not returned + * @return this, to enable chaining + */ + public StringTokenizer setIgnoreEmptyTokens(final boolean ignoreEmptyTokens) { + this.ignoreEmptyTokens = ignoreEmptyTokens; + return this; + } + + // ----------------------------------------------------------------------- + /** + * Gets the String content that the tokenizer is parsing. + * + * @return the string content being parsed + */ + public String getContent() { + if (chars == null) { + return null; + } + return new String(chars); + } + + // ----------------------------------------------------------------------- + /** + * Creates a new instance of this Tokenizer. The new instance is reset so that it will be at the start of the token + * list. If a {@link CloneNotSupportedException} is caught, return <code>null</code>. + * + * @return a new instance of this Tokenizer which has been reset. 
+ */ + @Override + public Object clone() { + try { + return cloneReset(); + } catch (final CloneNotSupportedException ex) { + return null; + } + } + + /** + * Creates a new instance of this Tokenizer. The new instance is reset so that it will be at the start of the token + * list. + * + * @return a new instance of this Tokenizer which has been reset. + * @throws CloneNotSupportedException + * if there is a problem cloning + */ + Object cloneReset() throws CloneNotSupportedException { + // this method exists to enable 100% test coverage + final StringTokenizer cloned = (StringTokenizer) super.clone(); + if (cloned.chars != null) { + cloned.chars = cloned.chars.clone(); + } + cloned.reset(); + return cloned; + } + + // ----------------------------------------------------------------------- + /** + * Returns a string describing the tokens of this tokenizer. + * + * @return {@code "StringTokenizer[not tokenized yet]"} if {@code tokens} is null, otherwise {@code "StringTokenizer"} followed by {@link #getTokenList()} + */ + @Override + public String toString() { + if (tokens == null) { + return "StringTokenizer[not tokenized yet]"; + } + return "StringTokenizer" + getTokenList(); + } + +} http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/main/java/org/apache/commons/text/TextStringBuilder.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/TextStringBuilder.java b/src/main/java/org/apache/commons/text/TextStringBuilder.java index 8ab9322..8943d03 100644 --- a/src/main/java/org/apache/commons/text/TextStringBuilder.java +++ b/src/main/java/org/apache/commons/text/TextStringBuilder.java @@ -2779,7 +2779,7 @@ public class TextStringBuilder implements CharSequence, Appendable, Serializable * <p> * The returned tokenizer is linked to this builder. You may intermix calls to the builder and tokenizer within * certain limits, however there is no synchronization. Once the tokenizer has been used once, it must be - * {@link StrTokenizer#reset() reset} to pickup the latest changes in the builder. 
For example: + * {@link StringTokenizer#reset() reset} to pickup the latest changes in the builder. For example: * * <pre> * StrBuilder b = new StrBuilder(); @@ -2795,13 +2795,13 @@ public class TextStringBuilder implements CharSequence, Appendable, Serializable * In addition to simply intermixing appends and tokenization, you can also call the set methods on the tokenizer to * alter how it tokenizes. Just remember to call reset when you want to pickup builder changes. * <p> - * Calling {@link StrTokenizer#reset(String)} or {@link StrTokenizer#reset(char[])} with a non-null value will break - * the link with the builder. + * Calling {@link StringTokenizer#reset(String)} or {@link StringTokenizer#reset(char[])} with a non-null value will + * break the link with the builder. * * @return a tokenizer that is linked to this builder */ - public StrTokenizer asTokenizer() { - return new StrBuilderTokenizer(); + public StringTokenizer asTokenizer() { + return new TextStringBuilderTokenizer(); } // ----------------------------------------------------------------------- @@ -3038,12 +3038,12 @@ public class TextStringBuilder implements CharSequence, Appendable, Serializable /** * Inner class to allow StrBuilder to operate as a tokenizer. */ - class StrBuilderTokenizer extends StrTokenizer { + class TextStringBuilderTokenizer extends StringTokenizer { /** * Default constructor. 
*/ - StrBuilderTokenizer() { + TextStringBuilderTokenizer() { super(); } http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrBuilderAppendInsertTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/StrBuilderAppendInsertTest.java b/src/test/java/org/apache/commons/text/StrBuilderAppendInsertTest.java index 3078808..db93410 100644 --- a/src/test/java/org/apache/commons/text/StrBuilderAppendInsertTest.java +++ b/src/test/java/org/apache/commons/text/StrBuilderAppendInsertTest.java @@ -30,7 +30,10 @@ import org.junit.Test; /** * Unit tests for {@link StrBuilder}. + * + * @deprecated This class will be removed in 2.0. */ +@Deprecated public class StrBuilderAppendInsertTest { /** The system line separator. */ http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrBuilderTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/StrBuilderTest.java b/src/test/java/org/apache/commons/text/StrBuilderTest.java index 1cc5752..528588a 100644 --- a/src/test/java/org/apache/commons/text/StrBuilderTest.java +++ b/src/test/java/org/apache/commons/text/StrBuilderTest.java @@ -40,7 +40,10 @@ import org.junit.Test; /** * Unit tests for {@link StrBuilder}. + * + * @deprecated This class will be removed in 2.0. 
*/ +@Deprecated public class StrBuilderTest { // ----------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrLookupTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/StrLookupTest.java b/src/test/java/org/apache/commons/text/StrLookupTest.java index fce36d2..62330e7 100644 --- a/src/test/java/org/apache/commons/text/StrLookupTest.java +++ b/src/test/java/org/apache/commons/text/StrLookupTest.java @@ -29,7 +29,10 @@ import org.junit.Test; /** * Test class for {@link StrLookup}. + * + * @deprecated This class will be removed in 2.0. */ +@Deprecated public class StrLookupTest { //----------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrMatcherTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/StrMatcherTest.java b/src/test/java/org/apache/commons/text/StrMatcherTest.java index cac9670..22278f8 100644 --- a/src/test/java/org/apache/commons/text/StrMatcherTest.java +++ b/src/test/java/org/apache/commons/text/StrMatcherTest.java @@ -22,7 +22,10 @@ import org.junit.Test; /** * Unit tests for {@link StrMatcher}. + * + * @deprecated This class will be removed in 2.0. 
*/ +@Deprecated public class StrMatcherTest { private static final char[] BUFFER1 = "0,1\t2 3\n\r\f\u0000'\"".toCharArray(); http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrSubstitutorTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/StrSubstitutorTest.java b/src/test/java/org/apache/commons/text/StrSubstitutorTest.java index cbd95c3..04f95bb 100644 --- a/src/test/java/org/apache/commons/text/StrSubstitutorTest.java +++ b/src/test/java/org/apache/commons/text/StrSubstitutorTest.java @@ -35,7 +35,10 @@ import org.junit.Test; /** * Test class for {@link StrSubstitutor}. + * + * @deprecated This class will be removed in 2.0. */ +@Deprecated public class StrSubstitutorTest { private Map<String, String> values; http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrTokenizerTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/StrTokenizerTest.java b/src/test/java/org/apache/commons/text/StrTokenizerTest.java index 63a6ec0..35b9cd8 100644 --- a/src/test/java/org/apache/commons/text/StrTokenizerTest.java +++ b/src/test/java/org/apache/commons/text/StrTokenizerTest.java @@ -32,7 +32,10 @@ import org.junit.Test; /** * Unit test for {@link StrTokenizer}. + * + * @deprecated This class will be removed in 2.0. 
*/ +@Deprecated public class StrTokenizerTest { private static final String CSV_SIMPLE_FIXTURE = "A,b,c"; http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StringTokenizerTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/StringTokenizerTest.java b/src/test/java/org/apache/commons/text/StringTokenizerTest.java new file mode 100644 index 0000000..79a61db --- /dev/null +++ b/src/test/java/org/apache/commons/text/StringTokenizerTest.java @@ -0,0 +1,962 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.text; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.NoSuchElementException; + +import org.apache.commons.text.matcher.StringMatcher; +import org.apache.commons.text.matcher.StringMatcherFactory; +import org.junit.Test; + +/** + * Unit test for {@link StringTokenizer}. 
+ */ +public class StringTokenizerTest { + + private static final String CSV_SIMPLE_FIXTURE = "A,b,c"; + + private static final String TSV_SIMPLE_FIXTURE = "A\tb\tc"; + + private void checkClone(final StringTokenizer tokenizer) { + assertFalse(StringTokenizer.getCSVInstance() == tokenizer); + assertFalse(StringTokenizer.getTSVInstance() == tokenizer); + } + + // ----------------------------------------------------------------------- + @Test + public void test1() { + + final String input = "a;b;c;\"d;\"\"e\";f; ; ; "; + final StringTokenizer tok = new StringTokenizer(input); + tok.setDelimiterChar(';'); + tok.setQuoteChar('"'); + tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + final String[] tokens = tok.getTokenArray(); + + final String[] expected = { "a", "b", "c", "d;\"e", "f", "", "", "" }; + + assertEquals(Arrays.toString(tokens), expected.length, tokens.length); + for (int i = 0; i < expected.length; i++) { + assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'", + expected[i], tokens[i]); + } + + } + + @Test + public void test2() { + + final String input = "a;b;c ;\"d;\"\"e\";f; ; ;"; + final StringTokenizer tok = new StringTokenizer(input); + tok.setDelimiterChar(';'); + tok.setQuoteChar('"'); + tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher()); + tok.setIgnoreEmptyTokens(false); + final String[] tokens = tok.getTokenArray(); + + final String[] expected = { "a", "b", "c ", "d;\"e", "f", " ", " ", "" }; + + assertEquals(Arrays.toString(tokens), expected.length, tokens.length); + for (int i = 0; i < expected.length; i++) { + assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'", + expected[i], tokens[i]); + } + + } + + @Test + public void test3() { + + final String input = "a;b; c;\"d;\"\"e\";f; ; ;"; + final StringTokenizer tok = new StringTokenizer(input); + tok.setDelimiterChar(';'); + 
tok.setQuoteChar('"'); + tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher()); + tok.setIgnoreEmptyTokens(false); + final String[] tokens = tok.getTokenArray(); + + final String[] expected = { "a", "b", " c", "d;\"e", "f", " ", " ", "" }; + + assertEquals(Arrays.toString(tokens), expected.length, tokens.length); + for (int i = 0; i < expected.length; i++) { + assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'", + expected[i], tokens[i]); + } + + } + + @Test + public void test4() { + + final String input = "a;b; c;\"d;\"\"e\";f; ; ;"; + final StringTokenizer tok = new StringTokenizer(input); + tok.setDelimiterChar(';'); + tok.setQuoteChar('"'); + tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); + tok.setIgnoreEmptyTokens(true); + final String[] tokens = tok.getTokenArray(); + + final String[] expected = { "a", "b", "c", "d;\"e", "f" }; + + assertEquals(Arrays.toString(tokens), expected.length, tokens.length); + for (int i = 0; i < expected.length; i++) { + assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'", + expected[i], tokens[i]); + } + + } + + @Test + public void test5() { + + final String input = "a;b; c;\"d;\"\"e\";f; ; ;"; + final StringTokenizer tok = new StringTokenizer(input); + tok.setDelimiterChar(';'); + tok.setQuoteChar('"'); + tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + final String[] tokens = tok.getTokenArray(); + + final String[] expected = { "a", "b", "c", "d;\"e", "f", null, null, null }; + + assertEquals(Arrays.toString(tokens), expected.length, tokens.length); + for (int i = 0; i < expected.length; i++) { + assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'", + expected[i], tokens[i]); + } + + } + + @Test + public void test6() { + + final String input = "a;b; 
c;\"d;\"\"e\";f; ; ;"; + final StringTokenizer tok = new StringTokenizer(input); + tok.setDelimiterChar(';'); + tok.setQuoteChar('"'); + tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + // tok.setTreatingEmptyAsNull(true); + final String[] tokens = tok.getTokenArray(); + + final String[] expected = { "a", "b", " c", "d;\"e", "f", null, null, null }; + + int nextCount = 0; + while (tok.hasNext()) { + tok.next(); + nextCount++; + } + + int prevCount = 0; + while (tok.hasPrevious()) { + tok.previous(); + prevCount++; + } + + assertEquals(Arrays.toString(tokens), expected.length, tokens.length); + + assertTrue("could not cycle through entire token list" + " using the 'hasNext' and 'next' methods", + nextCount == expected.length); + + assertTrue("could not cycle through entire token list" + " using the 'hasPrevious' and 'previous' methods", + prevCount == expected.length); + + } + + @Test + public void test7() { + + final String input = "a b c \"d e\" f "; + final StringTokenizer tok = new StringTokenizer(input); + tok.setDelimiterMatcher(StringMatcherFactory.INSTANCE.spaceMatcher()); + tok.setQuoteMatcher(StringMatcherFactory.INSTANCE.doubleQuoteMatcher()); + tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher()); + tok.setIgnoreEmptyTokens(false); + final String[] tokens = tok.getTokenArray(); + + final String[] expected = { "a", "", "", "b", "c", "d e", "f", "" }; + + assertEquals(Arrays.toString(tokens), expected.length, tokens.length); + for (int i = 0; i < expected.length; i++) { + assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'", + expected[i], tokens[i]); + } + + } + + @Test + public void test8() { + + final String input = "a b c \"d e\" f "; + final StringTokenizer tok = new StringTokenizer(input); + tok.setDelimiterMatcher(StringMatcherFactory.INSTANCE.spaceMatcher()); + tok.setQuoteMatcher(StringMatcherFactory.INSTANCE.doubleQuoteMatcher()); 
+ tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher()); + tok.setIgnoreEmptyTokens(true); + final String[] tokens = tok.getTokenArray(); + + final String[] expected = { "a", "b", "c", "d e", "f" }; + + assertEquals(Arrays.toString(tokens), expected.length, tokens.length); + for (int i = 0; i < expected.length; i++) { + assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'", + expected[i], tokens[i]); + } + + } + + @Test + public void testBasic1() { + final String input = "a b c"; + final StringTokenizer tok = new StringTokenizer(input); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasic2() { + final String input = "a \nb\fc"; + final StringTokenizer tok = new StringTokenizer(input); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasic3() { + final String input = "a \nb\u0001\fc"; + final StringTokenizer tok = new StringTokenizer(input); + assertEquals("a", tok.next()); + assertEquals("b\u0001", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasic4() { + final String input = "a \"b\" c"; + final StringTokenizer tok = new StringTokenizer(input); + assertEquals("a", tok.next()); + assertEquals("\"b\"", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasic5() { + final String input = "a:b':c"; + final StringTokenizer tok = new StringTokenizer(input, ':', '\''); + assertEquals("a", tok.next()); + assertEquals("b'", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicDelim1() { + final String input = "a:b:c"; + final StringTokenizer tok = new StringTokenizer(input, ':'); + assertEquals("a", tok.next()); + 
assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicDelim2() { + final String input = "a:b:c"; + final StringTokenizer tok = new StringTokenizer(input, ','); + assertEquals("a:b:c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testDelimString() { + final String input = "a##b##c"; + final StringTokenizer tok = new StringTokenizer(input, "##"); + + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testDelimMatcher() { + final String input = "a/b\\c"; + final StringMatcher delimMatcher = StringMatcherFactory.INSTANCE.charSetMatcher(new char[] { '/', '\\' }); + + final StringTokenizer tok = new StringTokenizer(input, delimMatcher); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testDelimMatcherQuoteMatcher() { + final String input = "`a`;`b`;`c`"; + final StringMatcher delimMatcher = StringMatcherFactory.INSTANCE.charSetMatcher(new char[] { ';' }); + final StringMatcher quoteMatcher = StringMatcherFactory.INSTANCE.charSetMatcher(new char[] { '`' }); + + final StringTokenizer tok = new StringTokenizer(input, delimMatcher, quoteMatcher); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicEmpty1() { + final String input = "a b c"; + final StringTokenizer tok = new StringTokenizer(input); + tok.setIgnoreEmptyTokens(false); + assertEquals("a", tok.next()); + assertEquals("", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicEmpty2() { + final String input = "a b c"; + final StringTokenizer tok = new StringTokenizer(input); + 
tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertNull(tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicQuoted1() { + final String input = "a 'b' c"; + final StringTokenizer tok = new StringTokenizer(input, ' ', '\''); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicQuoted2() { + final String input = "a:'b':"; + final StringTokenizer tok = new StringTokenizer(input, ':', '\''); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertNull(tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicQuoted3() { + final String input = "a:'b''c'"; + final StringTokenizer tok = new StringTokenizer(input, ':', '\''); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("b'c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicQuoted4() { + final String input = "a: 'b' 'c' :d"; + final StringTokenizer tok = new StringTokenizer(input, ':', '\''); + tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("b c", tok.next()); + assertEquals("d", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicQuoted5() { + final String input = "a: 'b'x'c' :d"; + final StringTokenizer tok = new StringTokenizer(input, ':', '\''); + tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("bxc", tok.next()); + assertEquals("d", tok.next()); + 
assertFalse(tok.hasNext()); + } + + @Test + public void testBasicQuoted6() { + final String input = "a:'b'\"c':d"; + final StringTokenizer tok = new StringTokenizer(input, ':'); + tok.setQuoteMatcher(StringMatcherFactory.INSTANCE.quoteMatcher()); + assertEquals("a", tok.next()); + assertEquals("b\"c:d", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicQuoted7() { + final String input = "a:\"There's a reason here\":b"; + final StringTokenizer tok = new StringTokenizer(input, ':'); + tok.setQuoteMatcher(StringMatcherFactory.INSTANCE.quoteMatcher()); + assertEquals("a", tok.next()); + assertEquals("There's a reason here", tok.next()); + assertEquals("b", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicQuotedTrimmed1() { + final String input = "a: 'b' :"; + final StringTokenizer tok = new StringTokenizer(input, ':', '\''); + tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertNull(tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicTrimmed1() { + final String input = "a: b : "; + final StringTokenizer tok = new StringTokenizer(input, ':'); + tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertNull(tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicTrimmed2() { + final String input = "a: b :"; + final StringTokenizer tok = new StringTokenizer(input, ':'); + tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.stringMatcher(" ")); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertNull(tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void 
testBasicIgnoreTrimmed1() { + final String input = "a: bIGNOREc : "; + final StringTokenizer tok = new StringTokenizer(input, ':'); + tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.stringMatcher("IGNORE")); + tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("bc", tok.next()); + assertNull(tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicIgnoreTrimmed2() { + final String input = "IGNOREaIGNORE: IGNORE bIGNOREc IGNORE : IGNORE "; + final StringTokenizer tok = new StringTokenizer(input, ':'); + tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.stringMatcher("IGNORE")); + tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("bc", tok.next()); + assertNull(tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicIgnoreTrimmed3() { + final String input = "IGNOREaIGNORE: IGNORE bIGNOREc IGNORE : IGNORE "; + final StringTokenizer tok = new StringTokenizer(input, ':'); + tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.stringMatcher("IGNORE")); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals(" bc ", tok.next()); + assertEquals(" ", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicIgnoreTrimmed4() { + final String input = "IGNOREaIGNORE: IGNORE 'bIGNOREc'IGNORE'd' IGNORE : IGNORE "; + final StringTokenizer tok = new StringTokenizer(input, ':', '\''); + tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.stringMatcher("IGNORE")); + tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("bIGNOREcd", tok.next()); + 
assertNull(tok.next()); + assertFalse(tok.hasNext()); + } + + // ----------------------------------------------------------------------- + @Test + public void testListArray() { + final String input = "a b c"; + final StringTokenizer tok = new StringTokenizer(input); + final String[] array = tok.getTokenArray(); + final List<?> list = tok.getTokenList(); + + assertEquals(Arrays.asList(array), list); + assertEquals(3, list.size()); + } + + // ----------------------------------------------------------------------- + private void testCSV(final String data) { + this.testXSVAbc(StringTokenizer.getCSVInstance(data)); + this.testXSVAbc(StringTokenizer.getCSVInstance(data.toCharArray())); + } + + @Test + public void testCSVEmpty() { + this.testEmpty(StringTokenizer.getCSVInstance()); + this.testEmpty(StringTokenizer.getCSVInstance("")); + } + + @Test + public void testCSVSimple() { + this.testCSV(CSV_SIMPLE_FIXTURE); + } + + @Test + public void testCSVSimpleNeedsTrim() { + this.testCSV(" " + CSV_SIMPLE_FIXTURE); + this.testCSV(" \n\t " + CSV_SIMPLE_FIXTURE); + this.testCSV(" \n " + CSV_SIMPLE_FIXTURE + "\n\n\r"); + } + + void testEmpty(final StringTokenizer tokenizer) { + this.checkClone(tokenizer); + assertFalse(tokenizer.hasNext()); + assertFalse(tokenizer.hasPrevious()); + assertNull(tokenizer.nextToken()); + assertEquals(0, tokenizer.size()); + try { + tokenizer.next(); + fail(); + } catch (final NoSuchElementException ex) { + } + } + + @Test + public void testGetContent() { + final String input = "a b c \"d e\" f "; + StringTokenizer tok = new StringTokenizer(input); + assertEquals(input, tok.getContent()); + + tok = new StringTokenizer(input.toCharArray()); + assertEquals(input, tok.getContent()); + + tok = new StringTokenizer(); + assertNull(tok.getContent()); + } + + // ----------------------------------------------------------------------- + @Test + public void testChaining() { + final StringTokenizer tok = new StringTokenizer(); + assertEquals(tok, tok.reset()); + 
assertEquals(tok, tok.reset("")); + assertEquals(tok, tok.reset(new char[0])); + assertEquals(tok, tok.setDelimiterChar(' ')); + assertEquals(tok, tok.setDelimiterString(" ")); + assertEquals(tok, tok.setDelimiterMatcher(null)); + assertEquals(tok, tok.setQuoteChar(' ')); + assertEquals(tok, tok.setQuoteMatcher(null)); + assertEquals(tok, tok.setIgnoredChar(' ')); + assertEquals(tok, tok.setIgnoredMatcher(null)); + assertEquals(tok, tok.setTrimmerMatcher(null)); + assertEquals(tok, tok.setEmptyTokenAsNull(false)); + assertEquals(tok, tok.setIgnoreEmptyTokens(false)); + } + + /** + * Tests that the {@link StringTokenizer#clone()} clone method catches {@link CloneNotSupportedException} and + * returns <code>null</code>. + */ + @Test + public void testCloneNotSupportedException() { + final Object notCloned = new StringTokenizer() { + + @Override + Object cloneReset() throws CloneNotSupportedException { + throw new CloneNotSupportedException("test"); + } + }.clone(); + assertNull(notCloned); + } + + @Test + public void testCloneNull() { + final StringTokenizer tokenizer = new StringTokenizer((char[]) null); + // Start sanity check + assertNull(tokenizer.nextToken()); + tokenizer.reset(); + assertNull(tokenizer.nextToken()); + // End sanity check + final StringTokenizer clonedTokenizer = (StringTokenizer) tokenizer.clone(); + tokenizer.reset(); + assertNull(tokenizer.nextToken()); + assertNull(clonedTokenizer.nextToken()); + } + + @Test + public void testCloneReset() { + final char[] input = new char[] { 'a' }; + final StringTokenizer tokenizer = new StringTokenizer(input); + // Start sanity check + assertEquals("a", tokenizer.nextToken()); + tokenizer.reset(input); + assertEquals("a", tokenizer.nextToken()); + // End sanity check + final StringTokenizer clonedTokenizer = (StringTokenizer) tokenizer.clone(); + input[0] = 'b'; + tokenizer.reset(input); + assertEquals("b", tokenizer.nextToken()); + assertEquals("a", clonedTokenizer.nextToken()); + } + + // 
----------------------------------------------------------------------- + @Test + public void testConstructor_String() { + StringTokenizer tok = new StringTokenizer("a b"); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertFalse(tok.hasNext()); + + tok = new StringTokenizer(""); + assertFalse(tok.hasNext()); + + tok = new StringTokenizer((String) null); + assertFalse(tok.hasNext()); + } + + // ----------------------------------------------------------------------- + @Test + public void testConstructor_String_char() { + StringTokenizer tok = new StringTokenizer("a b", ' '); + assertEquals(1, tok.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1)); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertFalse(tok.hasNext()); + + tok = new StringTokenizer("", ' '); + assertFalse(tok.hasNext()); + + tok = new StringTokenizer((String) null, ' '); + assertFalse(tok.hasNext()); + } + + // ----------------------------------------------------------------------- + @Test + public void testConstructor_String_char_char() { + StringTokenizer tok = new StringTokenizer("a b", ' ', '"'); + assertEquals(1, tok.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1)); + assertEquals(1, tok.getQuoteMatcher().isMatch("\"".toCharArray(), 0, 0, 1)); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertFalse(tok.hasNext()); + + tok = new StringTokenizer("", ' ', '"'); + assertFalse(tok.hasNext()); + + tok = new StringTokenizer((String) null, ' ', '"'); + assertFalse(tok.hasNext()); + } + + // ----------------------------------------------------------------------- + @Test + public void testConstructor_charArray() { + StringTokenizer tok = new StringTokenizer("a b".toCharArray()); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertFalse(tok.hasNext()); + + tok = new StringTokenizer(new char[0]); + assertFalse(tok.hasNext()); + + tok = new StringTokenizer((char[]) null); + 
assertFalse(tok.hasNext()); + } + + // ----------------------------------------------------------------------- + @Test + public void testConstructor_charArray_char() { + StringTokenizer tok = new StringTokenizer("a b".toCharArray(), ' '); + assertEquals(1, tok.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1)); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertFalse(tok.hasNext()); + + tok = new StringTokenizer(new char[0], ' '); + assertFalse(tok.hasNext()); + + tok = new StringTokenizer((char[]) null, ' '); + assertFalse(tok.hasNext()); + } + + // ----------------------------------------------------------------------- + @Test + public void testConstructor_charArray_char_char() { + StringTokenizer tok = new StringTokenizer("a b".toCharArray(), ' ', '"'); + assertEquals(1, tok.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1)); + assertEquals(1, tok.getQuoteMatcher().isMatch("\"".toCharArray(), 0, 0, 1)); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertFalse(tok.hasNext()); + + tok = new StringTokenizer(new char[0], ' ', '"'); + assertFalse(tok.hasNext()); + + tok = new StringTokenizer((char[]) null, ' ', '"'); + assertFalse(tok.hasNext()); + } + + // ----------------------------------------------------------------------- + @Test + public void testReset() { + final StringTokenizer tok = new StringTokenizer("a b c"); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + + tok.reset(); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + // ----------------------------------------------------------------------- + @Test + public void testReset_String() { + final StringTokenizer tok = new StringTokenizer("x x x"); + tok.reset("d e"); + assertEquals("d", tok.next()); + assertEquals("e", tok.next()); + assertFalse(tok.hasNext()); + + 
tok.reset((String) null); + assertFalse(tok.hasNext()); + } + + // ----------------------------------------------------------------------- + @Test + public void testReset_charArray() { + final StringTokenizer tok = new StringTokenizer("x x x"); + + final char[] array = new char[] { 'a', 'b', 'c' }; + tok.reset(array); + assertEquals("abc", tok.next()); + assertFalse(tok.hasNext()); + + tok.reset((char[]) null); + assertFalse(tok.hasNext()); + } + + // ----------------------------------------------------------------------- + @Test + public void testTSV() { + this.testXSVAbc(StringTokenizer.getTSVInstance(TSV_SIMPLE_FIXTURE)); + this.testXSVAbc(StringTokenizer.getTSVInstance(TSV_SIMPLE_FIXTURE.toCharArray())); + } + + @Test + public void testTSVEmpty() { + this.testEmpty(StringTokenizer.getTSVInstance()); + this.testEmpty(StringTokenizer.getTSVInstance("")); + } + + void testXSVAbc(final StringTokenizer tokenizer) { + this.checkClone(tokenizer); + assertEquals(-1, tokenizer.previousIndex()); + assertEquals(0, tokenizer.nextIndex()); + assertNull(tokenizer.previousToken()); + assertEquals("A", tokenizer.nextToken()); + assertEquals(1, tokenizer.nextIndex()); + assertEquals("b", tokenizer.nextToken()); + assertEquals(2, tokenizer.nextIndex()); + assertEquals("c", tokenizer.nextToken()); + assertEquals(3, tokenizer.nextIndex()); + assertNull(tokenizer.nextToken()); + assertEquals(3, tokenizer.nextIndex()); + assertEquals("c", tokenizer.previousToken()); + assertEquals(2, tokenizer.nextIndex()); + assertEquals("b", tokenizer.previousToken()); + assertEquals(1, tokenizer.nextIndex()); + assertEquals("A", tokenizer.previousToken()); + assertEquals(0, tokenizer.nextIndex()); + assertNull(tokenizer.previousToken()); + assertEquals(0, tokenizer.nextIndex()); + assertEquals(-1, tokenizer.previousIndex()); + assertEquals(3, tokenizer.size()); + } + + @Test + public void testIteration() { + final StringTokenizer tkn = new StringTokenizer("a b c"); + 
assertFalse(tkn.hasPrevious()); + try { + tkn.previous(); + fail(); + } catch (final NoSuchElementException ex) { + } + assertTrue(tkn.hasNext()); + + assertEquals("a", tkn.next()); + try { + tkn.remove(); + fail(); + } catch (final UnsupportedOperationException ex) { + } + try { + tkn.set("x"); + fail(); + } catch (final UnsupportedOperationException ex) { + } + try { + tkn.add("y"); + fail(); + } catch (final UnsupportedOperationException ex) { + } + assertTrue(tkn.hasPrevious()); + assertTrue(tkn.hasNext()); + + assertEquals("b", tkn.next()); + assertTrue(tkn.hasPrevious()); + assertTrue(tkn.hasNext()); + + assertEquals("c", tkn.next()); + assertTrue(tkn.hasPrevious()); + assertFalse(tkn.hasNext()); + + try { + tkn.next(); + fail(); + } catch (final NoSuchElementException ex) { + } + assertTrue(tkn.hasPrevious()); + assertFalse(tkn.hasNext()); + } + + // ----------------------------------------------------------------------- + @Test + public void testTokenizeSubclassInputChange() { + final StringTokenizer tkn = new StringTokenizer("a b c d e") { + + @Override + protected List<String> tokenize(final char[] chars, final int offset, final int count) { + return super.tokenize("w x y z".toCharArray(), 2, 5); + } + }; + assertEquals("x", tkn.next()); + assertEquals("y", tkn.next()); + } + + // ----------------------------------------------------------------------- + @Test + public void testTokenizeSubclassOutputChange() { + final StringTokenizer tkn = new StringTokenizer("a b c") { + + @Override + protected List<String> tokenize(final char[] chars, final int offset, final int count) { + final List<String> list = super.tokenize(chars, offset, count); + Collections.reverse(list); + return list; + } + }; + assertEquals("c", tkn.next()); + assertEquals("b", tkn.next()); + assertEquals("a", tkn.next()); + } + + // ----------------------------------------------------------------------- + @Test + public void testToString() { + final StringTokenizer tkn = new 
StringTokenizer("a b c d e"); + assertEquals("StringTokenizer[not tokenized yet]", tkn.toString()); + tkn.next(); + assertEquals("StringTokenizer[a, b, c, d, e]", tkn.toString()); + } + + // ----------------------------------------------------------------------- + @Test + public void testStringTokenizerStringMatcher() { + final char[] chars = { 'a', 'b', 'c', 'd' }; + final StringTokenizer tokens = new StringTokenizer(chars, "bc"); + assertEquals("a", tokens.next()); + assertEquals("d", tokens.next()); + } + + // ----------------------------------------------------------------------- + @Test + public void testStringTokenizerStrMatcher() { + final char[] chars = { 'a', ',', 'c' }; + final StringTokenizer tokens = new StringTokenizer(chars, StringMatcherFactory.INSTANCE.commaMatcher()); + assertEquals("a", tokens.next()); + assertEquals("c", tokens.next()); + } + + // ----------------------------------------------------------------------- + @Test + public void testStringTokenizerQuoteMatcher() { + final char[] chars = { '\'', 'a', 'c', '\'', 'd' }; + final StringTokenizer tokens = new StringTokenizer(chars, StringMatcherFactory.INSTANCE.commaMatcher(), + StringMatcherFactory.INSTANCE.quoteMatcher()); + assertEquals("acd", tokens.next()); + } + + @Test + public void testPreviousTokenAndSetEmptyTokenAsNull() { + final StringTokenizer strTokenizer = StringTokenizer.getTSVInstance(" \t\n\r\f"); + strTokenizer.setEmptyTokenAsNull(true); + + assertNull(strTokenizer.previousToken()); + } +} http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/TextStringBuilderTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/TextStringBuilderTest.java b/src/test/java/org/apache/commons/text/TextStringBuilderTest.java index 88d3a50..dec5d02 100644 --- a/src/test/java/org/apache/commons/text/TextStringBuilderTest.java +++ 
b/src/test/java/org/apache/commons/text/TextStringBuilderTest.java @@ -1691,7 +1691,7 @@ public class TextStringBuilderTest { // from Javadoc final TextStringBuilder b = new TextStringBuilder(); b.append("a b "); - final StrTokenizer t = b.asTokenizer(); + final StringTokenizer t = b.asTokenizer(); final String[] tokens1 = t.getTokenArray(); assertEquals(2, tokens1.length);