[text] [TEXT-116] Add a StrTokenizer replacement based on the StringMatcher interface: StringTokenizer.

ggregory Mon, 12 Feb 2018 10:47:26 -0800

Repository: commons-text
Updated Branches:
  refs/heads/master 995c44b71 -> 6d8b511f2



[TEXT-116] Add a StrTokenizer replacement based on the StringMatcher
interface: StringTokenizer.

Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/6d8b511f
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/6d8b511f
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/6d8b511f

Branch: refs/heads/master
Commit: 6d8b511f2081117a3c07a5e54392b1948df79248
Parents: 995c44b
Author: Gary Gregory <garydgreg...@gmail.com>
Authored: Mon Feb 12 11:47:03 2018 -0700
Committer: Gary Gregory <garydgreg...@gmail.com>
Committed: Mon Feb 12 11:47:03 2018 -0700

----------------------------------------------------------------------
 src/changes/changes.xml                         |    1 +
 .../org/apache/commons/text/StrTokenizer.java   |    2 +
 .../apache/commons/text/StringTokenizer.java    | 1176 ++++++++++++++++++
 .../apache/commons/text/TextStringBuilder.java  |   14 +-
 .../text/StrBuilderAppendInsertTest.java        |    3 +
 .../org/apache/commons/text/StrBuilderTest.java |    3 +
 .../org/apache/commons/text/StrLookupTest.java  |    3 +
 .../org/apache/commons/text/StrMatcherTest.java |    3 +
 .../apache/commons/text/StrSubstitutorTest.java |    3 +
 .../apache/commons/text/StrTokenizerTest.java   |    3 +
 .../commons/text/StringTokenizerTest.java       |  962 ++++++++++++++
 .../commons/text/TextStringBuilderTest.java     |    2 +-
 12 files changed, 2167 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 59a17cd..584c343 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -51,6 +51,7 @@ The <action> type attribute can be add,update,fix,remove.
     <action issue="TEXT-113" type="add" dev="ggregory">Add an interpolator 
string lookup</action>
     <action issue="TEXT-114" type="add" dev="ggregory">Add a StrSubstitutor 
replacement based on interfaces: StringSubstitutor</action>
     <action issue="TEXT-115" type="add" dev="ggregory">Add a StrBuilder 
replacement based on the StringMatcher interface: TextStringBuilder</action>
+    <action issue="TEXT-116" type="add" dev="ggregory">Add a StrTokenizer 
replacement based on the StringMatcher interface: StringTokenizer</action>
   </release>
 
   <release version="1.2" date="2017-12-12" description="Release 1.2">

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/main/java/org/apache/commons/text/StrTokenizer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/StrTokenizer.java 
b/src/main/java/org/apache/commons/text/StrTokenizer.java
index c07ce31..3ae662d 100644
--- a/src/main/java/org/apache/commons/text/StrTokenizer.java
+++ b/src/main/java/org/apache/commons/text/StrTokenizer.java
@@ -80,7 +80,9 @@ import java.util.NoSuchElementException;
  * </table>
  *
  * @since 1.0
+ * @deprecated Use {@link StringTokenizer}. This class will be removed in 2.0.
  */
+@Deprecated
 public class StrTokenizer implements ListIterator<String>, Cloneable {
 
     /** Comma separated values tokenizer internal variable. */

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/main/java/org/apache/commons/text/StringTokenizer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/StringTokenizer.java 
b/src/main/java/org/apache/commons/text/StringTokenizer.java
new file mode 100644
index 0000000..aaea4fb
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/StringTokenizer.java
@@ -0,0 +1,1176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.NoSuchElementException;
+
+import org.apache.commons.text.matcher.StringMatcher;
+import org.apache.commons.text.matcher.StringMatcherFactory;
+
+/**
+ * Tokenizes a string based on delimiters (separators) and supporting quoting 
and ignored character concepts.
+ * <p>
+ * This class can split a String into many smaller strings. It aims to do a 
similar job to
+ * {@link java.util.StringTokenizer StringTokenizer}, however it offers much 
more control and flexibility including
+ * implementing the <code>ListIterator</code> interface. By default, it is set 
up like <code>StringTokenizer</code>.
+ * <p>
+ * The input String is split into a number of <i>tokens</i>. Each token is 
separated from the next String by a
+ * <i>delimiter</i>. One or more delimiter characters must be specified.
+ * <p>
+ * Each token may be surrounded by quotes. The <i>quote</i> matcher specifies 
the quote character(s). A quote may be
+ * escaped within a quoted section by duplicating itself.
+ * <p>
+ * Between each token and the delimiter are potentially characters that need 
trimming. The <i>trimmer</i> matcher
+ * specifies these characters. One usage might be to trim whitespace 
characters.
+ * <p>
+ * At any point outside the quotes there might potentially be invalid 
characters. The <i>ignored</i> matcher specifies
+ * these characters to be removed. One usage might be to remove new line 
characters.
+ * <p>
+ * Empty tokens may be removed or returned as null.
+ *
+ * <pre>
+ * "a,b,c"         - Three tokens "a","b","c"   (comma delimiter)
+ * " a, b , c "    - Three tokens "a","b","c"   (default CSV processing trims 
whitespace)
+ * "a, ", b ,", c" - Three tokens "a, " , " b ", ", c" (quoted text untouched)
+ * </pre>
+ * <p>
+ *
+ * This tokenizer has the following properties and options:
+ *
+ * <table summary="Tokenizer Properties">
+ * <tr>
+ * <th>Property</th>
+ * <th>Type</th>
+ * <th>Default</th>
+ * </tr>
+ * <tr>
+ * <td>delim</td>
+ * <td>CharSetMatcher</td>
+ * <td>{ \t\n\r\f}</td>
+ * </tr>
+ * <tr>
+ * <td>quote</td>
+ * <td>NoneMatcher</td>
+ * <td>{}</td>
+ * </tr>
+ * <tr>
+ * <td>ignore</td>
+ * <td>NoneMatcher</td>
+ * <td>{}</td>
+ * </tr>
+ * <tr>
+ * <td>emptyTokenAsNull</td>
+ * <td>boolean</td>
+ * <td>false</td>
+ * </tr>
+ * <tr>
+ * <td>ignoreEmptyTokens</td>
+ * <td>boolean</td>
+ * <td>true</td>
+ * </tr>
+ * </table>
+ *
+ * @since 1.3
+ */
+public class StringTokenizer implements ListIterator<String>, Cloneable {
+
+    /** Comma separated values tokenizer internal variable. */
+    private static final StringTokenizer CSV_TOKENIZER_PROTOTYPE;
+    /** Tab separated values tokenizer internal variable. */
+    private static final StringTokenizer TSV_TOKENIZER_PROTOTYPE;
+    static {
+        CSV_TOKENIZER_PROTOTYPE = new StringTokenizer();
+        
CSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StringMatcherFactory.INSTANCE.commaMatcher());
+        
CSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StringMatcherFactory.INSTANCE.doubleQuoteMatcher());
+        
CSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher());
+        
CSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+        CSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false);
+        CSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false);
+
+        TSV_TOKENIZER_PROTOTYPE = new StringTokenizer();
+        
TSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StringMatcherFactory.INSTANCE.tabMatcher());
+        
TSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StringMatcherFactory.INSTANCE.doubleQuoteMatcher());
+        
TSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher());
+        
TSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+        TSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false);
+        TSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false);
+    }
+
+    /** The text to work on. */
+    private char[] chars;
+    /** The parsed tokens. */
+    private String[] tokens;
+    /** The current iteration position. */
+    private int tokenPos;
+
+    /** The delimiter matcher. */
+    private StringMatcher delimMatcher = 
StringMatcherFactory.INSTANCE.splitMatcher();
+    /** The quote matcher. */
+    private StringMatcher quoteMatcher = 
StringMatcherFactory.INSTANCE.noneMatcher();
+    /** The ignored matcher. */
+    private StringMatcher ignoredMatcher = 
StringMatcherFactory.INSTANCE.noneMatcher();
+    /** The trimmer matcher. */
+    private StringMatcher trimmerMatcher = 
StringMatcherFactory.INSTANCE.noneMatcher();
+
+    /** Whether to return empty tokens as null. */
+    private boolean emptyAsNull = false;
+    /** Whether to ignore empty tokens. */
+    private boolean ignoreEmptyTokens = true;
+
+    // -----------------------------------------------------------------------
+
+    /**
+     * Returns a clone of <code>CSV_TOKENIZER_PROTOTYPE</code>.
+     *
+     * @return a clone of <code>CSV_TOKENIZER_PROTOTYPE</code>.
+     */
+    private static StringTokenizer getCSVClone() {
+        return (StringTokenizer) CSV_TOKENIZER_PROTOTYPE.clone();
+    }
+
+    /**
+     * Gets a new tokenizer instance which parses Comma Separated Value 
strings, with no input text set yet.
+     * The default for CSV processing is to trim whitespace from both ends 
(which can be overridden with the
+     * setTrimmerMatcher method).
+     * <p>
+     * You must call a "reset" method to set the string which you want to 
parse.
+     *
+     * @return a new tokenizer instance which parses Comma Separated Value 
strings
+     */
+    public static StringTokenizer getCSVInstance() {
+        return getCSVClone();
+    }
+
+    /**
+     * Gets a new tokenizer instance which parses Comma Separated Value 
strings initializing it with the given input.
+     * The default for CSV processing is to trim whitespace from both ends 
(which can be overridden with the
+     * setTrimmerMatcher method).
+     *
+     * @param input
+     *            the text to parse
+     * @return a new tokenizer instance which parses Comma Separated Value 
strings
+     */
+    public static StringTokenizer getCSVInstance(final String input) {
+        final StringTokenizer tok = getCSVClone();
+        tok.reset(input);
+        return tok;
+    }
+
+    /**
+     * Gets a new tokenizer instance which parses Comma Separated Value 
strings initializing it with the given input.
+     * The default for CSV processing is to trim whitespace from both ends 
(which can be overridden with the
+     * setTrimmerMatcher method).
+     *
+     * @param input
+     *            the text to parse
+     * @return a new tokenizer instance which parses Comma Separated Value 
strings
+     */
+    public static StringTokenizer getCSVInstance(final char[] input) {
+        final StringTokenizer tok = getCSVClone();
+        tok.reset(input);
+        return tok;
+    }
+
+    /**
+     * Returns a clone of <code>TSV_TOKENIZER_PROTOTYPE</code>.
+     *
+     * @return a clone of <code>TSV_TOKENIZER_PROTOTYPE</code>.
+     */
+    private static StringTokenizer getTSVClone() {
+        return (StringTokenizer) TSV_TOKENIZER_PROTOTYPE.clone();
+    }
+
+    /**
+     * Gets a new tokenizer instance which parses Tab Separated Value strings. 
The default for TSV processing is to
+     * trim whitespace from both ends (which can be overridden with the 
setTrimmerMatcher method).
+     * <p>
+     * You must call a "reset" method to set the string which you want to 
parse.
+     *
+     * @return a new tokenizer instance which parses Tab Separated Value 
strings.
+     */
+    public static StringTokenizer getTSVInstance() {
+        return getTSVClone();
+    }
+
+    /**
+     * Gets a new tokenizer instance which parses Tab Separated Value strings. 
The default for TSV processing is to
+     * trim whitespace from both ends (which can be overridden with the 
setTrimmerMatcher method).
+     *
+     * @param input
+     *            the string to parse
+     * @return a new tokenizer instance which parses Tab Separated Value 
strings.
+     */
+    public static StringTokenizer getTSVInstance(final String input) {
+        final StringTokenizer tok = getTSVClone();
+        tok.reset(input);
+        return tok;
+    }
+
+    /**
+     * Gets a new tokenizer instance which parses Tab Separated Value strings. 
The default for TSV processing is to
+     * trim whitespace from both ends (which can be overridden with the 
setTrimmerMatcher method).
+     *
+     * @param input
+     *            the string to parse
+     * @return a new tokenizer instance which parses Tab Separated Value 
strings.
+     */
+    public static StringTokenizer getTSVInstance(final char[] input) {
+        final StringTokenizer tok = getTSVClone();
+        tok.reset(input);
+        return tok;
+    }
+
+    // -----------------------------------------------------------------------
+    /**
+     * Constructs a tokenizer splitting on space, tab, newline and form feed 
as per StringTokenizer, but with no text to
+     * tokenize.
+     * <p>
+     * This constructor is normally used with {@link #reset(String)}.
+     */
+    public StringTokenizer() {
+        super();
+        this.chars = null;
+    }
+
+    /**
+     * Constructs a tokenizer splitting on space, tab, newline and form feed 
as per StringTokenizer.
+     *
+     * @param input
+     *            the string which is to be parsed
+     */
+    public StringTokenizer(final String input) {
+        super();
+        if (input != null) {
+            chars = input.toCharArray();
+        } else {
+            chars = null;
+        }
+    }
+
+    /**
+     * Constructs a tokenizer splitting on the specified delimiter character.
+     *
+     * @param input
+     *            the string which is to be parsed
+     * @param delim
+     *            the field delimiter character
+     */
+    public StringTokenizer(final String input, final char delim) {
+        this(input);
+        setDelimiterChar(delim);
+    }
+
+    /**
+     * Constructs a tokenizer splitting on the specified delimiter string.
+     *
+     * @param input
+     *            the string which is to be parsed
+     * @param delim
+     *            the field delimiter string
+     */
+    public StringTokenizer(final String input, final String delim) {
+        this(input);
+        setDelimiterString(delim);
+    }
+
+    /**
+     * Constructs a tokenizer splitting using the specified delimiter matcher.
+     *
+     * @param input
+     *            the string which is to be parsed
+     * @param delim
+     *            the field delimiter matcher
+     */
+    public StringTokenizer(final String input, final StringMatcher delim) {
+        this(input);
+        setDelimiterMatcher(delim);
+    }
+
+    /**
+     * Constructs a tokenizer splitting on the specified delimiter character 
and handling quotes using the specified
+     * quote character.
+     *
+     * @param input
+     *            the string which is to be parsed
+     * @param delim
+     *            the field delimiter character
+     * @param quote
+     *            the field quoted string character
+     */
+    public StringTokenizer(final String input, final char delim, final char 
quote) {
+        this(input, delim);
+        setQuoteChar(quote);
+    }
+
+    /**
+     * Constructs a tokenizer splitting using the specified delimiter matcher 
and handling quotes using the specified
+     * quote matcher.
+     *
+     * @param input
+     *            the string which is to be parsed
+     * @param delim
+     *            the field delimiter matcher
+     * @param quote
+     *            the field quoted string matcher
+     */
+    public StringTokenizer(final String input, final StringMatcher delim, 
final StringMatcher quote) {
+        this(input, delim);
+        setQuoteMatcher(quote);
+    }
+
+    /**
+     * Constructs a tokenizer splitting on space, tab, newline and form feed 
as per StringTokenizer.
+     *
+     * @param input
+     *            the character array which is to be parsed, cloned internally
+     */
+    public StringTokenizer(final char[] input) {
+        super();
+        if (input == null) {
+            this.chars = null;
+        } else {
+            this.chars = input.clone();
+        }
+    }
+
+    /**
+     * Constructs a tokenizer splitting on the specified character.
+     *
+     * @param input
+     *            the character array which is to be parsed, cloned internally
+     * @param delim
+     *            the field delimiter character
+     */
+    public StringTokenizer(final char[] input, final char delim) {
+        this(input);
+        setDelimiterChar(delim);
+    }
+
+    /**
+     * Constructs a tokenizer splitting on the specified string.
+     *
+     * @param input
+     *            the character array which is to be parsed, cloned internally
+     * @param delim
+     *            the field delimiter string
+     */
+    public StringTokenizer(final char[] input, final String delim) {
+        this(input);
+        setDelimiterString(delim);
+    }
+
+    /**
+     * Constructs a tokenizer splitting using the specified delimiter matcher.
+     *
+     * @param input
+     *            the character array which is to be parsed, cloned internally
+     * @param delim
+     *            the field delimiter matcher
+     */
+    public StringTokenizer(final char[] input, final StringMatcher delim) {
+        this(input);
+        setDelimiterMatcher(delim);
+    }
+
+    /**
+     * Constructs a tokenizer splitting on the specified delimiter character 
and handling quotes using the specified
+     * quote character.
+     *
+     * @param input
+     *            the character array which is to be parsed, cloned internally
+     * @param delim
+     *            the field delimiter character
+     * @param quote
+     *            the field quoted string character
+     */
+    public StringTokenizer(final char[] input, final char delim, final char 
quote) {
+        this(input, delim);
+        setQuoteChar(quote);
+    }
+
+    /**
+     * Constructs a tokenizer splitting using the specified delimiter matcher 
and handling quotes using the specified
+     * quote matcher.
+     *
+     * @param input
+     *            the character array which is to be parsed, cloned internally
+     * @param delim
+     *            the field delimiter matcher
+     * @param quote
+     *            the field quoted string matcher
+     */
+    public StringTokenizer(final char[] input, final StringMatcher delim, 
final StringMatcher quote) {
+        this(input, delim);
+        setQuoteMatcher(quote);
+    }
+
+    // API
+    // -----------------------------------------------------------------------
+    /**
+     * Gets the number of tokens found in the String.
+     *
+     * @return the number of matched tokens
+     */
+    public int size() {
+        checkTokenized();
+        return tokens.length;
+    }
+
+    /**
+     * Gets the next token from the String. Equivalent to {@link #next()} 
except it returns null rather than throwing
+     * {@link NoSuchElementException} when no tokens remain.
+     *
+     * @return the next sequential token, or null when no more tokens are found
+     */
+    public String nextToken() {
+        if (hasNext()) {
+            return tokens[tokenPos++];
+        }
+        return null;
+    }
+
+    /**
+     * Gets the previous token from the String.
+     *
+     * @return the previous sequential token, or null when no more tokens are 
found
+     */
+    public String previousToken() {
+        if (hasPrevious()) {
+            return tokens[--tokenPos];
+        }
+        return null;
+    }
+
+    /**
+     * Gets a copy of the full token list as an independent modifiable array.
+     *
+     * @return the tokens as a String array
+     */
+    public String[] getTokenArray() {
+        checkTokenized();
+        return tokens.clone();
+    }
+
+    /**
+     * Gets a copy of the full token list as an independent modifiable list.
+     *
+     * @return the tokens as a String list
+     */
+    public List<String> getTokenList() {
+        checkTokenized();
+        final List<String> list = new ArrayList<>(tokens.length);
+        Collections.addAll(list, tokens);
+
+        return list;
+    }
+
+    /**
+     * Resets this tokenizer, forgetting all parsing and iteration already 
completed.
+     * <p>
+     * This method allows the same tokenizer to be reused for the same String.
+     *
+     * @return this, to enable chaining
+     */
+    public StringTokenizer reset() {
+        tokenPos = 0;
+        tokens = null;
+        return this;
+    }
+
+    /**
+     * Reset this tokenizer, giving it a new input string to parse. In this 
manner you can re-use a tokenizer with the
+     * same settings on multiple input lines.
+     *
+     * @param input
+     *            the new string to tokenize, null sets no text to parse
+     * @return this, to enable chaining
+     */
+    public StringTokenizer reset(final String input) {
+        reset();
+        if (input != null) {
+            this.chars = input.toCharArray();
+        } else {
+            this.chars = null;
+        }
+        return this;
+    }
+
+    /**
+     * Reset this tokenizer, giving it a new input string to parse. In this 
manner you can re-use a tokenizer with the
+     * same settings on multiple input lines.
+     *
+     * @param input
+     *            the new character array to tokenize, cloned internally, null sets 
no text to parse
+     * @return this, to enable chaining
+     */
+    public StringTokenizer reset(final char[] input) {
+        reset();
+        if (input != null) {
+            this.chars = input.clone();
+        } else {
+            this.chars = null;
+        }
+        return this;
+    }
+
+    // ListIterator
+    // -----------------------------------------------------------------------
+    /**
+     * Checks whether there are any more tokens.
+     *
+     * @return true if there are more tokens
+     */
+    @Override
+    public boolean hasNext() {
+        checkTokenized();
+        return tokenPos < tokens.length;
+    }
+
+    /**
+     * Gets the next token.
+     *
+     * @return the next String token
+     * @throws NoSuchElementException
+     *             if there are no more elements
+     */
+    @Override
+    public String next() {
+        if (hasNext()) {
+            return tokens[tokenPos++];
+        }
+        throw new NoSuchElementException();
+    }
+
+    /**
+     * Gets the index of the next token to return.
+     *
+     * @return the next token index
+     */
+    @Override
+    public int nextIndex() {
+        return tokenPos;
+    }
+
+    /**
+     * Checks whether there are any previous tokens that can be iterated to.
+     *
+     * @return true if there are previous tokens
+     */
+    @Override
+    public boolean hasPrevious() {
+        checkTokenized();
+        return tokenPos > 0;
+    }
+
+    /**
+     * Gets the token previous to the last returned token.
+     *
+     * @return the previous token
+     */
+    @Override
+    public String previous() {
+        if (hasPrevious()) {
+            return tokens[--tokenPos];
+        }
+        throw new NoSuchElementException();
+    }
+
+    /**
+     * Gets the index of the previous token.
+     *
+     * @return the previous token index
+     */
+    @Override
+    public int previousIndex() {
+        return tokenPos - 1;
+    }
+
+    /**
+     * Unsupported ListIterator operation.
+     *
+     * @throws UnsupportedOperationException
+     *             always
+     */
+    @Override
+    public void remove() {
+        throw new UnsupportedOperationException("remove() is unsupported");
+    }
+
+    /**
+     * Unsupported ListIterator operation.
+     *
+     * @param obj
+     *            this parameter ignored.
+     * @throws UnsupportedOperationException
+     *             always
+     */
+    @Override
+    public void set(final String obj) {
+        throw new UnsupportedOperationException("set() is unsupported");
+    }
+
+    /**
+     * Unsupported ListIterator operation.
+     *
+     * @param obj
+     *            this parameter ignored.
+     * @throws UnsupportedOperationException
+     *             always
+     */
+    @Override
+    public void add(final String obj) {
+        throw new UnsupportedOperationException("add() is unsupported");
+    }
+
+    // Implementation
+    // -----------------------------------------------------------------------
+    /**
+     * Checks if tokenization has been done, and if not then do it.
+     */
+    private void checkTokenized() {
+        if (tokens == null) {
+            if (chars == null) {
+                // still call tokenize as subclass may do some work
+                final List<String> split = tokenize(null, 0, 0);
+                tokens = split.toArray(new String[split.size()]);
+            } else {
+                final List<String> split = tokenize(chars, 0, chars.length);
+                tokens = split.toArray(new String[split.size()]);
+            }
+        }
+    }
+
+    /**
+     * Internal method that performs the tokenization.
+     * <p>
+     * Most users of this class do not need to call this method. This method 
will be called automatically by other
+     * (public) methods when required.
+     * <p>
+     * This method exists to allow subclasses to add code before or after the 
tokenization. For example, a subclass
+     * could alter the character array, offset or count to be parsed, or call 
the tokenizer multiple times on multiple
+     * strings. It is also possible to filter the results.
+     * <p>
+     * <code>StringTokenizer</code> will always pass a zero offset and a count 
equal to the length of the array to this
+     * method, however a subclass may pass other values, or even an entirely 
different array.
+     *
+     * @param srcChars
+     *            the character array being tokenized, may be null
+     * @param offset
+     *            the start position within the character array, must be valid
+     * @param count
+     *            the number of characters to tokenize, must be valid
+     * @return the modifiable list of String tokens, unmodifiable if null 
array or zero count
+     */
+    protected List<String> tokenize(final char[] srcChars, final int offset, 
final int count) {
+        if (srcChars == null || count == 0) {
+            return Collections.emptyList();
+        }
+        final TextStringBuilder buf = new TextStringBuilder();
+        final List<String> tokenList = new ArrayList<>();
+        int pos = offset;
+
+        // loop around the entire buffer
+        while (pos >= 0 && pos < count) {
+            // find next token
+            pos = readNextToken(srcChars, pos, count, buf, tokenList);
+
+            // handle case where end of string is a delimiter
+            if (pos >= count) {
+                addToken(tokenList, "");
+            }
+        }
+        return tokenList;
+    }
+
+    /**
+     * Adds a token to a list, paying attention to the parameters we've set.
+     *
+     * @param list
+     *            the list to add to
+     * @param tok
+     *            the token to add
+     */
+    private void addToken(final List<String> list, String tok) {
+        if (tok == null || tok.length() == 0) {
+            if (isIgnoreEmptyTokens()) {
+                return;
+            }
+            if (isEmptyTokenAsNull()) {
+                tok = null;
+            }
+        }
+        list.add(tok);
+    }
+
+    /**
+     * Reads character by character through the String to get the next token.
+     *
+     * @param srcChars
+     *            the character array being tokenized
+     * @param start
+     *            the first character of field
+     * @param len
+     *            the length of the character array being tokenized
+     * @param workArea
+     *            a temporary work area
+     * @param tokenList
+     *            the list of parsed tokens
+     * @return the starting position of the next field (the character 
immediately after the delimiter), or -1 if end of
+     *         string found
+     */
+    private int readNextToken(final char[] srcChars, int start, final int len, 
final TextStringBuilder workArea,
+            final List<String> tokenList) {
+        // skip all leading whitespace, unless it is the
+        // field delimiter or the quote character
+        while (start < len) {
+            final int removeLen = 
Math.max(getIgnoredMatcher().isMatch(srcChars, start, start, len),
+                    getTrimmerMatcher().isMatch(srcChars, start, start, len));
+            if (removeLen == 0 || getDelimiterMatcher().isMatch(srcChars, 
start, start, len) > 0
+                    || getQuoteMatcher().isMatch(srcChars, start, start, len) 
> 0) {
+                break;
+            }
+            start += removeLen;
+        }
+
+        // handle reaching end
+        if (start >= len) {
+            addToken(tokenList, "");
+            return -1;
+        }
+
+        // handle empty token
+        final int delimLen = getDelimiterMatcher().isMatch(srcChars, start, 
start, len);
+        if (delimLen > 0) {
+            addToken(tokenList, "");
+            return start + delimLen;
+        }
+
+        // handle found token
+        final int quoteLen = getQuoteMatcher().isMatch(srcChars, start, start, 
len);
+        if (quoteLen > 0) {
+            return readWithQuotes(srcChars, start + quoteLen, len, workArea, 
tokenList, start, quoteLen);
+        }
+        return readWithQuotes(srcChars, start, len, workArea, tokenList, 0, 0);
+    }
+
+    /**
+     * Reads a possibly quoted string token.
+     *
+     * @param srcChars
+     *            the character array being tokenized
+     * @param start
+     *            the first character of field
+     * @param len
+     *            the length of the character array being tokenized
+     * @param workArea
+     *            a temporary work area
+     * @param tokenList
+     *            the list of parsed tokens
+     * @param quoteStart
+     *            the start position of the matched quote, 0 if no quoting
+     * @param quoteLen
+     *            the length of the matched quote, 0 if no quoting
+     * @return the starting position of the next field (the character 
immediately after the delimiter), or if end of
+     *         string found, then the length of string
+     */
+    private int readWithQuotes(final char[] srcChars, final int start, final 
int len, final TextStringBuilder workArea,
+            final List<String> tokenList, final int quoteStart, final int 
quoteLen) {
+        // Loop until we've found the end of the quoted
+        // string or the end of the input
+        workArea.clear();
+        int pos = start;
+        boolean quoting = quoteLen > 0;
+        int trimStart = 0;
+
+        while (pos < len) {
+            // quoting mode can occur several times throughout a string
+            // we must switch between quoting and non-quoting until we
+            // encounter a non-quoted delimiter, or end of string
+            if (quoting) {
+                // In quoting mode
+
+                // If we've found a quote character, see if it's
+                // followed by a second quote. If so, then we need
+                // to actually put the quote character into the token
+                // rather than end the token.
+                if (isQuote(srcChars, pos, len, quoteStart, quoteLen)) {
+                    if (isQuote(srcChars, pos + quoteLen, len, quoteStart, 
quoteLen)) {
+                        // matched pair of quotes, thus an escaped quote
+                        workArea.append(srcChars, pos, quoteLen);
+                        pos += quoteLen * 2;
+                        trimStart = workArea.size();
+                        continue;
+                    }
+
+                    // end of quoting
+                    quoting = false;
+                    pos += quoteLen;
+                    continue;
+                }
+
+                // copy regular character from inside quotes
+                workArea.append(srcChars[pos++]);
+                trimStart = workArea.size();
+
+            } else {
+                // Not in quoting mode
+
+                // check for delimiter, and thus end of token
+                final int delimLen = getDelimiterMatcher().isMatch(srcChars, 
pos, start, len);
+                if (delimLen > 0) {
+                    // return condition when end of token found
+                    addToken(tokenList, workArea.substring(0, trimStart));
+                    return pos + delimLen;
+                }
+
+                // check for quote, and thus back into quoting mode
+                if (quoteLen > 0 && isQuote(srcChars, pos, len, quoteStart, 
quoteLen)) {
+                    quoting = true;
+                    pos += quoteLen;
+                    continue;
+                }
+
+                // check for ignored (outside quotes), and ignore
+                final int ignoredLen = getIgnoredMatcher().isMatch(srcChars, 
pos, start, len);
+                if (ignoredLen > 0) {
+                    pos += ignoredLen;
+                    continue;
+                }
+
+                // check for trimmed character
+                // don't yet know if its at the end, so copy to workArea
+                // use trimStart to keep track of trim at the end
+                final int trimmedLen = getTrimmerMatcher().isMatch(srcChars, 
pos, start, len);
+                if (trimmedLen > 0) {
+                    workArea.append(srcChars, pos, trimmedLen);
+                    pos += trimmedLen;
+                    continue;
+                }
+
+                // copy regular character from outside quotes
+                workArea.append(srcChars[pos++]);
+                trimStart = workArea.size();
+            }
+        }
+
+        // return condition when end of string found
+        addToken(tokenList, workArea.substring(0, trimStart));
+        return -1;
+    }
+
+    /**
+     * Checks if the characters at the index specified match the quote already 
matched in readNextToken().
+     *
+     * @param srcChars
+     *            the character array being tokenized
+     * @param pos
+     *            the position to check for a quote
+     * @param len
+     *            the length of the character array being tokenized
+     * @param quoteStart
+     *            the start position of the matched quote, 0 if no quoting
+     * @param quoteLen
+     *            the length of the matched quote, 0 if no quoting
+     * @return true if a quote is matched
+     */
+    private boolean isQuote(final char[] srcChars, final int pos, final int 
len, final int quoteStart,
+            final int quoteLen) {
+        for (int i = 0; i < quoteLen; i++) {
+            if (pos + i >= len || srcChars[pos + i] != srcChars[quoteStart + 
i]) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // Delimiter
+    // -----------------------------------------------------------------------
+    /**
+     * Gets the field delimiter matcher.
+     *
+     * @return the delimiter matcher in use
+     */
+    public StringMatcher getDelimiterMatcher() {
+        return this.delimMatcher;
+    }
+
+    /**
+     * Sets the field delimiter matcher.
+     * <p>
+     * The delimiter is used to separate one token from another.
+     *
+     * @param delim
+     *            the delimiter matcher to use
+     * @return this, to enable chaining
+     */
+    public StringTokenizer setDelimiterMatcher(final StringMatcher delim) {
+        if (delim == null) {
+            this.delimMatcher = StringMatcherFactory.INSTANCE.noneMatcher();
+        } else {
+            this.delimMatcher = delim;
+        }
+        return this;
+    }
+
+    /**
+     * Sets the field delimiter character.
+     *
+     * @param delim
+     *            the delimiter character to use
+     * @return this, to enable chaining
+     */
+    public StringTokenizer setDelimiterChar(final char delim) {
+        return 
setDelimiterMatcher(StringMatcherFactory.INSTANCE.charMatcher(delim));
+    }
+
+    /**
+     * Sets the field delimiter string.
+     *
+     * @param delim
+     *            the delimiter string to use
+     * @return this, to enable chaining
+     */
+    public StringTokenizer setDelimiterString(final String delim) {
+        return 
setDelimiterMatcher(StringMatcherFactory.INSTANCE.stringMatcher(delim));
+    }
+
+    // Quote
+    // -----------------------------------------------------------------------
+    /**
+     * Gets the quote matcher currently in use.
+     * <p>
+     * The quote character is used to wrap data between the tokens. This 
enables delimiters to be entered as data. By
+     * default nothing is treated as a quote.
+     *
+     * @return the quote matcher in use
+     */
+    public StringMatcher getQuoteMatcher() {
+        return quoteMatcher;
+    }
+
+    /**
+     * Set the quote matcher to use.
+     * <p>
+     * The quote character is used to wrap data between the tokens. This 
enables delimiters to be entered as data.
+     *
+     * @param quote
+     *            the quote matcher to use, null ignored
+     * @return this, to enable chaining
+     */
+    public StringTokenizer setQuoteMatcher(final StringMatcher quote) {
+        if (quote != null) {
+            this.quoteMatcher = quote;
+        }
+        return this;
+    }
+
+    /**
+     * Sets the quote character to use.
+     * <p>
+     * The quote character is used to wrap data between the tokens. This 
enables delimiters to be entered as data.
+     *
+     * @param quote
+     *            the quote character to use
+     * @return this, to enable chaining
+     */
+    public StringTokenizer setQuoteChar(final char quote) {
+        return 
setQuoteMatcher(StringMatcherFactory.INSTANCE.charMatcher(quote));
+    }
+
+    // Ignored
+    // -----------------------------------------------------------------------
+    /**
+     * Gets the ignored character matcher.
+     * <p>
+     * These characters are ignored when parsing the String, unless they are 
within a quoted region. The default value
+     * is not to ignore anything.
+     *
+     * @return the ignored matcher in use
+     */
+    public StringMatcher getIgnoredMatcher() {
+        return ignoredMatcher;
+    }
+
+    /**
+     * Set the matcher for characters to ignore.
+     * <p>
+     * These characters are ignored when parsing the String, unless they are 
within a quoted region.
+     *
+     * @param ignored
+     *            the ignored matcher to use, null ignored
+     * @return this, to enable chaining
+     */
+    public StringTokenizer setIgnoredMatcher(final StringMatcher ignored) {
+        if (ignored != null) {
+            this.ignoredMatcher = ignored;
+        }
+        return this;
+    }
+
+    /**
+     * Set the character to ignore.
+     * <p>
+     * This character is ignored when parsing the String, unless it is within 
a quoted region.
+     *
+     * @param ignored
+     *            the ignored character to use
+     * @return this, to enable chaining
+     */
+    public StringTokenizer setIgnoredChar(final char ignored) {
+        return 
setIgnoredMatcher(StringMatcherFactory.INSTANCE.charMatcher(ignored));
+    }
+
+    // Trimmer
+    // -----------------------------------------------------------------------
+    /**
+     * Gets the trimmer character matcher.
+     * <p>
+     * These characters are trimmed off on each side of the delimiter until 
the token or quote is found. The default
+     * value is not to trim anything.
+     *
+     * @return the trimmer matcher in use
+     */
+    public StringMatcher getTrimmerMatcher() {
+        return trimmerMatcher;
+    }
+
+    /**
+     * Sets the matcher for characters to trim.
+     * <p>
+     * These characters are trimmed off on each side of the delimiter until 
the token or quote is found.
+     *
+     * @param trimmer
+     *            the trimmer matcher to use, null ignored
+     * @return this, to enable chaining
+     */
+    public StringTokenizer setTrimmerMatcher(final StringMatcher trimmer) {
+        if (trimmer != null) {
+            this.trimmerMatcher = trimmer;
+        }
+        return this;
+    }
+
+    // -----------------------------------------------------------------------
+    /**
+     * Gets whether the tokenizer currently returns empty tokens as null. The 
default for this property is false.
+     *
+     * @return true if empty tokens are returned as null
+     */
+    public boolean isEmptyTokenAsNull() {
+        return this.emptyAsNull;
+    }
+
+    /**
+     * Sets whether the tokenizer should return empty tokens as null. The 
default for this property is false.
+     *
+     * @param emptyAsNull
+     *            whether empty tokens are returned as null
+     * @return this, to enable chaining
+     */
+    public StringTokenizer setEmptyTokenAsNull(final boolean emptyAsNull) {
+        this.emptyAsNull = emptyAsNull;
+        return this;
+    }
+
+    // -----------------------------------------------------------------------
+    /**
+     * Gets whether the tokenizer currently ignores empty tokens. The default 
for this property is true.
+     *
+     * @return true if empty tokens are not returned
+     */
+    public boolean isIgnoreEmptyTokens() {
+        return ignoreEmptyTokens;
+    }
+
+    /**
+     * Sets whether the tokenizer should ignore and not return empty tokens. 
The default for this property is true.
+     *
+     * @param ignoreEmptyTokens
+     *            whether empty tokens are not returned
+     * @return this, to enable chaining
+     */
+    public StringTokenizer setIgnoreEmptyTokens(final boolean 
ignoreEmptyTokens) {
+        this.ignoreEmptyTokens = ignoreEmptyTokens;
+        return this;
+    }
+
+    // -----------------------------------------------------------------------
+    /**
+     * Gets the String content that the tokenizer is parsing.
+     *
+     * @return the string content being parsed
+     */
+    public String getContent() {
+        if (chars == null) {
+            return null;
+        }
+        return new String(chars);
+    }
+
+    // -----------------------------------------------------------------------
+    /**
+     * Creates a new instance of this Tokenizer. The new instance is reset so 
that it will be at the start of the token
+     * list. If a {@link CloneNotSupportedException} is caught, return 
<code>null</code>.
+     *
+     * @return a new instance of this Tokenizer which has been reset.
+     */
+    @Override
+    public Object clone() {
+        try {
+            return cloneReset();
+        } catch (final CloneNotSupportedException ex) {
+            return null;
+        }
+    }
+
+    /**
+     * Creates a new instance of this Tokenizer. The new instance is reset so 
that it will be at the start of the token
+     * list.
+     *
+     * @return a new instance of this Tokenizer which has been reset.
+     * @throws CloneNotSupportedException
+     *             if there is a problem cloning
+     */
+    Object cloneReset() throws CloneNotSupportedException {
+        // this method exists to enable 100% test coverage
+        final StringTokenizer cloned = (StringTokenizer) super.clone();
+        if (cloned.chars != null) {
+            cloned.chars = cloned.chars.clone();
+        }
+        cloned.reset();
+        return cloned;
+    }
+
+    // -----------------------------------------------------------------------
+    /**
+     * Describes this tokenizer by its token list, if already tokenized.
+     *
+     * @return the token list as a String, or a placeholder if not tokenized
+     */
+    @Override
+    public String toString() {
+        if (tokens == null) {
+            return "StringTokenizer[not tokenized yet]";
+        }
+        return "StringTokenizer" + getTokenList();
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/main/java/org/apache/commons/text/TextStringBuilder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/TextStringBuilder.java 
b/src/main/java/org/apache/commons/text/TextStringBuilder.java
index 8ab9322..8943d03 100644
--- a/src/main/java/org/apache/commons/text/TextStringBuilder.java
+++ b/src/main/java/org/apache/commons/text/TextStringBuilder.java
@@ -2779,7 +2779,7 @@ public class TextStringBuilder implements CharSequence, 
Appendable, Serializable
      * <p>
      * The returned tokenizer is linked to this builder. You may intermix 
calls to the builder and tokenizer within
      * certain limits, however there is no synchronization. Once the tokenizer 
has been used once, it must be
-     * {@link StrTokenizer#reset() reset} to pickup the latest changes in the 
builder. For example:
+     * {@link StringTokenizer#reset() reset} to pickup the latest changes in 
the builder. For example:
      *
      * <pre>
      * StrBuilder b = new StrBuilder();
@@ -2795,13 +2795,13 @@ public class TextStringBuilder implements CharSequence, 
Appendable, Serializable
      * In addition to simply intermixing appends and tokenization, you can 
also call the set methods on the tokenizer to
      * alter how it tokenizes. Just remember to call reset when you want to 
pickup builder changes.
      * <p>
-     * Calling {@link StrTokenizer#reset(String)} or {@link 
StrTokenizer#reset(char[])} with a non-null value will break
-     * the link with the builder.
+     * Calling {@link StringTokenizer#reset(String)} or {@link 
StringTokenizer#reset(char[])} with a non-null value will
+     * break the link with the builder.
      *
      * @return a tokenizer that is linked to this builder
      */
-    public StrTokenizer asTokenizer() {
-        return new StrBuilderTokenizer();
+    public StringTokenizer asTokenizer() {
+        return new TextStringBuilderTokenizer();
     }
 
     // -----------------------------------------------------------------------
@@ -3038,12 +3038,12 @@ public class TextStringBuilder implements CharSequence, 
Appendable, Serializable
     /**
      * Inner class to allow StrBuilder to operate as a tokenizer.
      */
-    class StrBuilderTokenizer extends StrTokenizer {
+    class TextStringBuilderTokenizer extends StringTokenizer {
 
         /**
          * Default constructor.
          */
-        StrBuilderTokenizer() {
+        TextStringBuilderTokenizer() {
             super();
         }
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrBuilderAppendInsertTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/commons/text/StrBuilderAppendInsertTest.java 
b/src/test/java/org/apache/commons/text/StrBuilderAppendInsertTest.java
index 3078808..db93410 100644
--- a/src/test/java/org/apache/commons/text/StrBuilderAppendInsertTest.java
+++ b/src/test/java/org/apache/commons/text/StrBuilderAppendInsertTest.java
@@ -30,7 +30,10 @@ import org.junit.Test;
 
 /**
  * Unit tests for {@link StrBuilder}.
+ *
+ * @deprecated This class will be removed in 2.0.
  */
+@Deprecated
 public class StrBuilderAppendInsertTest {
 
     /** The system line separator. */

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrBuilderTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/StrBuilderTest.java 
b/src/test/java/org/apache/commons/text/StrBuilderTest.java
index 1cc5752..528588a 100644
--- a/src/test/java/org/apache/commons/text/StrBuilderTest.java
+++ b/src/test/java/org/apache/commons/text/StrBuilderTest.java
@@ -40,7 +40,10 @@ import org.junit.Test;
 
 /**
  * Unit tests for {@link StrBuilder}.
+ *
+ * @deprecated This class will be removed in 2.0.
  */
+@Deprecated
 public class StrBuilderTest {
 
     // -----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrLookupTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/StrLookupTest.java 
b/src/test/java/org/apache/commons/text/StrLookupTest.java
index fce36d2..62330e7 100644
--- a/src/test/java/org/apache/commons/text/StrLookupTest.java
+++ b/src/test/java/org/apache/commons/text/StrLookupTest.java
@@ -29,7 +29,10 @@ import org.junit.Test;
 
 /**
  * Test class for {@link StrLookup}.
+ *
+ * @deprecated This class will be removed in 2.0.
  */
+@Deprecated
 public class StrLookupTest  {
 
     //-----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrMatcherTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/StrMatcherTest.java 
b/src/test/java/org/apache/commons/text/StrMatcherTest.java
index cac9670..22278f8 100644
--- a/src/test/java/org/apache/commons/text/StrMatcherTest.java
+++ b/src/test/java/org/apache/commons/text/StrMatcherTest.java
@@ -22,7 +22,10 @@ import org.junit.Test;
 
 /**
  * Unit tests for {@link StrMatcher}.
+ *
+ * @deprecated This class will be removed in 2.0.
  */
+@Deprecated
 public class StrMatcherTest  {
 
     private static final char[] BUFFER1 = "0,1\t2 
3\n\r\f\u0000'\"".toCharArray();

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrSubstitutorTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/StrSubstitutorTest.java 
b/src/test/java/org/apache/commons/text/StrSubstitutorTest.java
index cbd95c3..04f95bb 100644
--- a/src/test/java/org/apache/commons/text/StrSubstitutorTest.java
+++ b/src/test/java/org/apache/commons/text/StrSubstitutorTest.java
@@ -35,7 +35,10 @@ import org.junit.Test;
 
 /**
  * Test class for {@link StrSubstitutor}.
+ *
+ * @deprecated This class will be removed in 2.0.
  */
+@Deprecated
 public class StrSubstitutorTest {
 
     private Map<String, String> values;

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StrTokenizerTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/StrTokenizerTest.java 
b/src/test/java/org/apache/commons/text/StrTokenizerTest.java
index 63a6ec0..35b9cd8 100644
--- a/src/test/java/org/apache/commons/text/StrTokenizerTest.java
+++ b/src/test/java/org/apache/commons/text/StrTokenizerTest.java
@@ -32,7 +32,10 @@ import org.junit.Test;
 
 /**
  * Unit test for {@link StrTokenizer}.
+ *
+ * @deprecated This class will be removed in 2.0.
  */
+@Deprecated
 public class StrTokenizerTest {
 
     private static final String CSV_SIMPLE_FIXTURE = "A,b,c";

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/StringTokenizerTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/StringTokenizerTest.java 
b/src/test/java/org/apache/commons/text/StringTokenizerTest.java
new file mode 100644
index 0000000..79a61db
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/StringTokenizerTest.java
@@ -0,0 +1,962 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.text;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+import org.apache.commons.text.matcher.StringMatcher;
+import org.apache.commons.text.matcher.StringMatcherFactory;
+import org.junit.Test;
+
+/**
+ * Unit test for {@link StringTokenizer}.
+ */
+public class StringTokenizerTest {
+
+    private static final String CSV_SIMPLE_FIXTURE = "A,b,c";
+
+    private static final String TSV_SIMPLE_FIXTURE = "A\tb\tc";
+
+    private void checkClone(final StringTokenizer tokenizer) {
+        assertFalse(StringTokenizer.getCSVInstance() == tokenizer);
+        assertFalse(StringTokenizer.getTSVInstance() == tokenizer);
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void test1() {
+
+        final String input = "a;b;c;\"d;\"\"e\";f; ; ;  ";
+        final StringTokenizer tok = new StringTokenizer(input);
+        tok.setDelimiterChar(';');
+        tok.setQuoteChar('"');
+        tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+        tok.setIgnoreEmptyTokens(false);
+        final String[] tokens = tok.getTokenArray();
+
+        final String[] expected = { "a", "b", "c", "d;\"e", "f", "", "", "" };
+
+        assertEquals(Arrays.toString(tokens), expected.length, tokens.length);
+        for (int i = 0; i < expected.length; i++) {
+            assertEquals("token[" + i + "] was '" + tokens[i] + "' but was 
expected to be '" + expected[i] + "'",
+                    expected[i], tokens[i]);
+        }
+
+    }
+
+    @Test
+    public void test2() {
+
+        final String input = "a;b;c ;\"d;\"\"e\";f; ; ;";
+        final StringTokenizer tok = new StringTokenizer(input);
+        tok.setDelimiterChar(';');
+        tok.setQuoteChar('"');
+        tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher());
+        tok.setIgnoreEmptyTokens(false);
+        final String[] tokens = tok.getTokenArray();
+
+        final String[] expected = { "a", "b", "c ", "d;\"e", "f", " ", " ", "" 
};
+
+        assertEquals(Arrays.toString(tokens), expected.length, tokens.length);
+        for (int i = 0; i < expected.length; i++) {
+            assertEquals("token[" + i + "] was '" + tokens[i] + "' but was 
expected to be '" + expected[i] + "'",
+                    expected[i], tokens[i]);
+        }
+
+    }
+
+    @Test
+    public void test3() {
+
+        final String input = "a;b; c;\"d;\"\"e\";f; ; ;";
+        final StringTokenizer tok = new StringTokenizer(input);
+        tok.setDelimiterChar(';');
+        tok.setQuoteChar('"');
+        tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher());
+        tok.setIgnoreEmptyTokens(false);
+        final String[] tokens = tok.getTokenArray();
+
+        final String[] expected = { "a", "b", " c", "d;\"e", "f", " ", " ", "" 
};
+
+        assertEquals(Arrays.toString(tokens), expected.length, tokens.length);
+        for (int i = 0; i < expected.length; i++) {
+            assertEquals("token[" + i + "] was '" + tokens[i] + "' but was 
expected to be '" + expected[i] + "'",
+                    expected[i], tokens[i]);
+        }
+
+    }
+
+    @Test
+    public void test4() {
+
+        final String input = "a;b; c;\"d;\"\"e\";f; ; ;";
+        final StringTokenizer tok = new StringTokenizer(input);
+        tok.setDelimiterChar(';');
+        tok.setQuoteChar('"');
+        tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+        tok.setIgnoreEmptyTokens(true);
+        final String[] tokens = tok.getTokenArray();
+
+        final String[] expected = { "a", "b", "c", "d;\"e", "f" };
+
+        assertEquals(Arrays.toString(tokens), expected.length, tokens.length);
+        for (int i = 0; i < expected.length; i++) {
+            assertEquals("token[" + i + "] was '" + tokens[i] + "' but was 
expected to be '" + expected[i] + "'",
+                    expected[i], tokens[i]);
+        }
+
+    }
+
+    @Test
+    public void test5() {
+
+        final String input = "a;b; c;\"d;\"\"e\";f; ; ;";
+        final StringTokenizer tok = new StringTokenizer(input);
+        tok.setDelimiterChar(';');
+        tok.setQuoteChar('"');
+        tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+        tok.setIgnoreEmptyTokens(false);
+        tok.setEmptyTokenAsNull(true);
+        final String[] tokens = tok.getTokenArray();
+
+        final String[] expected = { "a", "b", "c", "d;\"e", "f", null, null, 
null };
+
+        assertEquals(Arrays.toString(tokens), expected.length, tokens.length);
+        for (int i = 0; i < expected.length; i++) {
+            assertEquals("token[" + i + "] was '" + tokens[i] + "' but was 
expected to be '" + expected[i] + "'",
+                    expected[i], tokens[i]);
+        }
+
+    }
+
+    @Test
+    public void test6() {
+
+        final String input = "a;b; c;\"d;\"\"e\";f; ; ;";
+        final StringTokenizer tok = new StringTokenizer(input);
+        tok.setDelimiterChar(';');
+        tok.setQuoteChar('"');
+        tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+        tok.setIgnoreEmptyTokens(false);
+        // tok.setTreatingEmptyAsNull(true);
+        final String[] tokens = tok.getTokenArray();
+
+        final String[] expected = { "a", "b", " c", "d;\"e", "f", null, null, 
null };
+
+        int nextCount = 0;
+        while (tok.hasNext()) {
+            tok.next();
+            nextCount++;
+        }
+
+        int prevCount = 0;
+        while (tok.hasPrevious()) {
+            tok.previous();
+            prevCount++;
+        }
+
+        assertEquals(Arrays.toString(tokens), expected.length, tokens.length);
+
+        assertTrue("could not cycle through entire token list" + " using the 
'hasNext' and 'next' methods",
+                nextCount == expected.length);
+
+        assertTrue("could not cycle through entire token list" + " using the 
'hasPrevious' and 'previous' methods",
+                prevCount == expected.length);
+
+    }
+
+    @Test
+    public void test7() {
+
+        final String input = "a   b c \"d e\" f ";
+        final StringTokenizer tok = new StringTokenizer(input);
+        tok.setDelimiterMatcher(StringMatcherFactory.INSTANCE.spaceMatcher());
+        
tok.setQuoteMatcher(StringMatcherFactory.INSTANCE.doubleQuoteMatcher());
+        tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher());
+        tok.setIgnoreEmptyTokens(false);
+        final String[] tokens = tok.getTokenArray();
+
+        final String[] expected = { "a", "", "", "b", "c", "d e", "f", "" };
+
+        assertEquals(Arrays.toString(tokens), expected.length, tokens.length);
+        for (int i = 0; i < expected.length; i++) {
+            assertEquals("token[" + i + "] was '" + tokens[i] + "' but was 
expected to be '" + expected[i] + "'",
+                    expected[i], tokens[i]);
+        }
+
+    }
+
+    @Test
+    public void test8() {
+
+        final String input = "a   b c \"d e\" f ";
+        final StringTokenizer tok = new StringTokenizer(input);
+        tok.setDelimiterMatcher(StringMatcherFactory.INSTANCE.spaceMatcher());
+        
tok.setQuoteMatcher(StringMatcherFactory.INSTANCE.doubleQuoteMatcher());
+        tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.noneMatcher());
+        tok.setIgnoreEmptyTokens(true);
+        final String[] tokens = tok.getTokenArray();
+
+        final String[] expected = { "a", "b", "c", "d e", "f" };
+
+        assertEquals(Arrays.toString(tokens), expected.length, tokens.length);
+        for (int i = 0; i < expected.length; i++) {
+            assertEquals("token[" + i + "] was '" + tokens[i] + "' but was 
expected to be '" + expected[i] + "'",
+                    expected[i], tokens[i]);
+        }
+
+    }
+
+    @Test
+    public void testBasic1() {
+        final String input = "a  b c";
+        final StringTokenizer tok = new StringTokenizer(input);
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertEquals("c", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasic2() {
+        final String input = "a \nb\fc";
+        final StringTokenizer tok = new StringTokenizer(input);
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertEquals("c", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasic3() {
+        final String input = "a \nb\u0001\fc";
+        final StringTokenizer tok = new StringTokenizer(input);
+        assertEquals("a", tok.next());
+        assertEquals("b\u0001", tok.next());
+        assertEquals("c", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasic4() {
+        final String input = "a \"b\" c";
+        final StringTokenizer tok = new StringTokenizer(input);
+        assertEquals("a", tok.next());
+        assertEquals("\"b\"", tok.next());
+        assertEquals("c", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasic5() {
+        final String input = "a:b':c";
+        final StringTokenizer tok = new StringTokenizer(input, ':', '\'');
+        assertEquals("a", tok.next());
+        assertEquals("b'", tok.next());
+        assertEquals("c", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicDelim1() {
+        final String input = "a:b:c";
+        final StringTokenizer tok = new StringTokenizer(input, ':');
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertEquals("c", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicDelim2() {
+        final String input = "a:b:c";
+        final StringTokenizer tok = new StringTokenizer(input, ',');
+        assertEquals("a:b:c", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testDelimString() {
+        final String input = "a##b##c";
+        final StringTokenizer tok = new StringTokenizer(input, "##");
+
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertEquals("c", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testDelimMatcher() {
+        final String input = "a/b\\c";
+        final StringMatcher delimMatcher = StringMatcherFactory.INSTANCE.charSetMatcher(new char[] { '/', '\\' });
+
+        final StringTokenizer tok = new StringTokenizer(input, delimMatcher);
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertEquals("c", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testDelimMatcherQuoteMatcher() {
+        final String input = "`a`;`b`;`c`";
+        final StringMatcher delimMatcher = StringMatcherFactory.INSTANCE.charSetMatcher(new char[] { ';' });
+        final StringMatcher quoteMatcher = StringMatcherFactory.INSTANCE.charSetMatcher(new char[] { '`' });
+
+        final StringTokenizer tok = new StringTokenizer(input, delimMatcher, quoteMatcher);
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertEquals("c", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicEmpty1() {
+        final String input = "a  b c";
+        final StringTokenizer tok = new StringTokenizer(input);
+        tok.setIgnoreEmptyTokens(false);
+        assertEquals("a", tok.next());
+        assertEquals("", tok.next());
+        assertEquals("b", tok.next());
+        assertEquals("c", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicEmpty2() {
+        final String input = "a  b c";
+        final StringTokenizer tok = new StringTokenizer(input);
+        tok.setIgnoreEmptyTokens(false);
+        tok.setEmptyTokenAsNull(true);
+        assertEquals("a", tok.next());
+        assertNull(tok.next());
+        assertEquals("b", tok.next());
+        assertEquals("c", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicQuoted1() {
+        final String input = "a 'b' c";
+        final StringTokenizer tok = new StringTokenizer(input, ' ', '\'');
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertEquals("c", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicQuoted2() {
+        final String input = "a:'b':";
+        final StringTokenizer tok = new StringTokenizer(input, ':', '\'');
+        tok.setIgnoreEmptyTokens(false);
+        tok.setEmptyTokenAsNull(true);
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertNull(tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicQuoted3() {
+        final String input = "a:'b''c'";
+        final StringTokenizer tok = new StringTokenizer(input, ':', '\'');
+        tok.setIgnoreEmptyTokens(false);
+        tok.setEmptyTokenAsNull(true);
+        assertEquals("a", tok.next());
+        assertEquals("b'c", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicQuoted4() {
+        final String input = "a: 'b' 'c' :d";
+        final StringTokenizer tok = new StringTokenizer(input, ':', '\'');
+        tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+        tok.setIgnoreEmptyTokens(false);
+        tok.setEmptyTokenAsNull(true);
+        assertEquals("a", tok.next());
+        assertEquals("b c", tok.next());
+        assertEquals("d", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicQuoted5() {
+        final String input = "a: 'b'x'c' :d";
+        final StringTokenizer tok = new StringTokenizer(input, ':', '\'');
+        tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+        tok.setIgnoreEmptyTokens(false);
+        tok.setEmptyTokenAsNull(true);
+        assertEquals("a", tok.next());
+        assertEquals("bxc", tok.next());
+        assertEquals("d", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicQuoted6() {
+        final String input = "a:'b'\"c':d";
+        final StringTokenizer tok = new StringTokenizer(input, ':');
+        tok.setQuoteMatcher(StringMatcherFactory.INSTANCE.quoteMatcher());
+        assertEquals("a", tok.next());
+        assertEquals("b\"c:d", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicQuoted7() {
+        final String input = "a:\"There's a reason here\":b";
+        final StringTokenizer tok = new StringTokenizer(input, ':');
+        tok.setQuoteMatcher(StringMatcherFactory.INSTANCE.quoteMatcher());
+        assertEquals("a", tok.next());
+        assertEquals("There's a reason here", tok.next());
+        assertEquals("b", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicQuotedTrimmed1() {
+        final String input = "a: 'b' :";
+        final StringTokenizer tok = new StringTokenizer(input, ':', '\'');
+        tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+        tok.setIgnoreEmptyTokens(false);
+        tok.setEmptyTokenAsNull(true);
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertNull(tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicTrimmed1() {
+        final String input = "a: b :  ";
+        final StringTokenizer tok = new StringTokenizer(input, ':');
+        tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+        tok.setIgnoreEmptyTokens(false);
+        tok.setEmptyTokenAsNull(true);
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertNull(tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicTrimmed2() {
+        final String input = "a:  b  :";
+        final StringTokenizer tok = new StringTokenizer(input, ':');
+        tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.stringMatcher("  "));
+        tok.setIgnoreEmptyTokens(false);
+        tok.setEmptyTokenAsNull(true);
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertNull(tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicIgnoreTrimmed1() {
+        final String input = "a: bIGNOREc : ";
+        final StringTokenizer tok = new StringTokenizer(input, ':');
+        tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.stringMatcher("IGNORE"));
+        tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+        tok.setIgnoreEmptyTokens(false);
+        tok.setEmptyTokenAsNull(true);
+        assertEquals("a", tok.next());
+        assertEquals("bc", tok.next());
+        assertNull(tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicIgnoreTrimmed2() {
+        final String input = "IGNOREaIGNORE: IGNORE bIGNOREc IGNORE : IGNORE ";
+        final StringTokenizer tok = new StringTokenizer(input, ':');
+        tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.stringMatcher("IGNORE"));
+        tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+        tok.setIgnoreEmptyTokens(false);
+        tok.setEmptyTokenAsNull(true);
+        assertEquals("a", tok.next());
+        assertEquals("bc", tok.next());
+        assertNull(tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicIgnoreTrimmed3() {
+        final String input = "IGNOREaIGNORE: IGNORE bIGNOREc IGNORE : IGNORE ";
+        final StringTokenizer tok = new StringTokenizer(input, ':');
+        tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.stringMatcher("IGNORE"));
+        tok.setIgnoreEmptyTokens(false);
+        tok.setEmptyTokenAsNull(true);
+        assertEquals("a", tok.next());
+        assertEquals("  bc  ", tok.next());
+        assertEquals("  ", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    @Test
+    public void testBasicIgnoreTrimmed4() {
+        final String input = "IGNOREaIGNORE: IGNORE 'bIGNOREc'IGNORE'd' IGNORE : IGNORE ";
+        final StringTokenizer tok = new StringTokenizer(input, ':', '\'');
+        tok.setIgnoredMatcher(StringMatcherFactory.INSTANCE.stringMatcher("IGNORE"));
+        tok.setTrimmerMatcher(StringMatcherFactory.INSTANCE.trimMatcher());
+        tok.setIgnoreEmptyTokens(false);
+        tok.setEmptyTokenAsNull(true);
+        assertEquals("a", tok.next());
+        assertEquals("bIGNOREcd", tok.next());
+        assertNull(tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testListArray() {
+        final String input = "a  b c";
+        final StringTokenizer tok = new StringTokenizer(input);
+        final String[] array = tok.getTokenArray();
+        final List<?> list = tok.getTokenList();
+
+        assertEquals(Arrays.asList(array), list);
+        assertEquals(3, list.size());
+    }
+
+    // -----------------------------------------------------------------------
+    private void testCSV(final String data) {
+        this.testXSVAbc(StringTokenizer.getCSVInstance(data));
+        this.testXSVAbc(StringTokenizer.getCSVInstance(data.toCharArray()));
+    }
+
+    @Test
+    public void testCSVEmpty() {
+        this.testEmpty(StringTokenizer.getCSVInstance());
+        this.testEmpty(StringTokenizer.getCSVInstance(""));
+    }
+
+    @Test
+    public void testCSVSimple() {
+        this.testCSV(CSV_SIMPLE_FIXTURE);
+    }
+
+    @Test
+    public void testCSVSimpleNeedsTrim() {
+        this.testCSV("   " + CSV_SIMPLE_FIXTURE);
+        this.testCSV("   \n\t  " + CSV_SIMPLE_FIXTURE);
+        this.testCSV("   \n  " + CSV_SIMPLE_FIXTURE + "\n\n\r");
+    }
+
+    void testEmpty(final StringTokenizer tokenizer) {
+        this.checkClone(tokenizer);
+        assertFalse(tokenizer.hasNext());
+        assertFalse(tokenizer.hasPrevious());
+        assertNull(tokenizer.nextToken());
+        assertEquals(0, tokenizer.size());
+        try {
+            tokenizer.next();
+            fail();
+        } catch (final NoSuchElementException ex) {
+        }
+    }
+
+    @Test
+    public void testGetContent() {
+        final String input = "a   b c \"d e\" f ";
+        StringTokenizer tok = new StringTokenizer(input);
+        assertEquals(input, tok.getContent());
+
+        tok = new StringTokenizer(input.toCharArray());
+        assertEquals(input, tok.getContent());
+
+        tok = new StringTokenizer();
+        assertNull(tok.getContent());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testChaining() {
+        final StringTokenizer tok = new StringTokenizer();
+        assertEquals(tok, tok.reset());
+        assertEquals(tok, tok.reset(""));
+        assertEquals(tok, tok.reset(new char[0]));
+        assertEquals(tok, tok.setDelimiterChar(' '));
+        assertEquals(tok, tok.setDelimiterString(" "));
+        assertEquals(tok, tok.setDelimiterMatcher(null));
+        assertEquals(tok, tok.setQuoteChar(' '));
+        assertEquals(tok, tok.setQuoteMatcher(null));
+        assertEquals(tok, tok.setIgnoredChar(' '));
+        assertEquals(tok, tok.setIgnoredMatcher(null));
+        assertEquals(tok, tok.setTrimmerMatcher(null));
+        assertEquals(tok, tok.setEmptyTokenAsNull(false));
+        assertEquals(tok, tok.setIgnoreEmptyTokens(false));
+    }
+
+    /**
+     * Tests that the {@link StringTokenizer#clone()} clone method catches {@link CloneNotSupportedException} and
+     * returns <code>null</code>.
+     */
+    @Test
+    public void testCloneNotSupportedException() {
+        final Object notCloned = new StringTokenizer() {
+
+            @Override
+            Object cloneReset() throws CloneNotSupportedException {
+                throw new CloneNotSupportedException("test");
+            }
+        }.clone();
+        assertNull(notCloned);
+    }
+
+    @Test
+    public void testCloneNull() {
+        final StringTokenizer tokenizer = new StringTokenizer((char[]) null);
+        // Start sanity check
+        assertNull(tokenizer.nextToken());
+        tokenizer.reset();
+        assertNull(tokenizer.nextToken());
+        // End sanity check
+        final StringTokenizer clonedTokenizer = (StringTokenizer) tokenizer.clone();
+        tokenizer.reset();
+        assertNull(tokenizer.nextToken());
+        assertNull(clonedTokenizer.nextToken());
+    }
+
+    @Test
+    public void testCloneReset() {
+        final char[] input = new char[] { 'a' };
+        final StringTokenizer tokenizer = new StringTokenizer(input);
+        // Start sanity check
+        assertEquals("a", tokenizer.nextToken());
+        tokenizer.reset(input);
+        assertEquals("a", tokenizer.nextToken());
+        // End sanity check
+        final StringTokenizer clonedTokenizer = (StringTokenizer) tokenizer.clone();
+        input[0] = 'b';
+        tokenizer.reset(input);
+        assertEquals("b", tokenizer.nextToken());
+        assertEquals("a", clonedTokenizer.nextToken());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testConstructor_String() {
+        StringTokenizer tok = new StringTokenizer("a b");
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertFalse(tok.hasNext());
+
+        tok = new StringTokenizer("");
+        assertFalse(tok.hasNext());
+
+        tok = new StringTokenizer((String) null);
+        assertFalse(tok.hasNext());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testConstructor_String_char() {
+        StringTokenizer tok = new StringTokenizer("a b", ' ');
+        assertEquals(1, tok.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1));
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertFalse(tok.hasNext());
+
+        tok = new StringTokenizer("", ' ');
+        assertFalse(tok.hasNext());
+
+        tok = new StringTokenizer((String) null, ' ');
+        assertFalse(tok.hasNext());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testConstructor_String_char_char() {
+        StringTokenizer tok = new StringTokenizer("a b", ' ', '"');
+        assertEquals(1, tok.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1));
+        assertEquals(1, tok.getQuoteMatcher().isMatch("\"".toCharArray(), 0, 0, 1));
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertFalse(tok.hasNext());
+
+        tok = new StringTokenizer("", ' ', '"');
+        assertFalse(tok.hasNext());
+
+        tok = new StringTokenizer((String) null, ' ', '"');
+        assertFalse(tok.hasNext());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testConstructor_charArray() {
+        StringTokenizer tok = new StringTokenizer("a b".toCharArray());
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertFalse(tok.hasNext());
+
+        tok = new StringTokenizer(new char[0]);
+        assertFalse(tok.hasNext());
+
+        tok = new StringTokenizer((char[]) null);
+        assertFalse(tok.hasNext());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testConstructor_charArray_char() {
+        StringTokenizer tok = new StringTokenizer("a b".toCharArray(), ' ');
+        assertEquals(1, tok.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1));
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertFalse(tok.hasNext());
+
+        tok = new StringTokenizer(new char[0], ' ');
+        assertFalse(tok.hasNext());
+
+        tok = new StringTokenizer((char[]) null, ' ');
+        assertFalse(tok.hasNext());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testConstructor_charArray_char_char() {
+        StringTokenizer tok = new StringTokenizer("a b".toCharArray(), ' ', '"');
+        assertEquals(1, tok.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1));
+        assertEquals(1, tok.getQuoteMatcher().isMatch("\"".toCharArray(), 0, 0, 1));
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertFalse(tok.hasNext());
+
+        tok = new StringTokenizer(new char[0], ' ', '"');
+        assertFalse(tok.hasNext());
+
+        tok = new StringTokenizer((char[]) null, ' ', '"');
+        assertFalse(tok.hasNext());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testReset() {
+        final StringTokenizer tok = new StringTokenizer("a b c");
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertEquals("c", tok.next());
+        assertFalse(tok.hasNext());
+
+        tok.reset();
+        assertEquals("a", tok.next());
+        assertEquals("b", tok.next());
+        assertEquals("c", tok.next());
+        assertFalse(tok.hasNext());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testReset_String() {
+        final StringTokenizer tok = new StringTokenizer("x x x");
+        tok.reset("d e");
+        assertEquals("d", tok.next());
+        assertEquals("e", tok.next());
+        assertFalse(tok.hasNext());
+
+        tok.reset((String) null);
+        assertFalse(tok.hasNext());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testReset_charArray() {
+        final StringTokenizer tok = new StringTokenizer("x x x");
+
+        final char[] array = new char[] { 'a', 'b', 'c' };
+        tok.reset(array);
+        assertEquals("abc", tok.next());
+        assertFalse(tok.hasNext());
+
+        tok.reset((char[]) null);
+        assertFalse(tok.hasNext());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testTSV() {
+        this.testXSVAbc(StringTokenizer.getTSVInstance(TSV_SIMPLE_FIXTURE));
+        this.testXSVAbc(StringTokenizer.getTSVInstance(TSV_SIMPLE_FIXTURE.toCharArray()));
+    }
+
+    @Test
+    public void testTSVEmpty() {
+        this.testEmpty(StringTokenizer.getTSVInstance());
+        this.testEmpty(StringTokenizer.getTSVInstance(""));
+    }
+
+    void testXSVAbc(final StringTokenizer tokenizer) {
+        this.checkClone(tokenizer);
+        assertEquals(-1, tokenizer.previousIndex());
+        assertEquals(0, tokenizer.nextIndex());
+        assertNull(tokenizer.previousToken());
+        assertEquals("A", tokenizer.nextToken());
+        assertEquals(1, tokenizer.nextIndex());
+        assertEquals("b", tokenizer.nextToken());
+        assertEquals(2, tokenizer.nextIndex());
+        assertEquals("c", tokenizer.nextToken());
+        assertEquals(3, tokenizer.nextIndex());
+        assertNull(tokenizer.nextToken());
+        assertEquals(3, tokenizer.nextIndex());
+        assertEquals("c", tokenizer.previousToken());
+        assertEquals(2, tokenizer.nextIndex());
+        assertEquals("b", tokenizer.previousToken());
+        assertEquals(1, tokenizer.nextIndex());
+        assertEquals("A", tokenizer.previousToken());
+        assertEquals(0, tokenizer.nextIndex());
+        assertNull(tokenizer.previousToken());
+        assertEquals(0, tokenizer.nextIndex());
+        assertEquals(-1, tokenizer.previousIndex());
+        assertEquals(3, tokenizer.size());
+    }
+
+    @Test
+    public void testIteration() {
+        final StringTokenizer tkn = new StringTokenizer("a b c");
+        assertFalse(tkn.hasPrevious());
+        try {
+            tkn.previous();
+            fail();
+        } catch (final NoSuchElementException ex) {
+        }
+        assertTrue(tkn.hasNext());
+
+        assertEquals("a", tkn.next());
+        try {
+            tkn.remove();
+            fail();
+        } catch (final UnsupportedOperationException ex) {
+        }
+        try {
+            tkn.set("x");
+            fail();
+        } catch (final UnsupportedOperationException ex) {
+        }
+        try {
+            tkn.add("y");
+            fail();
+        } catch (final UnsupportedOperationException ex) {
+        }
+        assertTrue(tkn.hasPrevious());
+        assertTrue(tkn.hasNext());
+
+        assertEquals("b", tkn.next());
+        assertTrue(tkn.hasPrevious());
+        assertTrue(tkn.hasNext());
+
+        assertEquals("c", tkn.next());
+        assertTrue(tkn.hasPrevious());
+        assertFalse(tkn.hasNext());
+
+        try {
+            tkn.next();
+            fail();
+        } catch (final NoSuchElementException ex) {
+        }
+        assertTrue(tkn.hasPrevious());
+        assertFalse(tkn.hasNext());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testTokenizeSubclassInputChange() {
+        final StringTokenizer tkn = new StringTokenizer("a b c d e") {
+
+            @Override
+            protected List<String> tokenize(final char[] chars, final int offset, final int count) {
+                return super.tokenize("w x y z".toCharArray(), 2, 5);
+            }
+        };
+        assertEquals("x", tkn.next());
+        assertEquals("y", tkn.next());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testTokenizeSubclassOutputChange() {
+        final StringTokenizer tkn = new StringTokenizer("a b c") {
+
+            @Override
+            protected List<String> tokenize(final char[] chars, final int offset, final int count) {
+                final List<String> list = super.tokenize(chars, offset, count);
+                Collections.reverse(list);
+                return list;
+            }
+        };
+        assertEquals("c", tkn.next());
+        assertEquals("b", tkn.next());
+        assertEquals("a", tkn.next());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testToString() {
+        final StringTokenizer tkn = new StringTokenizer("a b c d e");
+        assertEquals("StringTokenizer[not tokenized yet]", tkn.toString());
+        tkn.next();
+        assertEquals("StringTokenizer[a, b, c, d, e]", tkn.toString());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testStringTokenizerStringMatcher() {
+        final char[] chars = { 'a', 'b', 'c', 'd' };
+        final StringTokenizer tokens = new StringTokenizer(chars, "bc");
+        assertEquals("a", tokens.next());
+        assertEquals("d", tokens.next());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testStringTokenizerStrMatcher() {
+        final char[] chars = { 'a', ',', 'c' };
+        final StringTokenizer tokens = new StringTokenizer(chars, StringMatcherFactory.INSTANCE.commaMatcher());
+        assertEquals("a", tokens.next());
+        assertEquals("c", tokens.next());
+    }
+
+    // -----------------------------------------------------------------------
+    @Test
+    public void testStringTokenizerQuoteMatcher() {
+        final char[] chars = { '\'', 'a', 'c', '\'', 'd' };
+        final StringTokenizer tokens = new StringTokenizer(chars, StringMatcherFactory.INSTANCE.commaMatcher(),
+                StringMatcherFactory.INSTANCE.quoteMatcher());
+        assertEquals("acd", tokens.next());
+    }
+
+    @Test
+    public void testPreviousTokenAndSetEmptyTokenAsNull() {
+        final StringTokenizer strTokenizer = StringTokenizer.getTSVInstance(" \t\n\r\f");
+        strTokenizer.setEmptyTokenAsNull(true);
+
+        assertNull(strTokenizer.previousToken());
+    }
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6d8b511f/src/test/java/org/apache/commons/text/TextStringBuilderTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/TextStringBuilderTest.java b/src/test/java/org/apache/commons/text/TextStringBuilderTest.java
index 88d3a50..dec5d02 100644
--- a/src/test/java/org/apache/commons/text/TextStringBuilderTest.java
+++ b/src/test/java/org/apache/commons/text/TextStringBuilderTest.java
@@ -1691,7 +1691,7 @@ public class TextStringBuilderTest {
         // from Javadoc
         final TextStringBuilder b = new TextStringBuilder();
         b.append("a b ");
-        final StrTokenizer t = b.asTokenizer();
+        final StringTokenizer t = b.asTokenizer();
 
         final String[] tokens1 = t.getTokenArray();
         assertEquals(2, tokens1.length);

[text] [TEXT-116] Add a StrTokenizer replacement based on the StringMatcher interface: StringTokenizer.

Reply via email to