This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-text.git

commit ea61a39ddc6e77396ab96d03e08885c4c162cb54
Author: Gary Gregory <[email protected]>
AuthorDate: Thu May 18 09:35:36 2023 -0400

    Rework some org.apache.commons.text.similarity internals to use JRE's
    Function
---
 .../{Tokenizer.java => CharSequenceTokenizer.java}     | 18 ++----------------
 .../apache/commons/text/similarity/CosineDistance.java |  4 ++--
 .../apache/commons/text/similarity/RegexTokenizer.java |  4 ++--
 .../org/apache/commons/text/similarity/Tokenizer.java  | 18 +++++++-----------
 4 files changed, 13 insertions(+), 31 deletions(-)

diff --git a/src/main/java/org/apache/commons/text/similarity/Tokenizer.java b/src/main/java/org/apache/commons/text/similarity/CharSequenceTokenizer.java
similarity index 74%
copy from src/main/java/org/apache/commons/text/similarity/Tokenizer.java
copy to src/main/java/org/apache/commons/text/similarity/CharSequenceTokenizer.java
index fa8fda46..3c85c382 100644
--- a/src/main/java/org/apache/commons/text/similarity/Tokenizer.java
+++ b/src/main/java/org/apache/commons/text/similarity/CharSequenceTokenizer.java
@@ -16,20 +16,6 @@
  */
 package org.apache.commons.text.similarity;
 
-/**
- * A tokenizer. Can produce arrays of tokens from a given type.
- *
- * @param <T> given type
- * @since 1.0
- */
-interface Tokenizer<T> {
-
-    /**
-     * Returns an array of tokens.
-     *
-     * @param text input text
-     * @return array of tokens
-     */
-    T[] tokenize(CharSequence text);
-
+interface CharSequenceTokenizer<T> extends Tokenizer<T, CharSequence> {
+    // empty
 }
diff --git a/src/main/java/org/apache/commons/text/similarity/CosineDistance.java b/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
index 92c71f1c..807d446e 100644
--- a/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
+++ b/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
@@ -37,8 +37,8 @@ public class CosineDistance implements EditDistance<Double> {
 
     @Override
     public Double apply(final CharSequence left, final CharSequence right) {
-        final CharSequence[] leftTokens = RegexTokenizer.INSTANCE.tokenize(left);
-        final CharSequence[] rightTokens = RegexTokenizer.INSTANCE.tokenize(right);
+        final CharSequence[] leftTokens = RegexTokenizer.INSTANCE.apply(left);
+        final CharSequence[] rightTokens = RegexTokenizer.INSTANCE.apply(right);
 
         final Map<CharSequence, Integer> leftVector = Counter.of(leftTokens);
         final Map<CharSequence, Integer> rightVector = Counter.of(rightTokens);
diff --git a/src/main/java/org/apache/commons/text/similarity/RegexTokenizer.java b/src/main/java/org/apache/commons/text/similarity/RegexTokenizer.java
index be64f849..c23c43e4 100644
--- a/src/main/java/org/apache/commons/text/similarity/RegexTokenizer.java
+++ b/src/main/java/org/apache/commons/text/similarity/RegexTokenizer.java
@@ -34,7 +34,7 @@ import org.apache.commons.lang3.Validate;
  *
  * @since 1.0
  */
-final class RegexTokenizer implements Tokenizer<CharSequence> {
+final class RegexTokenizer implements CharSequenceTokenizer<CharSequence> {
 
     /** The whitespace pattern. */
     private static final Pattern PATTERN = Pattern.compile("(\\w)+");
@@ -50,7 +50,7 @@ final class RegexTokenizer implements Tokenizer<CharSequence> {
      * @throws IllegalArgumentException if the input text is blank
      */
     @Override
-    public CharSequence[] tokenize(final CharSequence text) {
+    public CharSequence[] apply(final CharSequence text) {
         Validate.isTrue(StringUtils.isNotBlank(text), "Invalid text");
         final Matcher matcher = PATTERN.matcher(text);
         final List<String> tokens = new ArrayList<>();
diff --git a/src/main/java/org/apache/commons/text/similarity/Tokenizer.java b/src/main/java/org/apache/commons/text/similarity/Tokenizer.java
index fa8fda46..724e1966 100644
--- a/src/main/java/org/apache/commons/text/similarity/Tokenizer.java
+++ b/src/main/java/org/apache/commons/text/similarity/Tokenizer.java
@@ -14,22 +14,18 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 package org.apache.commons.text.similarity;
 
+import java.util.function.Function;
+
 /**
  * A tokenizer. Can produce arrays of tokens from a given type.
  *
- * @param <T> given type
+ * @param <T> The type to tokenize.
+ * @param <R> The return array element type.
  * @since 1.0
  */
-interface Tokenizer<T> {
-
-    /**
-     * Returns an array of tokens.
-     *
-     * @param text input text
-     * @return array of tokens
-     */
-    T[] tokenize(CharSequence text);
-
+interface Tokenizer<T, R> extends Function<T, R[]> {
+    // empty
 }

Reply via email to