Repository: commons-text
Updated Branches:
  refs/heads/NEW-METRICS ff1959c84 -> 81f679dea


Use tokenizers in internal package, and add more tests


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/81f679de
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/81f679de
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/81f679de

Branch: refs/heads/NEW-METRICS
Commit: 81f679dea1eb4c1d90d7173547ff98f7d14110a4
Parents: ff1959c
Author: Bruno P. Kinoshita <[email protected]>
Authored: Fri Mar 27 03:25:51 2015 -0300
Committer: Bruno P. Kinoshita <[email protected]>
Committed: Fri Mar 27 03:25:51 2015 -0300

----------------------------------------------------------------------
 .../commons/text/similarity/CosineDistance.java | 28 ++++++-
 .../text/similarity/CosineSimilarity.java       | 77 ++++++++++++++------
 .../similarity/internal/CharacterTokenizer.java | 43 +++++++++++
 .../text/similarity/internal/Counter.java       | 44 +++++++++++
 .../similarity/internal/SimpleTokenizer.java    | 50 +++++++++++++
 .../similarity/internal/StringTokenizer.java    | 36 +++++++++
 .../text/similarity/internal/Tokenizer.java     | 36 +++++++++
 .../internal/WhiteSpaceTokenizer.java           | 46 ++++++++++++
 .../text/similarity/internal/package-info.java  | 21 ++++++
 .../text/similarity/CosineDistanceTest.java     | 55 ++++++++++++++
 .../text/similarity/CosineSimilarityTest.java   | 51 -------------
 11 files changed, 414 insertions(+), 73 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/81f679de/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/similarity/CosineDistance.java 
b/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
index ad19745..54970e4 100644
--- a/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
+++ b/src/main/java/org/apache/commons/text/similarity/CosineDistance.java
@@ -16,6 +16,32 @@
  */
 package org.apache.commons.text.similarity;
 
-public class CosineDistance {
+import java.util.Map;
+
+import org.apache.commons.text.similarity.internal.Counter;
+import org.apache.commons.text.similarity.internal.SimpleTokenizer;
+import org.apache.commons.text.similarity.internal.Tokenizer;
+
+/**
+ * <p>
+ * Calcules the cosine distance.
+ * </p>
+ */
+public class CosineDistance implements StringMetric<Double> {
+
+    private final Tokenizer<String> tokenizer = new SimpleTokenizer();
+    
+    private final CosineSimilarity cosineSimilarity = new CosineSimilarity();
+    
+    @Override
+    public Double compare(CharSequence left, CharSequence right) {
+        String[] leftTokens = tokenizer.tokenize(left);
+        String[] rightTokens = tokenizer.tokenize(right);
+        
+        Map<String, Integer> leftVector = Counter.of(leftTokens);
+        Map<String, Integer> rightVector = Counter.of(rightTokens);
+        double similarity = cosineSimilarity.cosineSimilarity(leftVector, 
rightVector);
+        return 1.0 - similarity;
+    }
 
 }

http://git-wip-us.apache.org/repos/asf/commons-text/blob/81f679de/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java 
b/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java
index ca9d087..992e5e9 100644
--- a/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java
+++ b/src/main/java/org/apache/commons/text/similarity/CosineSimilarity.java
@@ -16,9 +16,16 @@
  */
 package org.apache.commons.text.similarity;
 
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+
 /**
- * <p>Measures the Cosine similarity of two CharSequences. It treats the 
CharSequences as
- * two vectors of an inner product space and compares the angle between 
them.</p>
+ * <p>
+ * Measures the Cosine similarity of two vectors of an inner product space and
+ * compares the angle between them.
+ * </p>
  *
  * <p>
  * For further explanation about the Cosine Similarity, take a look at its
@@ -27,39 +34,67 @@ package org.apache.commons.text.similarity;
  *
  * @since 0.1
  */
-public class CosineSimilarity implements StringMetric<Double> {
+public class CosineSimilarity {
 
-    @Override
-    public Double compare(CharSequence left, CharSequence right) {
-        if (left == null || right == null) {
-            throw new IllegalArgumentException("String parameters must not be 
null");
+    /**
+     * Calculates the cosine similarity for two given vectors.
+     *
+     * @param leftVector left vector
+     * @param rightVector right vector
+     * @return cosine similarity between the two vectors
+     */
+    public Double cosineSimilarity(Map<String, Integer> leftVector, 
Map<String, Integer> rightVector) {
+        if (leftVector == null || rightVector == null) {
+            throw new IllegalArgumentException("Vectors must not be null");
         }
-        long dotProduct = dot(left, right);
+
+        Set<String> intersection = getIntersection(leftVector, rightVector);
+
+        double dotProduct = dot(leftVector, rightVector, intersection);
         double d1 = 0.0d;
-        for (int i = 0; i < left.length(); ++i) {
-            d1 += Math.pow(((int) left.charAt(i)), 2);
+        for (Integer value : leftVector.values()) {
+            d1 += Math.pow(value, 2);
         }
         double d2 = 0.0d;
-        for (int i = 0; i < right.length(); ++i) {
-            d2 += Math.pow(((int) right.charAt(i)), 2);
+        for (Integer value : rightVector.values()) {
+            d2 += Math.pow(value, 2);
+        }
+        double cosineSimilarity;
+        if (d1 <= 0.0 || d2 <= 0.0) {
+            cosineSimilarity = 0.0;
+        } else {
+            cosineSimilarity = (double) (dotProduct / (double) (Math.sqrt(d1) 
* Math.sqrt(d2)));
         }
-        double cosineSimilarity = dotProduct / (double) (Math.sqrt(d1) * 
Math.sqrt(d2));
         return cosineSimilarity;
     }
 
     /**
-     * Computes the dot product of two CharSequences. It ignores remaining 
characters. It means
-     * that if a string is longer than other, then a smaller part of it will 
be used to compute
+     * Returns a set with strings common to the two given maps.
+     *
+     * @param leftVector left vector map
+     * @param rightVector right vector map
+     * @return common strings
+     */
+    private Set<String> getIntersection(Map<String, Integer> leftVector, 
Map<String, Integer> rightVector) {
+        Set<String> intersection = new HashSet<String>(leftVector.keySet());
+        intersection.retainAll(rightVector.keySet());
+        return intersection;
+    }
+
+    /**
+     * Computes the dot product of two vectors. It ignores remaining elements. 
It means
+     * that if a vector is longer than other, then a smaller part of it will 
be used to compute
      * the dot product.
-     * 
-     * @param left left string
-     * @param right right string
+     *
+     * @param left left vector
+     * @param right right vector
+     * @param intersection common elements
      * @return the dot product
      */
-    protected long dot(CharSequence left, CharSequence right) {
+    private double dot(Map<String, Integer> leftVector, Map<String, Integer> 
rightVector, Set<String> intersection) {
         long dotProduct = 0;
-        for (int i = 0; i < left.length() && i < right.length(); ++i) {
-            dotProduct += (((int) left.charAt(i)) * ((int) right.charAt(i)));
+        for (String key : intersection) {
+            dotProduct += leftVector.get(key) * rightVector.get(key);
         }
         return dotProduct;
     }

http://git-wip-us.apache.org/repos/asf/commons-text/blob/81f679de/src/main/java/org/apache/commons/text/similarity/internal/CharacterTokenizer.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/similarity/internal/CharacterTokenizer.java
 
b/src/main/java/org/apache/commons/text/similarity/internal/CharacterTokenizer.java
new file mode 100644
index 0000000..5bab5a9
--- /dev/null
+++ 
b/src/main/java/org/apache/commons/text/similarity/internal/CharacterTokenizer.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity.internal;
+
+/**
+ * <p>
+ * Returns each character in a string as a token.
+ * </p>
+ */
+public class CharacterTokenizer implements Tokenizer<Character> {
+
+    /**
+     * {@inheritDoc}
+     *
+     * @throws IllegalArgumentException if the input text is blank
+     */
+    @Override
+    public Character[] tokenize(CharSequence text) {
+        if (text == null || text.toString().trim().equals("")) {
+            throw new IllegalArgumentException("Invalid text");
+        }
+        Character[] tokens = new Character[text.length()];
+        for (int i = 0; i < text.length(); ++i) {
+            tokens[i] = text.charAt(i);
+        }
+        return tokens;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/81f679de/src/main/java/org/apache/commons/text/similarity/internal/Counter.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/similarity/internal/Counter.java 
b/src/main/java/org/apache/commons/text/similarity/internal/Counter.java
new file mode 100644
index 0000000..6d2a4c4
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/similarity/internal/Counter.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity.internal;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Java implementation of Python's collections Counter module.
+ *
+ * @see 
https://docs.python.org/dev/library/collections.html#collections.Counter
+ */
+public class Counter {
+
+    private Counter() {}
+    
+    public static Map<String, Integer> of(String[] tokens) {
+        Map<String, Integer> innerCounter = new HashMap<String, Integer>();
+        for (String token : tokens) {
+            if (innerCounter.containsKey(token)) {
+                int value = innerCounter.get(token);
+                innerCounter.put(token, ++value);
+            } else {
+                innerCounter.put(token, 1);
+            }
+        }
+        return innerCounter;
+    }
+    
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/81f679de/src/main/java/org/apache/commons/text/similarity/internal/SimpleTokenizer.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/similarity/internal/SimpleTokenizer.java
 
b/src/main/java/org/apache/commons/text/similarity/internal/SimpleTokenizer.java
new file mode 100644
index 0000000..9df5299
--- /dev/null
+++ 
b/src/main/java/org/apache/commons/text/similarity/internal/SimpleTokenizer.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity.internal;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * <p>
+ * A simple word tokenizer that utilizes regex.
+ * </p>
+ */
+public class SimpleTokenizer implements Tokenizer<String> {
+
+    /**
+     * {@inheritDoc}
+     *
+     * @throws IllegalArgumentException if the input text is blank
+     */
+    @Override
+    public String[] tokenize(CharSequence text) {
+        if (text == null || text.toString().trim().equals("")) {
+            throw new IllegalArgumentException("Invalid text");
+        }
+        Pattern pattern = Pattern.compile("(\\w)+");
+        Matcher matcher = pattern.matcher(text.toString());
+        List<String> tokens = new ArrayList<String>();
+        while (matcher.find()) {
+            tokens.add(matcher.group(0));
+        }
+        return tokens.toArray(new String[0]);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/81f679de/src/main/java/org/apache/commons/text/similarity/internal/StringTokenizer.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/similarity/internal/StringTokenizer.java
 
b/src/main/java/org/apache/commons/text/similarity/internal/StringTokenizer.java
new file mode 100644
index 0000000..137f338
--- /dev/null
+++ 
b/src/main/java/org/apache/commons/text/similarity/internal/StringTokenizer.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity.internal;
+
+/**
+ * <p>
+ * A string tokenizer. Can produce arrays of tokens from a given type.
+ * </p>
+ *
+ * @param <T> given type
+ */
+public interface StringTokenizer<T> {
+
+    /**
+     * Returns an array of tokens.
+     *
+     * @param text input text
+     * @return array of tokens
+     */
+    T[] tokenize(CharSequence text);
+    
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/81f679de/src/main/java/org/apache/commons/text/similarity/internal/Tokenizer.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/similarity/internal/Tokenizer.java 
b/src/main/java/org/apache/commons/text/similarity/internal/Tokenizer.java
new file mode 100644
index 0000000..b5b877b
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/similarity/internal/Tokenizer.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity.internal;
+
+/**
+ * <p>
+ * A tokenizer. Can produce arrays of tokens from a given type.
+ * </p>
+ *
+ * @param <T> given type
+ */
+public interface Tokenizer<T> {
+
+    /**
+     * Returns an array of tokens.
+     *
+     * @param text input text
+     * @return array of tokens
+     */
+    T[] tokenize(CharSequence text);
+    
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/81f679de/src/main/java/org/apache/commons/text/similarity/internal/WhiteSpaceTokenizer.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/similarity/internal/WhiteSpaceTokenizer.java
 
b/src/main/java/org/apache/commons/text/similarity/internal/WhiteSpaceTokenizer.java
new file mode 100644
index 0000000..a827fdf
--- /dev/null
+++ 
b/src/main/java/org/apache/commons/text/similarity/internal/WhiteSpaceTokenizer.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity.internal;
+
+/**
+ * <p>
+ * A simple white space tokenizer.
+ * </p>
+ */
+public class WhiteSpaceTokenizer implements Tokenizer<String> {
+
+    private static final String SPACE_DELIMITER = " ";
+
+    /**
+     * {@inheritDoc}
+     *
+     * @throws IllegalArgumentException if the input text is blank
+     */
+    @Override
+    public String[] tokenize(CharSequence text) {
+        if (text == null || text.toString().trim().equals("")) {
+            throw new IllegalArgumentException("Invalid text");
+        }
+        java.util.StringTokenizer stringTokenizer = new 
java.util.StringTokenizer(text.toString(), SPACE_DELIMITER);
+        String[] tokens = new String[stringTokenizer.countTokens()];
+        for (int i = 0; i < tokens.length; ++i) {
+            tokens[i] = stringTokenizer.nextToken();
+        }
+        return tokens;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/81f679de/src/main/java/org/apache/commons/text/similarity/internal/package-info.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/similarity/internal/package-info.java 
b/src/main/java/org/apache/commons/text/similarity/internal/package-info.java
new file mode 100644
index 0000000..a346c07
--- /dev/null
+++ 
b/src/main/java/org/apache/commons/text/similarity/internal/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Classes used internally by similarity algorithms. Internal use, backward 
compatibility
+ * is not guaranteed.
+ */
+package org.apache.commons.text.similarity.internal;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/commons-text/blob/81f679de/src/test/java/org/apache/commons/text/similarity/CosineDistanceTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/commons/text/similarity/CosineDistanceTest.java 
b/src/test/java/org/apache/commons/text/similarity/CosineDistanceTest.java
new file mode 100644
index 0000000..ce33572
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/similarity/CosineDistanceTest.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+import static org.junit.Assert.assertEquals;
+
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link org.apache.commons.text.similarity.CosineSimilarity}.
+ */
+public class CosineDistanceTest {
+
+    private static CosineDistance cosineDistance;
+
+    @BeforeClass
+    public static void setUp() {
+        cosineDistance = new CosineDistance();
+    }
+
+    @Test
+    public void testCosineSimilarity() {
+        assertEquals(Double.valueOf(0.5d), 
roundValue(cosineDistance.compare("the house", "da house")));
+        assertEquals(Double.valueOf(0.0d), 
roundValue(cosineDistance.compare("AB", "AB")));
+        assertEquals(Double.valueOf(1.0d), 
roundValue(cosineDistance.compare("AB", "BA")));
+        assertEquals(Double.valueOf(0.08d), roundValue(cosineDistance.compare(
+                "the boy was from tamana shi, kumamoto ken, and the girl was 
from rio de janeiro, rio",
+                "the boy was from tamana shi, kumamoto, and the boy was from 
rio de janeiro, rio de janeiro")));
+    }
+
+    // --- Utility methods
+
+    private Double roundValue(Double value) {
+        return (Double) new BigDecimal(value).setScale(2, 
RoundingMode.HALF_UP).doubleValue();
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/81f679de/src/test/java/org/apache/commons/text/similarity/CosineSimilarityTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/commons/text/similarity/CosineSimilarityTest.java 
b/src/test/java/org/apache/commons/text/similarity/CosineSimilarityTest.java
deleted file mode 100644
index aa08057..0000000
--- a/src/test/java/org/apache/commons/text/similarity/CosineSimilarityTest.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.text.similarity;
-
-import static org.junit.Assert.assertEquals;
-
-import java.math.BigDecimal;
-import java.math.RoundingMode;
-
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-/**
- * Unit tests for {@link org.apache.commons.text.similarity.CosineSimilarity}.
- */
-public class CosineSimilarityTest {
-
-    private static CosineSimilarity cosineSimilarity;
-
-    @BeforeClass
-    public static void setUp() {
-        cosineSimilarity = new CosineSimilarity();
-    }
-
-    @Test
-    public void testCosineSimilarity() {
-        assertEquals(Double.valueOf(0.62d), 
roundValue(cosineSimilarity.compare("ABCDE", "AB")));
-        assertEquals(Double.valueOf(1.00d), 
roundValue(cosineSimilarity.compare("AB", "AB")));
-    }
-
-    // --- Utility methods
-
-    private Double roundValue(Double value) {
-        return (Double) new BigDecimal(value).setScale(2, 
RoundingMode.HALF_UP).doubleValue();
-    }
-
-}

Reply via email to