Repository: commons-text Updated Branches: refs/heads/master 2f0052334 -> 980791b69
SANDBOX-493: Change (R) StringMetric.compare(CS left, CS right) to apply so that it is consistent with BiFunction. This fixes #2 from GitHub. Thanks to Jonathan Baker. Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/980791b6 Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/980791b6 Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/980791b6 Branch: refs/heads/master Commit: 980791b69dc4cb1a639e835ea3b151d9d107ea7f Parents: 2f00523 Author: Bruno P. Kinoshita <brunodepau...@yahoo.com.br> Authored: Thu Mar 19 23:39:04 2015 -0300 Committer: Bruno P. Kinoshita <brunodepau...@yahoo.com.br> Committed: Thu Mar 19 23:39:04 2015 -0300 ---------------------------------------------------------------------- src/changes/changes.xml | 1 + .../commons/text/similarity/FuzzyScore.java | 18 ++-- .../text/similarity/HammingDistance.java | 12 +-- .../text/similarity/JaroWrinklerDistance.java | 30 +++--- .../text/similarity/LevenshteinDistance.java | 24 ++--- .../commons/text/similarity/StringMetric.java | 9 +- .../commons/text/similarity/FuzzyScoreTest.java | 22 ++-- .../text/similarity/HammingDistanceTest.java | 20 ++-- .../similarity/JaroWrinklerDistanceTest.java | 20 ++-- .../similarity/LevenshteinDistanceTest.java | 100 ++++++++++++++++--- .../ParameterizedLevenshteinDistanceTest.java | 2 +- 11 files changed, 168 insertions(+), 90 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-text/blob/980791b6/src/changes/changes.xml ---------------------------------------------------------------------- diff --git a/src/changes/changes.xml b/src/changes/changes.xml index fa6417f..553d55b 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -22,6 +22,7 @@ <body> <release version="1.0" date="tba" description="tba"> + <action issue="SANDBOX-493" type="fix" dev="kinow" due-to="Jonathan 
Baker">Change (R) StringMetric.compare(CS left, CS right) to "apply" so that it is consistent with BiFunction.</action> <action issue="SANDBOX-491" type="fix" dev="kinow" due-to="Jonathan Baker">Allow extra information (e.g. Levenshtein threshold) to be stored as (final) fields in the StringMetric instance.</action> <action issue="SANDBOX-486" type="add" dev="kinow">Port Myers algorithm from [collections]</action> <action issue="SANDBOX-485" type="add" dev="kinow">Add Hamming distance</action> http://git-wip-us.apache.org/repos/asf/commons-text/blob/980791b6/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java index 3cf6df2..e7e5b09 100644 --- a/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java +++ b/src/main/java/org/apache/commons/text/similarity/FuzzyScore.java @@ -58,14 +58,14 @@ public class FuzzyScore implements StringMetric<Integer> { * </p> * * <pre> - * score.compare(null, null, null) = IllegalArgumentException - * score.compare("", "", Locale.ENGLISH) = 0 - * score.compare("Workshop", "b", Locale.ENGLISH) = 0 - * score.compare("Room", "o", Locale.ENGLISH) = 1 - * score.compare("Workshop", "w", Locale.ENGLISH) = 1 - * score.compare("Workshop", "ws", Locale.ENGLISH) = 2 - * score.compare("Workshop", "wo", Locale.ENGLISH) = 4 - * score.compare("Apache Software Foundation", "asf", Locale.ENGLISH) = 3 + * score.apply(null, null, null) = IllegalArgumentException + * score.apply("", "", Locale.ENGLISH) = 0 + * score.apply("Workshop", "b", Locale.ENGLISH) = 0 + * score.apply("Room", "o", Locale.ENGLISH) = 1 + * score.apply("Workshop", "w", Locale.ENGLISH) = 1 + * score.apply("Workshop", "ws", Locale.ENGLISH) = 2 + * score.apply("Workshop", "wo", Locale.ENGLISH) = 4 + * score.apply("Apache Software Foundation", "asf", 
Locale.ENGLISH) = 3 * </pre> * * @param term a full term that should be matched against, must not be null @@ -76,7 +76,7 @@ public class FuzzyScore implements StringMetric<Integer> { * Locale input {@code null} */ @Override - public Integer compare(CharSequence term, CharSequence query) { + public Integer apply(CharSequence term, CharSequence query) { if (term == null || query == null) { throw new IllegalArgumentException("Strings must not be null"); } http://git-wip-us.apache.org/repos/asf/commons-text/blob/980791b6/src/main/java/org/apache/commons/text/similarity/HammingDistance.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/HammingDistance.java b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java index eb21307..94d0aad 100644 --- a/src/main/java/org/apache/commons/text/similarity/HammingDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/HammingDistance.java @@ -39,11 +39,11 @@ public class HammingDistance implements StringMetric<Integer> { * will throw IllegalArgumentException</p> * * <pre> - * distance.compare("", "") = 0 - * distance.compare("pappa", "pappa") = 0 - * distance.compare("1011101", "1011111") = 1 - * distance.compare("ATCG", "ACCC") = 2 - * distance.compare("karolin", "kerstin" = 3 + * distance.apply("", "") = 0 + * distance.apply("pappa", "pappa") = 0 + * distance.apply("1011101", "1011111") = 1 + * distance.apply("ATCG", "ACCC") = 2 + * distance.apply("karolin", "kerstin" = 3 * </pre> * * @param left the first CharSequence, must not be null @@ -53,7 +53,7 @@ public class HammingDistance implements StringMetric<Integer> { * if they do not have the same length */ @Override - public Integer compare(CharSequence left, CharSequence right) { + public Integer apply(CharSequence left, CharSequence right) { if (left == null || right == null) { throw new IllegalArgumentException("Strings must not be null"); } 
http://git-wip-us.apache.org/repos/asf/commons-text/blob/980791b6/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java index 07fbc7e..6955c3d 100644 --- a/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/JaroWrinklerDistance.java @@ -47,20 +47,20 @@ public class JaroWrinklerDistance implements StringMetric<Double> { * between two CharSequences. * * <pre> - * distance.compare(null, null) = IllegalArgumentException - * distance.compare("","") = 0.0 - * distance.compare("","a") = 0.0 - * distance.compare("aaapppp", "") = 0.0 - * distance.compare("frog", "fog") = 0.93 - * distance.compare("fly", "ant") = 0.0 - * distance.compare("elephant", "hippo") = 0.44 - * distance.compare("hippo", "elephant") = 0.44 - * distance.compare("hippo", "zzzzzzzz") = 0.0 - * distance.compare("hello", "hallo") = 0.88 - * distance.compare("ABC Corporation", "ABC Corp") = 0.91 - * distance.compare("D N H Enterprises Inc", "D & H Enterprises, Inc.") = 0.93 - * distance.compare("My Gym Children's Fitness Center", "My Gym. 
Childrens Fitness") = 0.94 - * distance.compare("PENNSYLVANIA", "PENNCISYLVNIA") = 0.9 + * distance.apply(null, null) = IllegalArgumentException + * distance.apply("","") = 0.0 + * distance.apply("","a") = 0.0 + * distance.apply("aaapppp", "") = 0.0 + * distance.apply("frog", "fog") = 0.93 + * distance.apply("fly", "ant") = 0.0 + * distance.apply("elephant", "hippo") = 0.44 + * distance.apply("hippo", "elephant") = 0.44 + * distance.apply("hippo", "zzzzzzzz") = 0.0 + * distance.apply("hello", "hallo") = 0.88 + * distance.apply("ABC Corporation", "ABC Corp") = 0.91 + * distance.apply("D N H Enterprises Inc", "D & H Enterprises, Inc.") = 0.93 + * distance.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness") = 0.94 + * distance.apply("PENNSYLVANIA", "PENNCISYLVNIA") = 0.9 * </pre> * * @param left the first String, must not be null @@ -69,7 +69,7 @@ public class JaroWrinklerDistance implements StringMetric<Double> { * @throws IllegalArgumentException if either String input {@code null} */ @Override - public Double compare(CharSequence left, CharSequence right) { + public Double apply(CharSequence left, CharSequence right) { final double DEFAULT_SCALING_FACTOR = 0.1; if (left == null || right == null) { http://git-wip-us.apache.org/repos/asf/commons-text/blob/980791b6/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java index 920dddb..c75e12e 100644 --- a/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java +++ b/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java @@ -80,17 +80,17 @@ public class LevenshteinDistance implements StringMetric<Integer> { * is from <a href="http://www.merriampark.com/ldjava.htm">http://www.merriampark.com/ldjava.htm</a></p> * * 
<pre> - * distance.compare(null, *) = IllegalArgumentException - * distance.compare(*, null) = IllegalArgumentException - * distance.compare("","") = 0 - * distance.compare("","a") = 1 - * distance.compare("aaapppp", "") = 7 - * distance.compare("frog", "fog") = 1 - * distance.compare("fly", "ant") = 3 - * distance.compare("elephant", "hippo") = 7 - * distance.compare("hippo", "elephant") = 7 - * distance.compare("hippo", "zzzzzzzz") = 8 - * distance.compare("hello", "hallo") = 1 + * distance.apply(null, *) = IllegalArgumentException + * distance.apply(*, null) = IllegalArgumentException + * distance.apply("","") = 0 + * distance.apply("","a") = 1 + * distance.apply("aaapppp", "") = 7 + * distance.apply("frog", "fog") = 1 + * distance.apply("fly", "ant") = 3 + * distance.apply("elephant", "hippo") = 7 + * distance.apply("hippo", "elephant") = 7 + * distance.apply("hippo", "zzzzzzzz") = 8 + * distance.apply("hello", "hallo") = 1 * </pre> * * @param left the first string, must not be null @@ -98,7 +98,7 @@ public class LevenshteinDistance implements StringMetric<Integer> { * @return result distance, or -1 * @throws IllegalArgumentException if either String input {@code null} */ - public Integer compare(CharSequence left, CharSequence right) { + public Integer apply(CharSequence left, CharSequence right) { if (threshold != null) { return limitedCompare(left, right, threshold); } else { http://git-wip-us.apache.org/repos/asf/commons-text/blob/980791b6/src/main/java/org/apache/commons/text/similarity/StringMetric.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/StringMetric.java b/src/main/java/org/apache/commons/text/similarity/StringMetric.java index fd6880a..2d1adfa 100644 --- a/src/main/java/org/apache/commons/text/similarity/StringMetric.java +++ b/src/main/java/org/apache/commons/text/similarity/StringMetric.java @@ -24,6 +24,13 @@ package org.apache.commons.text.similarity; * 
the algorithm, higher values can mean closer strings, or more distant strings. * </p> * + * <p> + * This is a BiFunction<CharSequence, CharSequence, R>. + * The <code>apply</code> method + * accepts a pair of {@link CharSequence} parameters + * and returns an <code>R</code> type similarity score. + * </p> + * * @param <R> The type of similarity score unit used by this StringMetric. */ public interface StringMetric<R> { @@ -35,6 +42,6 @@ public interface StringMetric<R> { * @param right the second CharSequence * @return the similarity score between two CharSequences */ - R compare(CharSequence left, CharSequence right); + R apply(CharSequence left, CharSequence right); } http://git-wip-us.apache.org/repos/asf/commons-text/blob/980791b6/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java b/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java index 88778fc..44c2eeb 100644 --- a/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java +++ b/src/test/java/org/apache/commons/text/similarity/FuzzyScoreTest.java @@ -31,34 +31,34 @@ public class FuzzyScoreTest { @Test public void testGetFuzzyScore() throws Exception { - assertEquals(0, (int) ENGLISH_SCORE.compare("", "")); - assertEquals(0, (int) ENGLISH_SCORE.compare("Workshop", "b")); - assertEquals(1, (int) ENGLISH_SCORE.compare("Room", "o")); - assertEquals(1, (int) ENGLISH_SCORE.compare("Workshop", "w")); - assertEquals(2, (int) ENGLISH_SCORE.compare("Workshop", "ws")); - assertEquals(4, (int) ENGLISH_SCORE.compare("Workshop", "wo")); - assertEquals(3, (int) ENGLISH_SCORE.compare( + assertEquals(0, (int) ENGLISH_SCORE.apply("", "")); + assertEquals(0, (int) ENGLISH_SCORE.apply("Workshop", "b")); + assertEquals(1, (int) ENGLISH_SCORE.apply("Room", "o")); + assertEquals(1, (int) ENGLISH_SCORE.apply("Workshop", "w")); + 
assertEquals(2, (int) ENGLISH_SCORE.apply("Workshop", "ws")); + assertEquals(4, (int) ENGLISH_SCORE.apply("Workshop", "wo")); + assertEquals(3, (int) ENGLISH_SCORE.apply( "Apache Software Foundation", "asf")); } @Test(expected = IllegalArgumentException.class) public void testGetFuzzyScore_StringNullLocale() throws Exception { - ENGLISH_SCORE.compare("not null", null); + ENGLISH_SCORE.apply("not null", null); } @Test(expected = IllegalArgumentException.class) public void testGetFuzzyScore_NullStringLocale() throws Exception { - ENGLISH_SCORE.compare(null, "not null"); + ENGLISH_SCORE.apply(null, "not null"); } @Test(expected = IllegalArgumentException.class) public void testGetFuzzyScore_NullNullLocale() throws Exception { - ENGLISH_SCORE.compare(null, null); + ENGLISH_SCORE.apply(null, null); } @Test(expected = IllegalArgumentException.class) public void testMissingLocale() throws Exception { - FuzzyScore score = new FuzzyScore((Locale) null); + new FuzzyScore((Locale) null); } } http://git-wip-us.apache.org/repos/asf/commons-text/blob/980791b6/src/test/java/org/apache/commons/text/similarity/HammingDistanceTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/similarity/HammingDistanceTest.java b/src/test/java/org/apache/commons/text/similarity/HammingDistanceTest.java index aac2fa3..ed23433 100644 --- a/src/test/java/org/apache/commons/text/similarity/HammingDistanceTest.java +++ b/src/test/java/org/apache/commons/text/similarity/HammingDistanceTest.java @@ -35,24 +35,24 @@ public class HammingDistanceTest { @Test public void testHammingDistance() { - assertEquals(Integer.valueOf(0), distance.compare("", "")); - assertEquals(Integer.valueOf(0), distance.compare("pappa", "pappa")); - assertEquals(Integer.valueOf(1), distance.compare("papaa", "pappa")); - assertEquals(Integer.valueOf(3), distance.compare("karolin", "kathrin")); - assertEquals(Integer.valueOf(3), 
distance.compare("karolin", "kerstin")); - assertEquals(Integer.valueOf(2), distance.compare("1011101", "1001001")); - assertEquals(Integer.valueOf(3), distance.compare("2173896", "2233796")); - assertEquals(Integer.valueOf(2), distance.compare("ATCG", "ACCC")); + assertEquals(Integer.valueOf(0), distance.apply("", "")); + assertEquals(Integer.valueOf(0), distance.apply("pappa", "pappa")); + assertEquals(Integer.valueOf(1), distance.apply("papaa", "pappa")); + assertEquals(Integer.valueOf(3), distance.apply("karolin", "kathrin")); + assertEquals(Integer.valueOf(3), distance.apply("karolin", "kerstin")); + assertEquals(Integer.valueOf(2), distance.apply("1011101", "1001001")); + assertEquals(Integer.valueOf(3), distance.apply("2173896", "2233796")); + assertEquals(Integer.valueOf(2), distance.apply("ATCG", "ACCC")); } @Test(expected=IllegalArgumentException.class) public void testHammingDistance_nullLeftValue() { - distance.compare(null, ""); + distance.apply(null, ""); } @Test(expected=IllegalArgumentException.class) public void testHammingDistance_nullRightValue() { - distance.compare("", null); + distance.apply("", null); } } http://git-wip-us.apache.org/repos/asf/commons-text/blob/980791b6/src/test/java/org/apache/commons/text/similarity/JaroWrinklerDistanceTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/similarity/JaroWrinklerDistanceTest.java b/src/test/java/org/apache/commons/text/similarity/JaroWrinklerDistanceTest.java index 7050b05..660cca5 100644 --- a/src/test/java/org/apache/commons/text/similarity/JaroWrinklerDistanceTest.java +++ b/src/test/java/org/apache/commons/text/similarity/JaroWrinklerDistanceTest.java @@ -35,28 +35,28 @@ public class JaroWrinklerDistanceTest { @Test public void testGetJaroWinklerDistance_StringString() { - assertEquals(0.93d, (double) distance.compare("frog", "fog"), 0.0d); - assertEquals(0.0d, (double) distance.compare("fly", "ant"), 0.0d); - 
assertEquals(0.44d, (double) distance.compare("elephant", "hippo"), 0.0d); - assertEquals(0.91d, (double) distance.compare("ABC Corporation", "ABC Corp"), 0.0d); - assertEquals(0.93d, (double) distance.compare("D N H Enterprises Inc", "D & H Enterprises, Inc."), 0.0d); - assertEquals(0.94d, (double) distance.compare("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"), 0.0d); - assertEquals(0.9d, (double) distance.compare("PENNSYLVANIA", "PENNCISYLVNIA"), 0.0d); + assertEquals(0.93d, (double) distance.apply("frog", "fog"), 0.0d); + assertEquals(0.0d, (double) distance.apply("fly", "ant"), 0.0d); + assertEquals(0.44d, (double) distance.apply("elephant", "hippo"), 0.0d); + assertEquals(0.91d, (double) distance.apply("ABC Corporation", "ABC Corp"), 0.0d); + assertEquals(0.93d, (double) distance.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."), 0.0d); + assertEquals(0.94d, (double) distance.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"), 0.0d); + assertEquals(0.9d, (double) distance.apply("PENNSYLVANIA", "PENNCISYLVNIA"), 0.0d); } @Test(expected = IllegalArgumentException.class) public void testGetJaroWinklerDistance_NullNull() throws Exception { - distance.compare(null, null); + distance.apply(null, null); } @Test(expected = IllegalArgumentException.class) public void testGetJaroWinklerDistance_StringNull() throws Exception { - distance.compare(" ", null); + distance.apply(" ", null); } @Test(expected = IllegalArgumentException.class) public void testGetJaroWinklerDistance_NullString() throws Exception { - distance.compare(null, "clear"); + distance.apply(null, "clear"); } } http://git-wip-us.apache.org/repos/asf/commons-text/blob/980791b6/src/test/java/org/apache/commons/text/similarity/LevenshteinDistanceTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/similarity/LevenshteinDistanceTest.java 
b/src/test/java/org/apache/commons/text/similarity/LevenshteinDistanceTest.java index 814677d..bed0ca3 100644 --- a/src/test/java/org/apache/commons/text/similarity/LevenshteinDistanceTest.java +++ b/src/test/java/org/apache/commons/text/similarity/LevenshteinDistanceTest.java @@ -29,41 +29,111 @@ public class LevenshteinDistanceTest { @Test public void testGetLevenshteinDistance_StringString() { - assertEquals(0, (int) UNLIMITED_DISTANCE.compare("", "")); - assertEquals(1, (int) UNLIMITED_DISTANCE.compare("", "a")); - assertEquals(7, (int) UNLIMITED_DISTANCE.compare("aaapppp", "")); - assertEquals(1, (int) UNLIMITED_DISTANCE.compare("frog", "fog")); - assertEquals(3, (int) UNLIMITED_DISTANCE.compare("fly", "ant")); - assertEquals(7, (int) UNLIMITED_DISTANCE.compare("elephant", "hippo")); - assertEquals(7, (int) UNLIMITED_DISTANCE.compare("hippo", "elephant")); - assertEquals(8, (int) UNLIMITED_DISTANCE.compare("hippo", "zzzzzzzz")); - assertEquals(8, (int) UNLIMITED_DISTANCE.compare("zzzzzzzz", "hippo")); - assertEquals(1, (int) UNLIMITED_DISTANCE.compare("hello", "hallo")); + assertEquals(0, (int) UNLIMITED_DISTANCE.apply("", "")); + assertEquals(1, (int) UNLIMITED_DISTANCE.apply("", "a")); + assertEquals(7, (int) UNLIMITED_DISTANCE.apply("aaapppp", "")); + assertEquals(1, (int) UNLIMITED_DISTANCE.apply("frog", "fog")); + assertEquals(3, (int) UNLIMITED_DISTANCE.apply("fly", "ant")); + assertEquals(7, (int) UNLIMITED_DISTANCE.apply("elephant", "hippo")); + assertEquals(7, (int) UNLIMITED_DISTANCE.apply("hippo", "elephant")); + assertEquals(8, (int) UNLIMITED_DISTANCE.apply("hippo", "zzzzzzzz")); + assertEquals(8, (int) UNLIMITED_DISTANCE.apply("zzzzzzzz", "hippo")); + assertEquals(1, (int) UNLIMITED_DISTANCE.apply("hello", "hallo")); } @Test(expected = IllegalArgumentException.class) public void testGetLevenshteinDistance_NullString() throws Exception { - UNLIMITED_DISTANCE.compare("a", null); + UNLIMITED_DISTANCE.apply("a", null); } @Test(expected = 
IllegalArgumentException.class) public void testGetLevenshteinDistance_StringNull() throws Exception { - UNLIMITED_DISTANCE.compare(null, "a"); + UNLIMITED_DISTANCE.apply(null, "a"); + } + + @Test + public void testGetLevenshteinDistance_StringStringInt() { + // empty strings + assertEquals(0, (int) new LevenshteinDistance(0).apply("", "")); + assertEquals(7, (int) new LevenshteinDistance(8).apply("aaapppp", "")); + assertEquals(7, (int) new LevenshteinDistance(7).apply("aaapppp", "")); + assertEquals(-1, (int) new LevenshteinDistance(6).apply("aaapppp", "")); + + // unequal strings, zero threshold + assertEquals(-1, (int) new LevenshteinDistance(0).apply("b", "a")); + assertEquals(-1, (int) new LevenshteinDistance(0).apply("a", "b")); + + // equal strings + assertEquals(0, (int) new LevenshteinDistance(0).apply("aa", "aa")); + assertEquals(0, (int) new LevenshteinDistance(2).apply("aa", "aa")); + + // same length + assertEquals(-1, (int) new LevenshteinDistance(2).apply("aaa", "bbb")); + assertEquals(3, (int) new LevenshteinDistance(3).apply("aaa", "bbb")); + + // big stripe + assertEquals(6, (int) new LevenshteinDistance(10).apply("aaaaaa", "b")); + + // distance less than threshold + assertEquals(7, (int) new LevenshteinDistance(8).apply("aaapppp", "b")); + assertEquals(3, (int) new LevenshteinDistance(4).apply("a", "bbb")); + + // distance equal to threshold + assertEquals(7, (int) new LevenshteinDistance(7).apply("aaapppp", "b")); + assertEquals(3, (int) new LevenshteinDistance(3).apply("a", "bbb")); + + // distance greater than threshold + assertEquals(-1, (int) new LevenshteinDistance(2).apply("a", "bbb")); + assertEquals(-1, (int) new LevenshteinDistance(2).apply("bbb", "a")); + assertEquals(-1, (int) new LevenshteinDistance(6).apply("aaapppp", "b")); + + // stripe runs off array, strings not similar + assertEquals(-1, (int) new LevenshteinDistance(1).apply("a", "bbb")); + assertEquals(-1, (int) new LevenshteinDistance(1).apply("bbb", "a")); + + // stripe 
runs off array, strings are similar + assertEquals(-1, (int) new LevenshteinDistance(1).apply("12345", "1234567")); + assertEquals(-1, (int) new LevenshteinDistance(1).apply("1234567", "12345")); + + // old getLevenshteinDistance test cases + assertEquals(1, (int) new LevenshteinDistance(1).apply("frog", "fog")); + assertEquals(3, (int) new LevenshteinDistance(3).apply("fly", "ant")); + assertEquals(7, (int) new LevenshteinDistance(7).apply("elephant", "hippo")); + assertEquals(-1, (int) new LevenshteinDistance(6).apply("elephant", "hippo")); + assertEquals(7, (int) new LevenshteinDistance(7).apply("hippo", "elephant")); + assertEquals(-1, (int) new LevenshteinDistance(6).apply("hippo", "elephant")); + assertEquals(8, (int) new LevenshteinDistance(8).apply("hippo", "zzzzzzzz")); + assertEquals(8, (int) new LevenshteinDistance(8).apply("zzzzzzzz", "hippo")); + assertEquals(1, (int) new LevenshteinDistance(1).apply("hello", "hallo")); + + assertEquals(1, + (int) new LevenshteinDistance(Integer.MAX_VALUE).apply("frog", "fog")); + assertEquals(3, (int) new LevenshteinDistance(Integer.MAX_VALUE).apply("fly", "ant")); + assertEquals(7, + (int) new LevenshteinDistance(Integer.MAX_VALUE).apply("elephant", "hippo")); + assertEquals(7, + (int) new LevenshteinDistance(Integer.MAX_VALUE).apply("hippo", "elephant")); + assertEquals(8, + (int) new LevenshteinDistance(Integer.MAX_VALUE).apply("hippo", "zzzzzzzz")); + assertEquals(8, + (int) new LevenshteinDistance(Integer.MAX_VALUE).apply("zzzzzzzz", "hippo")); + assertEquals(1, + (int) new LevenshteinDistance(Integer.MAX_VALUE).apply("hello", "hallo")); } @Test(expected = IllegalArgumentException.class) public void testGetLevenshteinDistance_NullStringInt() throws Exception { - UNLIMITED_DISTANCE.compare(null, "a"); + UNLIMITED_DISTANCE.apply(null, "a"); } @Test(expected = IllegalArgumentException.class) public void testGetLevenshteinDistance_StringNullInt() throws Exception { - UNLIMITED_DISTANCE.compare("a", null); + 
UNLIMITED_DISTANCE.apply("a", null); } @Test(expected = IllegalArgumentException.class) public void testConstructorWithNegativeThreshold() throws Exception { - LevenshteinDistance distance = new LevenshteinDistance(-1); + new LevenshteinDistance(-1); } } http://git-wip-us.apache.org/repos/asf/commons-text/blob/980791b6/src/test/java/org/apache/commons/text/similarity/ParameterizedLevenshteinDistanceTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/similarity/ParameterizedLevenshteinDistanceTest.java b/src/test/java/org/apache/commons/text/similarity/ParameterizedLevenshteinDistanceTest.java index c6fd116..2ef5da2 100644 --- a/src/test/java/org/apache/commons/text/similarity/ParameterizedLevenshteinDistanceTest.java +++ b/src/test/java/org/apache/commons/text/similarity/ParameterizedLevenshteinDistanceTest.java @@ -119,7 +119,7 @@ public class ParameterizedLevenshteinDistanceTest { @Test public void test() { LevenshteinDistance metric = new LevenshteinDistance(threshold); - assertThat(metric.compare(left, right), equalTo(distance)); + assertThat(metric.apply(left, right), equalTo(distance)); } }