bradleyrumball commented on a change in pull request #214:
URL: https://github.com/apache/commons-text/pull/214#discussion_r610499047



##########
File path: 
src/main/java/org/apache/commons/text/similarity/JaroWinklerDistance.java
##########
@@ -71,20 +76,7 @@ public Double apply(final CharSequence left, final 
CharSequence right) {
             throw new IllegalArgumentException("CharSequences must not be 
null");
         }
 
-        // TODO: replace the rest of the code by this in 2.0, see TEXT-104
-        //
-        // JaroWinklerSimilarity similarity = new JaroWinklerSimilarity();
-        // return 1 - similarity.apply(left, right);
-
-        final double defaultScalingFactor = 0.1;
-        final int[] mtp = matches(left, right);
-        final double m = mtp[0];
-        if (m == 0) {
-            return 0D;
-        }
-        final double j = ((m / left.length() + m / right.length() + (m - 
(double) mtp[1] / 2) / m)) / 3;
-        final double jw = j < 0.7D ? j : j + defaultScalingFactor * mtp[2] * 
(1D - j);
-        return jw;
+        return 1 - similarity.apply(left, right);

Review comment:
       Hi Kinow!
   
   I wanted to do this as it seemed a far superior approach but wasn't sure if 
the repetition was present to limit stack depth.
   
   This looks good and is far superior.

##########
File path: 
src/test/java/org/apache/commons/text/similarity/JaroWinklerDistanceTest.java
##########
@@ -36,34 +36,22 @@ public static void setUp() {
 
     @Test
     public void testGetJaroWinklerDistance_StringString() {
-        assertEquals(0.92499d, distance.apply("frog", "fog"), 0.00001d);
-        assertEquals(0.0d, distance.apply("fly", "ant"), 
0.00000000000000000001d);
-        assertEquals(0.44166d, distance.apply("elephant", "hippo"), 0.00001d);
-        assertEquals(0.90666d, distance.apply("ABC Corporation", "ABC Corp"), 
0.00001d);
-        assertEquals(0.95251d, distance.apply("D N H Enterprises Inc", "D & H 
Enterprises, Inc."), 0.00001d);
-        assertEquals(0.942d, distance.apply("My Gym Children's Fitness 
Center", "My Gym. Childrens Fitness"), 0.00001d);
-        assertEquals(0.898018d, distance.apply("PENNSYLVANIA", 
"PENNCISYLVNIA"), 0.00001d);
-        assertEquals(0.971428d, distance.apply("/opt/software1", 
"/opt/software2"), 0.00001d);
-        assertEquals(0.941666d, distance.apply("aaabcd", "aaacdb"), 0.00001d);
-        assertEquals(0.911111d, distance.apply("John Horn", "John Hopkins"), 
0.00001d);
-        // TODO: replace tests in 2.0. See TEXT-104 for more.
-        // assertEquals(0d, distance.apply("", ""), 0.00001d);
-        // assertEquals(0d, distance.apply("foo", "foo"), 0.00001d);
-        // assertEquals(1 - 0.94166d, distance.apply("foo", "foo "), 0.00001d);
-        // assertEquals(1 - 0.90666d, distance.apply("foo", "foo  "), 
0.00001d);
-        // assertEquals(1 - 0.86666d, distance.apply("foo", " foo "), 
0.00001d);
-        // assertEquals(1 - 0.51111d, distance.apply("foo", "  foo"), 
0.00001d);
-        // assertEquals(1 - 0.92499d, distance.apply("frog", "fog"), 0.00001d);
-        // assertEquals(1.0d, distance.apply("fly", "ant"), 
0.00000000000000000001d);
-        // assertEquals(1 - 0.44166d, distance.apply("elephant", "hippo"), 
0.00001d);
-        // assertEquals(1 - 0.90666d, distance.apply("ABC Corporation", "ABC 
Corp"), 0.00001d);
-        // assertEquals(1 - 0.95251d, distance.apply("D N H Enterprises Inc", 
"D & H Enterprises, Inc."), 0.00001d);
-        // assertEquals(1 - 0.942d,
-        //         distance.apply("My Gym Children's Fitness Center", "My Gym. 
Childrens Fitness"), 0.00001d);
-        // assertEquals(1 - 0.898018d, distance.apply("PENNSYLVANIA", 
"PENNCISYLVNIA"), 0.00001d);
-        // assertEquals(1 - 0.971428d, distance.apply("/opt/software1", 
"/opt/software2"), 0.00001d);
-        // assertEquals(1 - 0.941666d, distance.apply("aaabcd", "aaacdb"), 
0.00001d);
-        // assertEquals(1 - 0.911111d, distance.apply("John Horn", "John 
Hopkins"), 0.00001d);
+        assertEquals(0.07501d, distance.apply("frog", "fog"), 0.00001d);
+        assertEquals(1.0d, distance.apply("fly", "ant"), 
0.00000000000000000001d);
+        assertEquals(0.55834d, distance.apply("elephant", "hippo"), 0.00001d);
+        assertEquals(0.09334d, distance.apply("ABC Corporation", "ABC Corp"), 
0.00001d);
+        assertEquals(0.04749d, distance.apply("D N H Enterprises Inc", "D & H 
Enterprises, Inc."), 0.00001d);
+        assertEquals(0.058d, distance.apply("My Gym Children's Fitness 
Center", "My Gym. Childrens Fitness"), 0.00001d);
+        assertEquals(0.101982d, distance.apply("PENNSYLVANIA", 
"PENNCISYLVNIA"), 0.00001d);
+        assertEquals(0.028572d, distance.apply("/opt/software1", 
"/opt/software2"), 0.00001d);
+        assertEquals(0.058334d, distance.apply("aaabcd", "aaacdb"), 0.00001d);
+        assertEquals(0.088889d, distance.apply("John Horn", "John Hopkins"), 
0.00001d);
+        assertEquals(0d, distance.apply("", ""), 0.00001d);
+        assertEquals(0d, distance.apply("foo", "foo"), 0.00001d);
+        assertEquals(1 - 0.94166d, distance.apply("foo", "foo "), 0.00001d);
+        assertEquals(1 - 0.90666d, distance.apply("foo", "foo  "), 0.00001d);
+        assertEquals(1 - 0.86666d, distance.apply("foo", " foo "), 0.00001d);
+        assertEquals(1 - 0.51111d, distance.apply("foo", "  foo"), 0.00001d);

Review comment:
       Looks good; the only thing I can note is the (1 - double) in the 4 new 
cases if we wanted to be consistent with the other tests. Otherwise, happy days!




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to