Repository: commons-text Updated Branches: refs/heads/master 4c29a73b0 -> 8bda46197
TEXT-32: finishing LCS similarity Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/46bb20ab Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/46bb20ab Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/46bb20ab Branch: refs/heads/master Commit: 46bb20abc4bd2dd2afbf92eb5005158a281f0ba7 Parents: 676a1a7 Author: Rob Tompkins <[email protected]> Authored: Thu Dec 22 15:30:49 2016 -0500 Committer: Rob Tompkins <[email protected]> Committed: Thu Dec 22 15:34:14 2016 -0500 ---------------------------------------------------------------------- .../similarity/LongestCommonSubsequence.java | 48 ++++------- .../LongestCommonSubsequenceTest.java | 89 +++++++++++++++----- 2 files changed, 83 insertions(+), 54 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-text/blob/46bb20ab/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java b/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java index 4e809c2..fd2b5bb 100644 --- a/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java +++ b/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java @@ -42,12 +42,23 @@ package org.apache.commons.text.similarity; */ public class LongestCommonSubsequence implements SimilarityScore<Integer> { + /** + * Calculates longestCommonSubsequence similarity scoreof two set character sequence passed as + * input. + * + * @param left first character sequence + * @param right second character sequence + * @return longestCommonSubsequenceLength + * @throws IllegalArgumentException + * if either String input {@code null} + */ + @Override public Integer apply(final CharSequence left, final CharSequence right) { // Quick return for invalid inputs if (left == null || right == null) { throw new IllegalArgumentException("Inputs must not be null"); } - return longestCommonSubstringLengthArray(left,right)[left.length()][right.length()]; + return logestCommonSubsequence(left, right).length(); } /** @@ -70,6 +81,8 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> { * @param left * @param right * @return lcsLengthArray + * @throws IllegalArgumentException + * if either String input {@code null} */ public CharSequence logestCommonSubsequence(final CharSequence left, final CharSequence right) { // Quick return @@ -93,7 +106,7 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> { j = j - 1; } } - return longestCommonSubstringArray; + return longestCommonSubstringArray.reverse().toString(); } /** @@ -126,35 +139,4 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> { return lcsLengthArray; } - - public int wikiLcs(CharSequence left, CharSequence right) { - int m = left.length(); - int n = right.length(); - char[] x = left.toString().toCharArray(); - char[] y = right.toString().toCharArray(); - - int[][] c = new int[m + 1][n + 1]; - - for (int i = 0; i <= m; i++) { - c[i][0] = 0; - } - - for (int j = 0; j <= n; j++) { - c[0][j] = 0; - } - - for (int i = 1; i <= m; i++) { - for (int j = 1; j <= n; j++) { - if (x[i - 1] == y[j - 1]) { - c[i][j] = c[i - 1][j - 1] + 1; - - } else { - c[i][j] = Math.max(c[i][j - 1], c[i - 1][j]); - } - } - } - - return c[m][n]; - } - } http://git-wip-us.apache.org/repos/asf/commons-text/blob/46bb20ab/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceTest.java b/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceTest.java index 9c50412..e852884 100644 --- a/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceTest.java +++ b/src/test/java/org/apache/commons/text/similarity/LongestCommonSubsequenceTest.java @@ -1,36 +1,83 @@ package org.apache.commons.text.similarity; +import org.junit.Before; import org.junit.Test; +import static org.junit.Assert.assertEquals; + /** * Created by tompkicr on 12/20/16. */ public class LongestCommonSubsequenceTest { + private LongestCommonSubsequence subject; + + @Before + public void setup() { + subject = new LongestCommonSubsequence(); + } + @Test - public void testLongestCommonSubsequence() { - LongestCommonSubsequence lcs = new LongestCommonSubsequence(); - CharSequence retVal = lcs.logestCommonSubsequence("abab","abba"); - System.out.println("testLongestCommonSubstring:"); - System.out.println("retVal: " + retVal); - for (int n=0; n < retVal.length(); n++){ - System.out.print("isISOControl: " + Character.isISOControl(retVal.charAt(n)) + " "); - System.out.println(retVal.charAt(n)); - } + public void testLongestCommonSubsequenceApply() { + assertEquals(Integer.valueOf(0), subject.apply("", "")); + assertEquals(Integer.valueOf(0), subject.apply("left", "")); + assertEquals(Integer.valueOf(0), subject.apply("", "right")); + assertEquals(Integer.valueOf(3), subject.apply("frog", "fog")); + assertEquals(Integer.valueOf(0), subject.apply("fly", "ant")); + assertEquals(Integer.valueOf(1), subject.apply("elephant", "hippo")); + assertEquals(Integer.valueOf(8), subject.apply("ABC Corporation", "ABC Corp")); + assertEquals(Integer.valueOf(20), subject.apply("D N H Enterprises Inc", "D & H Enterprises, Inc.")); + assertEquals(Integer.valueOf(24), subject.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness")); + assertEquals(Integer.valueOf(11), subject.apply("PENNSYLVANIA", "PENNCISYLVNIA")); + assertEquals(Integer.valueOf(1), subject.apply("left", "right")); + assertEquals(Integer.valueOf(4), subject.apply("leettteft", "ritttght")); + assertEquals(Integer.valueOf(15), subject.apply("the same string", "the same string")); + } + + @Test(expected = IllegalArgumentException.class) + public void testGettingLongestCommonSubsequenceApplyNullNull() throws Exception { + subject.apply(null, null); + } + + @Test(expected = IllegalArgumentException.class) + public void testGettingLongestCommonSubsequenceApplyStringNull() throws Exception { + subject.apply(" ", null); + } + + @Test(expected = IllegalArgumentException.class) + public void testGettingLongestCommonSubsequenceApplyNullString() throws Exception { + subject.apply(null, "right"); } @Test - public void testLcsLengthArray() { - LongestCommonSubsequence lcs = new LongestCommonSubsequence(); - String left = "abab"; - String right = "abba"; - int[][] retVal = lcs.longestCommonSubstringLengthArray(left, right); - System.out.println("testLcsLengthArray:"); - for (int i=0; i <= left.length(); i++){ - for (int j=0; j <= right.length(); j++){ - System.out.print(retVal[i][j] + " "); - } - System.out.print("\n"); - } + public void testLongestCommonSubsequence() { + assertEquals("", subject.logestCommonSubsequence("", "")); + assertEquals("", subject.logestCommonSubsequence("left", "")); + assertEquals("", subject.logestCommonSubsequence("", "right")); + assertEquals("fog", subject.logestCommonSubsequence("frog", "fog")); + assertEquals("", subject.logestCommonSubsequence("fly", "ant")); + assertEquals("h", subject.logestCommonSubsequence("elephant", "hippo")); + assertEquals("ABC Corp", subject.logestCommonSubsequence("ABC Corporation", "ABC Corp")); + assertEquals("D H Enterprises Inc", subject.logestCommonSubsequence("D N H Enterprises Inc", "D & H Enterprises, Inc.")); + assertEquals("My Gym Childrens Fitness", subject.logestCommonSubsequence("My Gym Children's Fitness Center", "My Gym. Childrens Fitness")); + assertEquals("PENNSYLVNIA", subject.logestCommonSubsequence("PENNSYLVANIA", "PENNCISYLVNIA")); + assertEquals("t", subject.logestCommonSubsequence("left", "right")); + assertEquals("tttt", subject.logestCommonSubsequence("leettteft", "ritttght")); + assertEquals("the same string", subject.logestCommonSubsequence("the same string", "the same string")); + } + + @Test(expected = IllegalArgumentException.class) + public void testGettingLongestCommonSubsequenceNullNull() throws Exception { + subject.logestCommonSubsequence(null, null); + } + + @Test(expected = IllegalArgumentException.class) + public void testGettingLongestCommonSubsequenceStringNull() throws Exception { + subject.logestCommonSubsequence(" ", null); + } + + @Test(expected = IllegalArgumentException.class) + public void testGettingLongestCommonSubsequenceNullString() throws Exception { + subject.logestCommonSubsequence(null, "right"); } }
