This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git
The following commit(s) were added to refs/heads/master by this push:
new 047d2472 CODEC-312: Fix possible StringIndexOutOfBoundsException thrown
by MatchRatingApproachEncoder.encode() method (#220)
047d2472 is described below
commit 047d24724c4aaae158331cfef0de0a9d5df2686e
Author: Arthur Chan <[email protected]>
AuthorDate: Sat Nov 25 03:05:19 2023 +0000
CODEC-312: Fix possible StringIndexOutOfBoundsException thrown by
MatchRatingApproachEncoder.encode() method (#220)
* CODEC-312: Fix possible StringIndexOutOfBoundsException
Signed-off-by: Arthur Chan <[email protected]>
* CODEC-312: Add unit test
Signed-off-by: Arthur Chan <[email protected]>
* Remove unmaintained comments
---------
Signed-off-by: Arthur Chan <[email protected]>
Co-authored-by: Gary Gregory <[email protected]>
---
.../codec/language/MatchRatingApproachEncoder.java | 10 ++++++++
.../language/MatchRatingApproachEncoderTest.java | 28 +++++++++++-----------
2 files changed, 24 insertions(+), 14 deletions(-)
diff --git a/src/main/java/org/apache/commons/codec/language/MatchRatingApproachEncoder.java b/src/main/java/org/apache/commons/codec/language/MatchRatingApproachEncoder.java
index 86f08437..d871cc48 100644
--- a/src/main/java/org/apache/commons/codec/language/MatchRatingApproachEncoder.java
+++ b/src/main/java/org/apache/commons/codec/language/MatchRatingApproachEncoder.java
@@ -126,10 +126,20 @@ public class MatchRatingApproachEncoder implements StringEncoder {
// Preprocessing
name = cleanName(name);
+ // Bulletproof if name becomes empty after cleanName(name)
+ if (SPACE.equals(name) || name.isEmpty()) {
+ return EMPTY;
+ }
+
// BEGIN: Actual encoding part of the algorithm...
// 1. Delete all vowels unless the vowel begins the word
name = removeVowels(name);
+ // Bulletproof if name becomes empty after removeVowels(name)
+ if (SPACE.equals(name) || name.isEmpty()) {
+ return EMPTY;
+ }
+
// 2. Remove second consonant from any double consonant
name = removeDoubleConsonants(name);
diff --git a/src/test/java/org/apache/commons/codec/language/MatchRatingApproachEncoderTest.java b/src/test/java/org/apache/commons/codec/language/MatchRatingApproachEncoderTest.java
index bf7508e8..7f10ecd8 100644
--- a/src/test/java/org/apache/commons/codec/language/MatchRatingApproachEncoderTest.java
+++ b/src/test/java/org/apache/commons/codec/language/MatchRatingApproachEncoderTest.java
@@ -35,8 +35,6 @@ import org.junit.jupiter.api.Test;
*/
public class MatchRatingApproachEncoderTest extends AbstractStringEncoderTest<MatchRatingApproachEncoder> {
- // ********** BEGIN REGION - TEST SUPPORT METHODS
-
@Override
protected MatchRatingApproachEncoder createStringEncoder() {
return new MatchRatingApproachEncoder();
@@ -248,10 +246,6 @@ public class MatchRatingApproachEncoderTest extends
AbstractStringEncoderTest<Ma
assertTrue(this.getStringEncoder().isEncodeEquals("O'Sullivan", "Ó ' Súilleabháin"));
}
- // ***** END REGION - TEST SUPPORT METHODS
-
- // ***** BEGIN REGION - TEST GET MRA ENCODING
-
@Test
public final void testCompare_Surname_PRZEMYSL_PSHEMESHIL_SuccessfullyMatched() {
assertTrue(this.getStringEncoder().isEncodeEquals(" P rz e m y s l", " P sh e m e sh i l"));
@@ -297,10 +291,6 @@ public class MatchRatingApproachEncoderTest extends
AbstractStringEncoderTest<Ma
assertTrue(this.getStringEncoder().isEncodeEquals("Zach", "Zacharia"));
}
- // ***** END REGION - TEST GET MRA ENCODING
-
- // ***** BEGIN REGION - TEST GET MRA COMPARISONS
-
@Test
public final void testCompareNameNullSpace_ReturnsFalseSuccessfully() {
assertFalse(getStringEncoder().isEncodeEquals(null, " "));
@@ -433,8 +423,6 @@ public class MatchRatingApproachEncoderTest extends
AbstractStringEncoderTest<Ma
assertFalse(this.getStringEncoder().isEncodeEquals("", "test"));
}
- // **** BEGIN YIDDISH/SLAVIC SECTION ****
-
@Test
public final void testIsEncodeEquals_CornerCase_FirstNameNull_ReturnsFalse() {
assertFalse(this.getStringEncoder().isEncodeEquals(null, "test"));
@@ -470,8 +458,6 @@ public class MatchRatingApproachEncoderTest extends
AbstractStringEncoderTest<Ma
assertTrue(this.getStringEncoder().isVowel("I"));
}
- // **** END YIDDISH/SLAVIC SECTION ****
-
@Test
public final void testIsVowel_SmallD_ReturnsFalse() {
assertFalse(this.getStringEncoder().isVowel("d"));
@@ -519,4 +505,18 @@ public class MatchRatingApproachEncoderTest extends
AbstractStringEncoderTest<Ma
// ***** END REGION - TEST GET MRA COMPARISONS
+ @Test
+ public final void testPunctuationOnly() {
+ assertEquals(this.getStringEncoder().encode(".,-"), "");
+ }
+
+ @Test
+ public final void testVowelOnly() {
+ assertEquals(this.getStringEncoder().encode("aeiouAEIOU"), "A");
+ }
+
+ @Test
+ public final void testVowelAndPunctuationOnly() {
+ assertEquals(this.getStringEncoder().encode("uoiea.,-AEIOU"), "U");
+ }
}