This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git
The following commit(s) were added to refs/heads/master by this push:
new 1f908b26 CODEC-315: Fix possible IndexOutOfBoundException (#223)
1f908b26 is described below
commit 1f908b266411c28862c3952f3a3002ed19837b1b
Author: Arthur Chan <[email protected]>
AuthorDate: Sat Nov 25 01:37:57 2023 +0000
CODEC-315: Fix possible IndexOutOfBoundException (#223)
Signed-off-by: Arthur Chan <[email protected]>
---
.../commons/codec/language/bm/PhoneticEngine.java | 4 ++--
.../commons/codec/language/bm/PhoneticEngineTest.java | 18 ++++++++++++++++++
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java b/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
index ef69a2a4..b98893cf 100644
--- a/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
+++ b/src/main/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
@@ -409,7 +409,7 @@ public class PhoneticEngine {
switch (this.nameType) {
case SEPHARDIC:
words.forEach(aWord -> {
- final String[] parts = aWord.split("'");
+ final String[] parts = aWord.split("'", -1);
words2.add(parts[parts.length - 1]);
});
words2.removeAll(NAME_PREFIXES.get(this.nameType));
@@ -431,7 +431,7 @@ public class PhoneticEngine {
} else if (words2.size() == 1) {
// not a multi-word name
input = words.iterator().next();
- } else {
+ } else if (!words2.isEmpty()) {
// encode each word in a multi-word name separately (normally used for approx matches)
final StringBuilder result = new StringBuilder();
words2.forEach(word -> result.append("-").append(encode(word)));
diff --git a/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java b/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java
index 6725c492..1a7c2117 100644
--- a/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java
+++ b/src/test/java/org/apache/commons/codec/language/bm/PhoneticEngineTest.java
@@ -48,6 +48,15 @@ public class PhoneticEngineTest {
);
}
+ public static Stream<Arguments> invalidData() {
+ return Stream.of(
+ Arguments.of("bar", "bar|bor|var|vor", NameType.ASHKENAZI, RuleType.APPROX, Boolean.FALSE, TEN),
+ Arguments.of("al", "|al", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN),
+ Arguments.of("da", "da|di", NameType.GENERIC, RuleType.EXACT, Boolean.FALSE, TEN),
+ Arguments.of("'''", "", NameType.SEPHARDIC, RuleType.APPROX, Boolean.FALSE, TEN)
+ );
+ }
+
// TODO Identify if there is a need to an assertTimeout(Duration.ofMillis(10000L) in some point, since this method was marked as @Test(timeout = 10000L)
@ParameterizedTest
@MethodSource("data")
@@ -70,4 +79,13 @@ public class PhoneticEngineTest {
}
}
}
+
+ @ParameterizedTest
+ @MethodSource("invalidData")
+ public void testInvalidEncode(final String input, final String phoneticExpected, final NameType nameType,
+ final RuleType ruleType, final boolean concat, final int maxPhonemes) {
+ final PhoneticEngine engine = new PhoneticEngine(nameType, ruleType, concat, maxPhonemes);
+
+ assertEquals(engine.encode(input), phoneticExpected);
+ }
}