This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git
The following commit(s) were added to refs/heads/master by this push:
new e352a9b1 [CODEC-249] Fix Incorrect transform of CH digraph according to
Metaphone basic rules (#423)
e352a9b1 is described below
commit e352a9b11f47e21ae9ce069e12dab67455dd185a
Author: Shalu Jha <[email protected]>
AuthorDate: Fri Feb 13 00:44:23 2026 +0530
[CODEC-249] Fix Incorrect transform of CH digraph according to Metaphone basic
rules (#423)
* [CODEC-249] Fix Incorrect transform of CH digraph according to Metaphone
basic rules
* [CODEC-249] Add test for Metaphone CH character translation with max
code length of 5
* Remove blank lines.
---------
Co-authored-by: Gary Gregory <[email protected]>
---
.../apache/commons/codec/language/Metaphone.java | 16 ++++++----------
.../commons/codec/language/MetaphoneTest.java | 21 +++++++++++++++++++--
2 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/src/main/java/org/apache/commons/codec/language/Metaphone.java b/src/main/java/org/apache/commons/codec/language/Metaphone.java
index 53b5960b..429bc2a9 100644
--- a/src/main/java/org/apache/commons/codec/language/Metaphone.java
+++ b/src/main/java/org/apache/commons/codec/language/Metaphone.java
@@ -246,7 +246,11 @@ public class Metaphone implements StringEncoder {
if (isPreviousChar(local, n, 'S') && !isLastChar(wdsz, n)
&& FRONTV.indexOf(local.charAt(n + 1)) >= 0) {
break;
}
- if (regionMatch(local, n, "CIA")) { // "CIA" -> X
+ if (isPreviousChar(local, n, 'S') && isNextChar(local, n, 'H')) { // SCH->sk
+ code.append('K');
+ break;
+ }
+ if (regionMatch(local, n, "CIA") || isNextChar(local, n, 'H')) { // "CIA" -> X or CH -> X
code.append('X');
break;
}
@@ -254,15 +258,7 @@ public class Metaphone implements StringEncoder {
code.append('S');
break; // CI,CE,CY -> S
}
- if (isPreviousChar(local, n, 'S') && isNextChar(local, n, 'H')) { // SCH->sk
- code.append('K');
- break;
- }
- if (!isNextChar(local, n, 'H') || n == 0 && wdsz >= 3 && isVowel(local, 2)) { // CH consonant -> K consonant
- code.append('K');
- } else {
- code.append('X'); // CHvowel -> X
- }
+ code.append('K'); // default C -> K
break;
case 'D':
if (!isLastChar(wdsz, n + 1) && isNextChar(local, n, 'G')
&& FRONTV.indexOf(local.charAt(n + 2)) >= 0) { // DGE DGI DGY -> J
diff --git a/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java b/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java
index 26dd0cc1..a5f43807 100644
--- a/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java
+++ b/src/test/java/org/apache/commons/codec/language/MetaphoneTest.java
@@ -272,11 +272,28 @@ class MetaphoneTest extends AbstractStringEncoderTest<Metaphone> {
@Test
void testTranslateOfSCHAndCH() {
+ assertEquals("SNS", getStringEncoder().metaphone("SCIENCE"));
+ assertEquals("SN", getStringEncoder().metaphone("SCENE"));
+ assertEquals("S", getStringEncoder().metaphone("SCY"));
assertEquals("SKTL", getStringEncoder().metaphone("SCHEDULE"));
assertEquals("SKMT", getStringEncoder().metaphone("SCHEMATIC"));
-
- assertEquals("KRKT", getStringEncoder().metaphone("CHARACTER"));
+ assertEquals("TSKR", getStringEncoder().metaphone("DISCHARGE"));
+ assertEquals("EX", getStringEncoder().metaphone("ECHO"));
assertEquals("TX", getStringEncoder().metaphone("TEACH"));
+ assertEquals("XR", getStringEncoder().metaphone("CHERI"));
+ assertEquals("XP", getStringEncoder().metaphone("CHIP"));
+ assertEquals("XRST", getStringEncoder().metaphone("CHRIST"));
+ assertEquals("X", getStringEncoder().metaphone("CIAO"));
+ assertEquals("ST", getStringEncoder().metaphone("CITY"));
+ assertEquals("KT", getStringEncoder().metaphone("CAT"));
+
+ }
+
+ @Test
+ void testTranslateOfCHCharacterWithMaxCodeLenFive() {
+ final Metaphone metaphone = new Metaphone();
+ metaphone.setMaxCodeLen(5);
+ assertEquals("XRKTR", metaphone.metaphone("CHARACTER"));
}
@Test