This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git
The following commit(s) were added to refs/heads/master by this push:
new 2cdfac1a [CODEC-317] ColognePhonetic can create duplicate consecutive
codes in some cases.
2cdfac1a is described below
commit 2cdfac1a8e34ffba32603d97d81173158b16ba04
Author: Gary Gregory <[email protected]>
AuthorDate: Mon Feb 16 18:49:17 2026 -0500
[CODEC-317] ColognePhonetic can create duplicate consecutive codes in
some cases.
---
src/changes/changes.xml | 1 +
.../commons/codec/language/ColognePhonetic.java | 35 ++++++++++++----------
.../codec/language/ColognePhoneticTest.java | 10 +++----
3 files changed, 25 insertions(+), 21 deletions(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 2bdba713..886426a4 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -46,6 +46,7 @@ The <action> type attribute can be add,update,fix,remove.
<release version="1.22.0" date="YYYY-MM-DD" description="This is a feature
and maintenance release. Java 8 or later is required.">
<!-- FIX -->
<action type="fix" dev="ggregory" due-to="Shalu Jha, Andrey, Gary
Gregory" issue="CODEC-249">Fix Incorrect transform of CH digraph according
Metaphone basic rules #423.</action>
+ <action type="fix" dev="ggregory" due-to="DRUser123, Shalu Jha, Gary
Gregory" issue="CODEC-317">ColognePhonetic can create duplicate consecutive
codes in some cases.</action>
<!-- ADD -->
<action type="add" dev="ggregory" due-to="Inkeet, Gary Gregory, Wolff
Bock von Wuelfingen" issue="CODEC-326">Add Base58 support.</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add
BaseNCodecInputStream.AbstracBuilder.setByteArray(byte[]).</action>
diff --git
a/src/main/java/org/apache/commons/codec/language/ColognePhonetic.java
b/src/main/java/org/apache/commons/codec/language/ColognePhonetic.java
index debcb219..b5708ffa 100644
--- a/src/main/java/org/apache/commons/codec/language/ColognePhonetic.java
+++ b/src/main/java/org/apache/commons/codec/language/ColognePhonetic.java
@@ -270,11 +270,15 @@ public class ColognePhonetic implements StringEncoder {
* @param code the code to store.
*/
public void put(final char code) {
- if (code != CHAR_IGNORE && lastCode != code && (code != '0' ||
length == 0)) {
+ final boolean accept = code != CHAR_IGNORE;
+ final boolean nonZ = code != '0';
+ if (accept && lastCode != code && (nonZ || length == 0)) {
data[length] = code;
length++;
}
- lastCode = code;
+ if (nonZ && accept) {
+ lastCode = code;
+ }
}
}
// Predefined char arrays for better performance and less GC load
@@ -398,8 +402,8 @@ public class ColognePhonetic implements StringEncoder {
@Override
public Object encode(final Object object) throws EncoderException {
if (!(object instanceof String)) {
- throw new EncoderException("This method's parameter was expected
to be of the type " + String.class.getName() + ". But actually it was of the
type "
- + object.getClass().getName() + ".");
+ throw new EncoderException(String.format("This method's parameter
was expected to be of the type %s. But actually it was of the type %s.",
+ String.class.getName(), object.getClass().getName()));
}
return encode((String) object);
}
@@ -434,20 +438,19 @@ public class ColognePhonetic implements StringEncoder {
private char[] preprocess(final String text) {
// This converts German small sharp s (Eszett) to SS
final char[] chrs = text.toUpperCase(Locale.GERMAN).toCharArray();
-
for (int index = 0; index < chrs.length; index++) {
switch (chrs[index]) {
- case '\u00C4': // capital A, umlaut mark
- chrs[index] = 'A';
- break;
- case '\u00DC': // capital U, umlaut mark
- chrs[index] = 'U';
- break;
- case '\u00D6': // capital O, umlaut mark
- chrs[index] = 'O';
- break;
- default:
- break;
+ case '\u00C4': // capital A, umlaut mark
+ chrs[index] = 'A';
+ break;
+ case '\u00DC': // capital U, umlaut mark
+ chrs[index] = 'U';
+ break;
+ case '\u00D6': // capital O, umlaut mark
+ chrs[index] = 'O';
+ break;
+ default:
+ break;
}
}
return chrs;
diff --git
a/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java
b/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java
index f04f7406..bab6d6a9 100644
--- a/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java
+++ b/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java
@@ -162,14 +162,14 @@ class ColognePhoneticTest extends
AbstractStringEncoderTest<ColognePhonetic> {
Arguments.arguments("weber", "317"),
Arguments.arguments("wagner", "3467"),
Arguments.arguments("becker", "147"),
- Arguments.arguments("hoffmann", "0366"),
+ Arguments.arguments("hoffmann", "036"),
Arguments.arguments("sch\u00C4fer", "837"), // schÄfer - why upper
case A-umlaut ?
Arguments.arguments("sch\u00e4fer", "837"), // schäfer - add
equivalent lower-case
Arguments.arguments("Breschnew", "17863"),
Arguments.arguments("Wikipedia", "3412"),
Arguments.arguments("peter", "127"),
Arguments.arguments("pharma", "376"),
- Arguments.arguments("m\u00f6nchengladbach", "664645214"), //
mönchengladbach
+ Arguments.arguments("m\u00f6nchengladbach", "64645214"), //
mönchengladbach
Arguments.arguments("deutsch", "28"),
Arguments.arguments("deutz", "28"),
Arguments.arguments("hamburg", "06174"),
@@ -181,9 +181,9 @@ class ColognePhoneticTest extends
AbstractStringEncoderTest<ColognePhonetic> {
Arguments.arguments("matsch", "68"),
Arguments.arguments("matz", "68"),
Arguments.arguments("Arbeitsamt", "071862"),
- Arguments.arguments("Eberhard", "01772"),
- Arguments.arguments("Eberhardt", "01772"),
- Arguments.arguments("Celsius", "8588"),
+ Arguments.arguments("Eberhard", "0172"),
+ Arguments.arguments("Eberhardt", "0172"),
+ Arguments.arguments("Celsius", "858"),
Arguments.arguments("Ace", "08"),
Arguments.arguments("shch", "84"), // CODEC-254
Arguments.arguments("xch", "484"), // CODEC-255