Author: ggregory
Date: Sun Mar 22 18:48:52 2015
New Revision: 1668441
URL: http://svn.apache.org/r1668441
Log:
[CODEC-199] Bug in HW rule in Soundex.
Modified:
commons/proper/codec/trunk/src/changes/changes.xml
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java
commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/SoundexTest.java
Modified: commons/proper/codec/trunk/src/changes/changes.xml
URL:
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/changes/changes.xml?rev=1668441&r1=1668440&r2=1668441&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/changes/changes.xml (original)
+++ commons/proper/codec/trunk/src/changes/changes.xml Sun Mar 22 18:48:52 2015
@@ -43,6 +43,7 @@ The <action> type attribute can be add,u
</properties>
<body>
<release version="1.11" date="DD MM 2014" description="Feature and fix
release.">
+ <action dev="ggregory" type="add" issue="CODEC-199" due-to="Yossi
Tamari">Bug in HW rule in Soundex</action>
<action dev="ggregory" type="add"
issue="CODEC-183">BaseNCodecOutputStream only supports writing EOF on
close()</action>
<action dev="ggregory" type="add" issue="CODEC-195">Support SHA-224 in
DigestUtils on Java 8</action>
<action dev="ggregory" type="add" issue="CODEC-194">Support
java.nio.ByteBuffer in org.apache.commons.codec.binary.Hex</action>
Modified:
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java
URL:
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java?rev=1668441&r1=1668440&r2=1668441&view=diff
==============================================================================
---
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java
(original)
+++
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java
Sun Mar 22 18:48:52 2015
@@ -185,13 +185,14 @@ public class Soundex implements StringEn
final char mappedChar = this.map(str.charAt(index));
// HW rule check
if (index > 1 && mappedChar != '0') {
- final char hwChar = str.charAt(index - 1);
- if ('H' == hwChar || 'W' == hwChar) {
- final char preHWChar = str.charAt(index - 2);
- final char firstCode = this.map(preHWChar);
- if (firstCode == mappedChar || 'H' == preHWChar || 'W' ==
preHWChar) {
+ for (int i=index-1 ; i>=0 ; i--) {
+ final char prevChar = str.charAt(i);
+ if (this.map(prevChar)==mappedChar) {
return 0;
}
+ if ('H'!=prevChar && 'W'!=prevChar) {
+ break;
+ }
}
}
return mappedChar;
Modified:
commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/SoundexTest.java
URL:
http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/SoundexTest.java?rev=1668441&r1=1668440&r2=1668441&view=diff
==============================================================================
---
commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/SoundexTest.java
(original)
+++
commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/SoundexTest.java
Sun Mar 22 18:48:52 2015
@@ -228,6 +228,8 @@ public class SoundexTest extends StringE
// for the F). It is not coded A-226.
Assert.assertEquals("A261",
this.getStringEncoder().encode("Ashcraft"));
Assert.assertEquals("A261",
this.getStringEncoder().encode("Ashcroft"));
+ Assert.assertEquals("Y330", this.getStringEncoder().encode("yehudit"));
+ Assert.assertEquals("Y330", this.getStringEncoder().encode("yhwdyt"));
}
/**
@@ -388,4 +390,17 @@ public class SoundexTest extends StringE
Assert.assertEquals("", this.getStringEncoder().encode("\u00f6"));
}
}
+
+ /**
+ * Tests example from http://en.wikipedia.org/wiki/Soundex#American_Soundex as of 2015-03-22.
+ */
+ @Test
+ public void testWikipediaAmericanSoundex() {
+ Assert.assertEquals("R163", this.getStringEncoder().encode("Robert"));
+ Assert.assertEquals("R163", this.getStringEncoder().encode("Rupert"));
+ Assert.assertEquals("A261",
this.getStringEncoder().encode("Ashcraft"));
+ Assert.assertEquals("A261",
this.getStringEncoder().encode("Ashcroft"));
+ Assert.assertEquals("T522",
this.getStringEncoder().encode("Tymczak"));
+ Assert.assertEquals("P236",
this.getStringEncoder().encode("Pfister"));
+ }
}