Author: ggregory
Date: Thu Mar 26 04:10:43 2015
New Revision: 1669274
URL: http://svn.apache.org/r1669274
Log:
[CODEC-199] Bug in HW rule in Soundex. Applying 2nd version of the patch.
Modified:
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java
Modified:
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java?rev=1669274&r1=1669273&r2=1669274&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java (original)
+++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/language/Soundex.java Thu Mar 26 04:10:43 2015
@@ -41,7 +41,7 @@ public class Soundex implements StringEn
*
* @see #US_ENGLISH_MAPPING
*/
- public static final String US_ENGLISH_MAPPING_STRING = "01230120022455012623010202";
+ public static final String US_ENGLISH_MAPPING_STRING = "0123012#02245501262301#202";
/**
* This is a default mapping of the 26 letters used in US English. A value of <code>0</code> for a letter position
@@ -168,37 +168,6 @@ public class Soundex implements StringEn
}
/**
- * Used internally by the Soundex algorithm.
- *
- * Consonants from the same code group separated by W or H are treated as one.
- *
- * @param str
- * the cleaned working string to encode (in upper case).
- * @param index
- * the character position to encode
- * @return Mapping code for a particular character
- * @throws IllegalArgumentException
- * if the character is not mapped
- */
- private char getMappingCode(final String str, final int index) {
- // map() throws IllegalArgumentException
- final char mappedChar = this.map(str.charAt(index));
- // HW rule check
- if (index > 1 && mappedChar != '0') {
- for (int i=index-1 ; i>=0 ; i--) {
- final char prevChar = str.charAt(i);
- if (this.map(prevChar)==mappedChar) {
- return 0;
- }
- if ('H'!=prevChar && 'W'!=prevChar) {
- break;
- }
- }
- }
- return mappedChar;
- }
-
- /**
* Returns the maxLength. Standard Soundex
*
* @deprecated This feature is not needed since the encoding size must be constant. Will be removed in 2.0.
@@ -268,14 +237,14 @@ public class Soundex implements StringEn
char last, mapped;
int incount = 1, count = 1;
out[0] = str.charAt(0);
- // getMappingCode() throws IllegalArgumentException
- last = getMappingCode(str, 0);
+ // map() throws IllegalArgumentException
+ last = this.map(str.charAt(0));
while (incount < str.length() && count < out.length) {
- mapped = getMappingCode(str, incount++);
- if (mapped != 0) {
- if (mapped != '0' && mapped != last) {
- out[count++] = mapped;
- }
+ mapped = this.map(str.charAt(incount++));
+ if (mapped == '0') {
+ last = mapped;
+ } else if (mapped != '#' && mapped != last) {
+ out[count++] = mapped;
last = mapped;
}
}