Here is a patch for the codec package, which includes the following: 0. Added a method to the Encoder interface - public String encode(byte[])
1. Added encode(byte[]) to all classes that implement Encoder. 2. Hex, which encodes a byte[] to a hex string - 0xcbd342 -> "cbd342" 3. TestHex, a JUnit test for the Hex class. 4. Some files in the codec package had CRLF problems; this patch resolves them. -------- Tim O'Brien Transolutions, Inc. 18 N Waukegan Road Lake Bluff, Il 60044 W 847-574-2143 F 847-234-3471 M 847-863-7045
Index: src/java/org/apache/commons/codec/Encoder.java =================================================================== RCS file: /home/cvspublic/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/Encoder.java,v retrieving revision 1.2 diff -u -r1.2 Encoder.java --- src/java/org/apache/commons/codec/Encoder.java 18 Nov 2002 12:41:24 -0000 1.2 +++ src/java/org/apache/commons/codec/Encoder.java 1 Dec 2002 22:03:15 -0000 @@ -1,4 +1,16 @@ -/* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2002 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright +/* ==================================================================== + * The Apache Software License, Version 1.1 * + * Copyright (c) 2002 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. @@ -31,4 +43,25 @@ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
- * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */ package org.apache.commons.codec; /** * Encoder is an interface, which is implemented by Soundex, * Metaphone, Soundex2, etc. * * @author [EMAIL PROTECTED] * @version $Revision: 1.2 $ $Date: 2002/11/18 12:41:24 $ */ public interface Encoder { String encode(String str); } + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ +package org.apache.commons.codec; + +/** + * Encoder is an interface, which is implemented by Soundex, + * Metaphone, Soundex2, etc. + * + * @author [EMAIL PROTECTED] + * @version $Revision: 1.2 $ $Date: 2002/11/18 12:41:24 $ + */ +public interface Encoder { + + String encode(String str); + String encode(byte[] bytes); + +} Index: src/java/org/apache/commons/codec/EncoderComparator.java =================================================================== RCS file: /home/cvspublic/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/EncoderComparator.java,v retrieving revision 1.2 diff -u -r1.2 EncoderComparator.java --- src/java/org/apache/commons/codec/EncoderComparator.java 18 Nov 2002 12:41:24 -0000 1.2 +++ src/java/org/apache/commons/codec/EncoderComparator.java 1 Dec 2002 22:03:16 +-0000 @@ -1,2 +1,86 @@ -/* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2002 The Apache Software Foundation. All rights * reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation" and * "Apache Commons" must not be used to endorse or promote products * derived from this software without prior written permission. For * written permission, please contact [EMAIL PROTECTED] * * 5. Products derived from this software may not be called "Apache", * "Apache Turbine", nor may "Apache" appear in their name, without * prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */ package org.apache.commons.codec; -import java.util.Comparator; /** * Compare using an Encoder. * * @author [EMAIL PROTECTED] * @version $Revision: 1.2 $ $Date: 2002/11/18 12:41:24 $ */ public class EncoderComparator implements Comparator { private Encoder encoder; /** * Use the default soundex algorithm, US_ENGLISH. */ public EncoderComparator() { this(RefinedSoundex.US_ENGLISH); } /** * Use the provided soundex algorithm. */ public EncoderComparator(Encoder en) { this.encoder = en; } public int compare(Object o1, Object o2) { String s1 = encoder.encode(o1.toString()); String s2 = encoder.encode(o2.toString()); return s1.compareTo(s2); } } +/* ==================================================================== + * The Apache Software License, Version 1.1 * + * Copyright (c) 2002 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Commons" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact [EMAIL PROTECTED] + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Turbine", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ +package org.apache.commons.codec; +import java.util.Comparator; + +/** + * Compare using an Encoder. + * + * @author [EMAIL PROTECTED] + * @version $Revision: 1.2 $ $Date: 2002/11/18 12:41:24 $ + */ +public class EncoderComparator implements Comparator { + + private Encoder encoder; + + /** + * Use the default soundex algorithm, US_ENGLISH. + */ + public EncoderComparator() { + this(RefinedSoundex.US_ENGLISH); + } + + /** + * Use the provided soundex algorithm. + */ + public EncoderComparator(Encoder en) { + this.encoder = en; + } + + public int compare(Object o1, Object o2) { + String s1 = encoder.encode(o1.toString()); + String s2 = encoder.encode(o2.toString()); + return s1.compareTo(s2); + } + +} Index: src/java/org/apache/commons/codec/Hex.java =================================================================== RCS file: src/java/org/apache/commons/codec/Hex.java diff -N src/java/org/apache/commons/codec/Hex.java --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ src/java/org/apache/commons/codec/Hex.java 1 Dec 2002 22:03:18 -0000 @@ -0,0 +1,102 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2002 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Commons" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact [EMAIL PROTECTED] + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Turbine", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + package org.apache.commons.codec; + +/** + * @author [EMAIL PROTECTED] + */ +public class Hex implements Encoder { + + private char[] hexDigits = { '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; + + public String toHex(byte[] pBytes) { + + StringBuffer sBuf = new StringBuffer(); + + for( int i = 0; i < pBytes.length; i++ ) { + + sBuf.append( hexDigits[ (((int) (pBytes[i] >> 4)) & 0x0f) ] ); + sBuf.append( hexDigits[ (((int) (pBytes[i] & 0x0f)) & 0x0f) ] ); + + } + + return( sBuf.toString() ); + + } + + + /** + * @see org.apache.commons.codec.Encoder#encode(String) + */ + public String encode(String str) { + if( str != null ) { + return toHex(str.getBytes()); + } else { + return null; + } + } + + /** + * @see org.apache.commons.codec.Encoder#encode(byte[]) + */ + public String encode(byte[] bytes) { + if( bytes != null ) { + return toHex( bytes ); + } else { + return null; + } + } + +} Index: src/java/org/apache/commons/codec/Metaphone.java =================================================================== RCS file: /home/cvspublic/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/Metaphone.java,v retrieving revision 1.4 diff -u -r1.4 Metaphone.java --- src/java/org/apache/commons/codec/Metaphone.java 18 Nov 2002 12:41:24 -0000 1.4 +++ src/java/org/apache/commons/codec/Metaphone.java 1 Dec 2002 22:03:19 -0000 @@ -1,2 +1,309 @@ -/* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2001-2002 The Apache Software Foundation. All rights * reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation" and * "Apache Commons" must not be used to endorse or promote products * derived from this software without prior written permission. For * written permission, please contact [EMAIL PROTECTED] * * 5. Products derived from this software may not be called "Apache", * "Apache Turbine", nor may "Apache" appear in their name, without * prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */ package org.apache.commons.codec; /** * A class to generate phonetic code. * The initial Java implementation, William B. Brogden. December, 1997 * Permission given by wbrogden for code to be used anywhere. * * @see "Hanging on the Metaphone" by Lawrence Philips * <i>Computer Language</i> of Dec. 1990, p 39 * * @version $Revision: 1.4 $ $Date: 2002/11/18 12:41:24 $ * @author [EMAIL PROTECTED] * @author [EMAIL PROTECTED] * @author [EMAIL PROTECTED] */ public class Metaphone implements Encoder { - private String vowels = "AEIOU" ; private String frontv = "EIY" ; private String varson = "CSPTG" ; private int maxCodeLen = 4 ; public Metaphone() { super(); } /** * Find the metaphone value of a String. This is similar to the * soundex algorithm, but better at finding similar sounding words. * All input is converted to upper case. * Limitations: Input format is expected to be a single ASCII word * with only characters in the A - Z range, no punctuation or numbers. 
*/ public String metaphone( String txt ){ int mtsz = 0 ; boolean hard = false ; if(( txt == null ) || ( txt.length() == 0 )) return "" ; // single character is itself if( txt.length() == 1 ) return txt.toUpperCase() ; char[] inwd = txt.toUpperCase().toCharArray() ; String tmpS ; StringBuffer local = new StringBuffer( 40 ); // manipulate StringBuffer code = new StringBuffer( 10 ) ; // output // handle initial 2 characters exceptions switch( inwd[0] ){ case 'K': case 'G' : case 'P' : /* looking for KN, etc*/ if( inwd[1] == 'N')local.append(inwd, 1, inwd.length - 1 ); else local.append( inwd ); break; case 'A': /* looking for AE */ if( inwd[1] == 'E' )local.append(inwd, 1, inwd.length - 1 ); else local.append( inwd ); break; case 'W' : /* looking for WR or WH */ if( inwd[1] == 'R' ){ // WR -> R local.append(inwd, 1, inwd.length - 1 ); break ; } if( inwd[1] == 'H'){ local.append(inwd, 1, inwd.length - 1 ); local.setCharAt( 0,'W'); // WH -> W } else local.append( inwd ); break; case 'X' : /* initial X becomes S */ inwd[0] = 'S' ;local.append( inwd ); break ; default : local.append( inwd ); } // now local has working string with initials fixed int wdsz = local.length(); int n = 0 ; while((mtsz < maxCodeLen ) && // max code size of 4 works well (n < wdsz ) ){ char symb = local.charAt(n) ; // remove duplicate letters except C if(( symb != 'C' ) && (n > 0 ) && ( local.charAt(n - 1 ) == symb )) n++ ; else{ // not dup switch( symb ){ case 'A' : case 'E' : case 'I' : case 'O' : case 'U' : if( n == 0 ) { code.append(symb );mtsz++; } break ; // only use vowel if leading char case 'B' : if( (n > 0 ) && !(n + 1 == wdsz ) && // not MB at end of word ( local.charAt(n - 1) == 'M')) { code.append(symb); } else code.append(symb); mtsz++ ; break ; case 'C' : // lots of C special cases /* discard if SCI, SCE or SCY */ if( ( n > 0 ) && ( local.charAt(n-1) == 'S' ) && ( n + 1 < wdsz ) && ( frontv.indexOf( local.charAt(n + 1)) >= 0 )){ break ;} tmpS = local.toString(); if( 
tmpS.indexOf("CIA", n ) == n ) { // "CIA" -> X code.append('X' ); mtsz++; break ; } if( ( n + 1 < wdsz ) && (frontv.indexOf( local.charAt(n+1) )>= 0 )){ code.append('S');mtsz++; break ; // CI,CE,CY -> S } if(( n > 0) && ( tmpS.indexOf("SCH",n-1 )== n-1 )){ // SCH->sk code.append('K') ; mtsz++;break ; } if( tmpS.indexOf("CH", n ) == n ){ // detect CH if((n == 0 ) && (wdsz >= 3 ) && // CH consonant -> K consonant (vowels.indexOf( local.charAt( 2) ) < 0 )){ code.append('K'); } else { code.append('X'); // CHvowel -> X } mtsz++; } else { code.append('K' );mtsz++; } break ; case 'D' : if(( n + 2 < wdsz )&& // DGE DGI DGY -> J ( local.charAt(n+1) == 'G' )&& (frontv.indexOf( local.charAt(n+2) )>= 0)){ code.append('J' ); n += 2 ; } else { code.append( 'T' ); } mtsz++; break ; case 'G' : // GH silent at end or before consonant if(( n + 2 == wdsz )&& (local.charAt(n+1) == 'H' )) break ; if(( n + 2 < wdsz ) && (local.charAt(n+1) == 'H' )&& (vowels.indexOf( local.charAt(n+2)) < 0 )) break ; tmpS = local.toString(); if((n > 0) && ( tmpS.indexOf("GN", n ) == n)|| ( tmpS.indexOf("GNED",n) == n )) break ; // silent G if(( n > 0 ) && (local.charAt(n-1) == 'G')) hard = true ; else hard = false ; if((n+1 < wdsz) && (frontv.indexOf( local.charAt(n+1) ) >= 0 )&& (!hard) ) code.append( 'J' ); else code.append('K'); mtsz++; break ; case 'H': if( n + 1 == wdsz ) break ; // terminal H if((n > 0) && (varson.indexOf( local.charAt(n-1)) >= 0)) break ; if( vowels.indexOf( local.charAt(n+1)) >=0 ){ code.append('H') ; mtsz++;// Hvowel } break; case 'F': case 'J' : case 'L' : case 'M': case 'N' : case 'R' : code.append( symb ); mtsz++; break ; case 'K' : if( n > 0 ){ // not initial if( local.charAt( n -1) != 'C' ) { code.append(symb ); } } else code.append( symb ); // initial K mtsz++ ; break ; case 'P' : if((n + 1 < wdsz) && // PH -> F (local.charAt( n+1) == 'H'))code.append('F'); else code.append( symb ); mtsz++; break ; case 'Q' : code.append('K' );mtsz++; break ; case 'S' : tmpS = 
local.toString(); if((tmpS.indexOf("SH", n )== n) || (tmpS.indexOf("SIO",n )== n) || (tmpS.indexOf("SIA",n )== n)) code.append('X'); else code.append( 'S' ); mtsz++ ; break ; case 'T' : tmpS = local.toString(); // TIA TIO -> X if((tmpS.indexOf("TIA",n )== n)|| (tmpS.indexOf("TIO",n )== n) ){ code.append('X'); mtsz++; break; } if( tmpS.indexOf("TCH",n )==n) break; // substitute numeral 0 for TH (resembles theta after all) if( tmpS.indexOf("TH", n )==n) code.append('0'); else code.append( 'T' ); mtsz++ ; break ; case 'V' : code.append('F'); mtsz++;break ; case 'W' : case 'Y' : // silent if not followed by vowel if((n+1 < wdsz) && (vowels.indexOf( local.charAt(n+1))>=0)){ code.append( symb );mtsz++; } break ; case 'X' : code.append('K'); code.append('S');mtsz += 2; break ; case 'Z' : code.append('S'); mtsz++; break ; } // end switch n++ ; } // end else from symb != 'C' if( mtsz > 4 )code.setLength( 4); } return code.toString(); } // end static method metaPhone() public String encode(String pString) { return( metaphone( pString ) ); } /** * Are the metaphones of two strings the same. */ public boolean isMetaphoneEqual(String str1, String str2) { return metaphone(str1).equals(metaphone(str2)); } /** * Returns the maxCodeLen. * @return int */ public int getMaxCodeLen() { return maxCodeLen; } /** * Sets the maxCodeLen. * @param maxCodeLen The maxCodeLen to set */ public void setMaxCodeLen(int maxCodeLen) { this.maxCodeLen = maxCodeLen; } } \ No newline at end of file +/* ==================================================================== + * The Apache Software License, Version 1.1 * + * Copyright (c) 2002 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Commons" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact [EMAIL PROTECTED] + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Turbine", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ +package org.apache.commons.codec; + +/** + * A class to generate phonetic code. + * The initial Java implementation, William B. Brogden. December, 1997 + * Permission given by wbrogden for code to be used anywhere. + * + * @see "Hanging on the Metaphone" by Lawrence Philips + * <i>Computer Language</i> of Dec. 1990, p 39 + * + * @version $Revision: 1.4 $ $Date: 2002/11/18 12:41:24 $ + * @author [EMAIL PROTECTED] + * @author [EMAIL PROTECTED] + * @author [EMAIL PROTECTED] + */ +public class Metaphone implements Encoder { + private String vowels = "AEIOU" ; + private String frontv = "EIY" ; + private String varson = "CSPTG" ; + + private int maxCodeLen = 4 ; + + public Metaphone() { + super(); + } + + /** + * Find the metaphone value of a String. This is similar to the + * soundex algorithm, but better at finding similar sounding words. + * All input is converted to upper case. + * Limitations: Input format is expected to be a single ASCII word + * with only characters in the A - Z range, no punctuation or numbers. 
+ */ + public String metaphone( String txt ){ + int mtsz = 0 ; + boolean hard = false ; + if(( txt == null ) || + ( txt.length() == 0 )) return "" ; + // single character is itself + if( txt.length() == 1 ) return txt.toUpperCase() ; + + char[] inwd = txt.toUpperCase().toCharArray() ; + + String tmpS ; + StringBuffer local = new StringBuffer( 40 ); // manipulate + StringBuffer code = new StringBuffer( 10 ) ; // output + // handle initial 2 characters exceptions + switch( inwd[0] ){ + case 'K': case 'G' : case 'P' : /* looking for KN, etc*/ + if( inwd[1] == 'N')local.append(inwd, 1, inwd.length - 1 ); + else local.append( inwd ); + break; + case 'A': /* looking for AE */ + if( inwd[1] == 'E' )local.append(inwd, 1, inwd.length - 1 ); + else local.append( inwd ); + break; + case 'W' : /* looking for WR or WH */ + if( inwd[1] == 'R' ){ // WR -> R + local.append(inwd, 1, inwd.length - 1 ); break ; + } + if( inwd[1] == 'H'){ + local.append(inwd, 1, inwd.length - 1 ); + local.setCharAt( 0,'W'); // WH -> W + } + else local.append( inwd ); + break; + case 'X' : /* initial X becomes S */ + inwd[0] = 'S' ;local.append( inwd ); + break ; + default : + local.append( inwd ); + } // now local has working string with initials fixed + int wdsz = local.length(); + int n = 0 ; + while((mtsz < maxCodeLen ) && // max code size of 4 works well + (n < wdsz ) ){ + char symb = local.charAt(n) ; + // remove duplicate letters except C + if(( symb != 'C' ) && + (n > 0 ) && ( local.charAt(n - 1 ) == symb )) n++ ; + else{ // not dup + switch( symb ){ + case 'A' : case 'E' : case 'I' : case 'O' : case 'U' : + if( n == 0 ) { code.append(symb );mtsz++; + } + break ; // only use vowel if leading char + case 'B' : + if( (n > 0 ) && + !(n + 1 == wdsz ) && // not MB at end of word + ( local.charAt(n - 1) == 'M')) { + code.append(symb); + } + else code.append(symb); + mtsz++ ; + break ; + case 'C' : // lots of C special cases + /* discard if SCI, SCE or SCY */ + if( ( n > 0 ) && + ( local.charAt(n-1) 
== 'S' ) && + ( n + 1 < wdsz ) && + ( frontv.indexOf( local.charAt(n + 1)) >= 0 )){ break ;} + tmpS = local.toString(); + if( tmpS.indexOf("CIA", n ) == n ) { // "CIA" -> X + code.append('X' ); mtsz++; break ; + } + if( ( n + 1 < wdsz ) && + (frontv.indexOf( local.charAt(n+1) )>= 0 )){ + code.append('S');mtsz++; break ; // CI,CE,CY -> S + } + if(( n > 0) && + ( tmpS.indexOf("SCH",n-1 )== n-1 )){ // SCH->sk + code.append('K') ; mtsz++;break ; + } + if( tmpS.indexOf("CH", n ) == n ){ // detect CH + if((n == 0 ) && + (wdsz >= 3 ) && // CH consonant -> K consonant + (vowels.indexOf( local.charAt( 2) ) < 0 )){ + code.append('K'); + } + else { code.append('X'); // CHvowel -> X + } + mtsz++; + } + else { code.append('K' );mtsz++; + } + break ; + case 'D' : + if(( n + 2 < wdsz )&& // DGE DGI DGY -> J + ( local.charAt(n+1) == 'G' )&& + (frontv.indexOf( local.charAt(n+2) )>= 0)){ + code.append('J' ); n += 2 ; + } + else { code.append( 'T' ); + } + mtsz++; + break ; + case 'G' : // GH silent at end or before consonant + if(( n + 2 == wdsz )&& + (local.charAt(n+1) == 'H' )) break ; + if(( n + 2 < wdsz ) && + (local.charAt(n+1) == 'H' )&& + (vowels.indexOf( local.charAt(n+2)) < 0 )) break ; + tmpS = local.toString(); + if((n > 0) && + ( tmpS.indexOf("GN", n ) == n)|| + ( tmpS.indexOf("GNED",n) == n )) break ; // silent G + if(( n > 0 ) && + (local.charAt(n-1) == 'G')) hard = true ; + else hard = false ; + if((n+1 < wdsz) && + (frontv.indexOf( local.charAt(n+1) ) >= 0 )&& + (!hard) ) code.append( 'J' ); + else code.append('K'); + mtsz++; + break ; + case 'H': + if( n + 1 == wdsz ) break ; // terminal H + if((n > 0) && + (varson.indexOf( local.charAt(n-1)) >= 0)) break ; + if( vowels.indexOf( local.charAt(n+1)) >=0 ){ + code.append('H') ; mtsz++;// Hvowel + } + break; + case 'F': case 'J' : case 'L' : + case 'M': case 'N' : case 'R' : + code.append( symb ); mtsz++; break ; + case 'K' : + if( n > 0 ){ // not initial + if( local.charAt( n -1) != 'C' ) { + code.append(symb ); + } + 
} + else code.append( symb ); // initial K + mtsz++ ; + break ; + case 'P' : + if((n + 1 < wdsz) && // PH -> F + (local.charAt( n+1) == 'H'))code.append('F'); + else code.append( symb ); + mtsz++; + break ; + case 'Q' : + code.append('K' );mtsz++; break ; + case 'S' : + tmpS = local.toString(); + if((tmpS.indexOf("SH", n )== n) || + (tmpS.indexOf("SIO",n )== n) || + (tmpS.indexOf("SIA",n )== n)) code.append('X'); + else code.append( 'S' ); + mtsz++ ; + break ; + case 'T' : + tmpS = local.toString(); // TIA TIO -> X + if((tmpS.indexOf("TIA",n )== n)|| + (tmpS.indexOf("TIO",n )== n) ){ + code.append('X'); mtsz++; break; + } + if( tmpS.indexOf("TCH",n )==n) break; + // substitute numeral 0 for TH (resembles theta after all) + if( tmpS.indexOf("TH", n )==n) code.append('0'); + else code.append( 'T' ); + mtsz++ ; + break ; + case 'V' : + code.append('F'); mtsz++;break ; + case 'W' : case 'Y' : // silent if not followed by vowel + if((n+1 < wdsz) && + (vowels.indexOf( local.charAt(n+1))>=0)){ + code.append( symb );mtsz++; + } + break ; + case 'X' : + code.append('K'); code.append('S');mtsz += 2; + break ; + case 'Z' : + code.append('S'); mtsz++; break ; + } // end switch + n++ ; + } // end else from symb != 'C' + if( mtsz > 4 )code.setLength( 4); + } + return code.toString(); + } // end static method metaPhone() + + public String encode(String pString) { + return( metaphone( pString ) ); + } + + public String encode(byte[] pBytes) { + return( metaphone( new String( pBytes ) ) ); + } + + + /** + * Are the metaphones of two strings the same. + */ + public boolean isMetaphoneEqual(String str1, String str2) { + return metaphone(str1).equals(metaphone(str2)); + } + + /** + * Returns the maxCodeLen. + * @return int + */ + public int getMaxCodeLen() { + return maxCodeLen; + } + + /** + * Sets the maxCodeLen. 
+ * @param maxCodeLen The maxCodeLen to set + */ + public void setMaxCodeLen(int maxCodeLen) { + this.maxCodeLen = maxCodeLen; + } + +} Index: src/java/org/apache/commons/codec/RefinedSoundex.java =================================================================== RCS file: /home/cvspublic/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/RefinedSoundex.java,v retrieving revision 1.3 diff -u -r1.3 RefinedSoundex.java --- src/java/org/apache/commons/codec/RefinedSoundex.java 18 Nov 2002 13:00:25 -0000 1.3 +++ src/java/org/apache/commons/codec/RefinedSoundex.java 1 Dec 2002 22:03:18 +-0000 @@ -1,3 +1,136 @@ -/* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2002 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation" and * "Apache Commons" must not be used to endorse or promote products * derived from this software without prior written permission. For * written permission, please contact [EMAIL PROTECTED] * * 5. 
Products derived from this software may not be called "Apache", * "Apache Turbine", nor may "Apache" appear in their name, without * prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */ package org.apache.commons.codec; +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2002 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Commons" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact [EMAIL PROTECTED] + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Turbine", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ +package org.apache.commons.codec; -/** * Encodes a string into a soundex value. Sounde is an encoding used to * relate similar names, but can also be used as a general purpose * scheme to find word with similar phonemes. * More information may be found at: http://www.bluepoof.com/Soundex/info2.html * * @todo Needs internationalisation in a future release. * * @author [EMAIL PROTECTED] * @version $Revision: 1.3 $ $Date: 2002/11/18 13:00:25 $ */ public class RefinedSoundex implements Encoder { static public final char[] US_ENGLISH_MAPPING = "01360240043788015936020505".toCharArray(); static public final RefinedSoundex US_ENGLISH = new RefinedSoundex(); private char[] soundexMapping; public RefinedSoundex() { this(US_ENGLISH_MAPPING); } public RefinedSoundex(char[] mapping) { this.soundexMapping = mapping; } /** * Get the SoundEx value of a string. * This implementation is taken from the code-snippers on * http://www.sourceforge.net/ */ public String soundex(String str) { if(null == str || str.length() == 0) { return str; } StringBuffer sBuf = new StringBuffer(); str = str.toUpperCase(); sBuf.append( str.charAt(0) ); char last, mapped, current; last = '*'; for( int i = 0; i < str.length(); i++ ) { current = getMappingCode( str.charAt(i) ); if( current == last ) { continue; } else if( current != 0 ) { sBuf.append( current ); } last = current; } return sBuf.toString(); } public String encode(String pString) { return( soundex( pString ) ); } /** * Used internally by the SoundEx algorithm. 
*/ private char getMappingCode(char c) { if( !Character.isLetter(c) ) { return 0; } else { return soundexMapping[Character.toUpperCase(c) - 'A']; } } } \ No newline at end of file + +/** + * Encodes a string into a soundex value. Sounde is an encoding used to + * relate similar names, but can also be used as a general purpose + * scheme to find word with similar phonemes. + * More information may be found at: http://www.bluepoof.com/Soundex/info2.html + * + * @todo Needs internationalisation in a future release. + * + * @author [EMAIL PROTECTED] + * @version $Revision: 1.3 $ $Date: 2002/11/18 13:00:25 $ + */ +public class RefinedSoundex implements Encoder { + + static public final char[] US_ENGLISH_MAPPING = + "01360240043788015936020505".toCharArray(); + + static public final RefinedSoundex US_ENGLISH = new RefinedSoundex(); + + private char[] soundexMapping; + + public RefinedSoundex() { + this(US_ENGLISH_MAPPING); + } + + public RefinedSoundex(char[] mapping) { + this.soundexMapping = mapping; + } + + /** + * Get the SoundEx value of a string. + * This implementation is taken from the code-snippers on + * http://www.sourceforge.net/ + */ + public String soundex(String str) { + if(null == str || str.length() == 0) { return str; } + + StringBuffer sBuf = new StringBuffer(); + str = str.toUpperCase(); + + sBuf.append( str.charAt(0) ); + + char last, mapped, current; + last = '*'; + + for( int i = 0; i < str.length(); i++ ) { + + current = getMappingCode( str.charAt(i) ); + if( current == last ) { + continue; + } else if( current != 0 ) { + sBuf.append( current ); + } + + last = current; + + } + + return sBuf.toString(); + } + + public String encode(String pString) { + return( soundex( pString ) ); + } + + public String encode(byte[] pBytes) { + return( soundex( new String( pBytes ) ) ); + } + + + /** + * Used internally by the SoundEx algorithm. 
+ */ + private char getMappingCode(char c) { + if( !Character.isLetter(c) ) { + return 0; + } else { + return soundexMapping[Character.toUpperCase(c) - 'A']; + } + } +} Index: src/java/org/apache/commons/codec/Soundex.java =================================================================== RCS file: /home/cvspublic/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/Soundex.java,v retrieving revision 1.4 diff -u -r1.4 Soundex.java --- src/java/org/apache/commons/codec/Soundex.java 18 Nov 2002 13:00:26 -0000 1.4 +++ src/java/org/apache/commons/codec/Soundex.java 1 Dec 2002 22:03:16 -0000 @@ -1,4 +1,149 @@ -/* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2001-2002 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation" and * "Apache Commons" must not be used to endorse or promote products * derived from this software without prior written permission. For * written permission, please contact [EMAIL PROTECTED] * * 5. 
Products derived from this software may not be called "Apache", * "Apache Turbine", nor may "Apache" appear in their name, without * prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */ package org.apache.commons.codec; -/** * Encodes a string into a refined soundex value. * A refined soundex code is optimized for spell checking word. * "Soundex" method originally developed by Margaret Odell and * Robert Russell * * http://www.bluepoof.com/Soundex/info2.html * * @todo Needs internationalisation in a future release. 
* * @author [EMAIL PROTECTED] * @author [EMAIL PROTECTED] * @version $Revision: 1.4 $ $Date: 2002/11/18 13:00:26 $ */ public class Soundex implements Encoder { static public final char[] US_ENGLISH_MAPPING = "01230120022455012623010202".toCharArray(); static public final Soundex US_ENGLISH = new Soundex(); private char[] soundexMapping; private int maxLength = 4;- public Soundex() { this(US_ENGLISH_MAPPING); } - public Soundex(char[] mapping) { this.soundexMapping = mapping; } /** * Get the SoundEx value of a string. * This implementation is taken from the code-snippers on * http://www.sourceforge.net/ */ public String soundex(String str) { if(null == str || str.length() == 0) { return str; } char out[] = { '0', '0', '0', '0' }; char last, mapped; int incount = 1, count = 1; out[0] = Character.toUpperCase( str.charAt(0) ); last = getMappingCode( str.charAt(0) ); while( (incount < str.length() ) && (mapped = getMappingCode(str.charAt(incount++))) != 0 && (count < maxLength) ) { if( (mapped != '0') && (mapped != last) ) { out[count++] = mapped; } last = mapped; } return new String(out); } public String encode(String pString) { return( soundex( pString ) ); } /** * Used internally by the SoundEx algorithm. */ private char getMappingCode(char c) { if( !Character.isLetter(c) ) { return 0; } else { return soundexMapping[Character.toUpperCase(c) - 'A']; } } /** * Returns the maxLength. Standard Soundex * @return int */ public int getMaxLength() { return maxLength; } /** * Sets the maxLength. * @param maxLength The maxLength to set */ public void setMaxLength(int maxLength) { this.maxLength = maxLength; } } \ No newline at end of file +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2001-2002 The Apache Software Foundation. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache Commons" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact [EMAIL PROTECTED] + * + * 5. Products derived from this software may not be called "Apache", + * "Apache Turbine", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ +package org.apache.commons.codec; + +/** + * Encodes a string into a refined soundex value. + * A refined soundex code is optimized for spell checking word. + * "Soundex" method originally developed by Margaret Odell and + * Robert Russell + * + * http://www.bluepoof.com/Soundex/info2.html + * + * @todo Needs internationalisation in a future release. + * + * @author [EMAIL PROTECTED] + * @author [EMAIL PROTECTED] + * @version $Revision: 1.4 $ $Date: 2002/11/18 13:00:26 $ + */ +public class Soundex implements Encoder { + + static public final char[] US_ENGLISH_MAPPING = + "01230120022455012623010202".toCharArray(); + + static public final Soundex US_ENGLISH = new Soundex(); + + private char[] soundexMapping; + private int maxLength = 4; + + + public Soundex() { + this(US_ENGLISH_MAPPING); + } + + public Soundex(char[] mapping) { + this.soundexMapping = mapping; + } + + /** + * Get the SoundEx value of a string. 
+ * This implementation is taken from the code-snippers on + * http://www.sourceforge.net/ + */ + public String soundex(String str) { + if(null == str || str.length() == 0) { return str; } + + char out[] = { '0', '0', '0', '0' }; + char last, mapped; + int incount = 1, count = 1; + out[0] = Character.toUpperCase( str.charAt(0) ); + last = getMappingCode( str.charAt(0) ); + while( (incount < str.length() ) && + (mapped = getMappingCode(str.charAt(incount++))) != 0 && + (count < maxLength) ) + { + if( (mapped != '0') && (mapped != last) ) { + out[count++] = mapped; + } + last = mapped; + } + return new String(out); + } + + public String encode(String pString) { + return( soundex( pString ) ); + } + + public String encode(byte[] pBytes) { + return( soundex( new String( pBytes ) ) ); + } + + /** + * Used internally by the SoundEx algorithm. + */ + private char getMappingCode(char c) { + if( !Character.isLetter(c) ) { + return 0; + } else { + return soundexMapping[Character.toUpperCase(c) - 'A']; + } + } + + /** + * Returns the maxLength. Standard Soundex + * @return int + */ + public int getMaxLength() { + return maxLength; + } + + /** + * Sets the maxLength. 
+ * @param maxLength The maxLength to set + */ + public void setMaxLength(int maxLength) { + this.maxLength = maxLength; + } + +} Index: src/test/org/apache/commons/codec/TestAll.java =================================================================== RCS file: /home/cvspublic/jakarta-commons-sandbox/codec/src/test/org/apache/commons/codec/TestAll.java,v retrieving revision 1.2 diff -u -r1.2 TestAll.java --- src/test/org/apache/commons/codec/TestAll.java 18 Nov 2002 13:00:26 -0000 1.2 +++ src/test/org/apache/commons/codec/TestAll.java 1 Dec 2002 22:03:22 -0000 @@ -78,6 +78,7 @@ public static Test suite() { TestSuite suite = new TestSuite(); suite.addTest(org.apache.commons.codec.base64.TestAll.suite()); + suite.addTest(TestHex.suite()); suite.addTest(TestMetaphone.suite()); suite.addTest(TestSoundex.suite()); suite.addTest(TestRefinedSoundex.suite()); Index: src/test/org/apache/commons/codec/TestEncoder.java =================================================================== RCS file: /home/cvspublic/jakarta-commons-sandbox/codec/src/test/org/apache/commons/codec/TestEncoder.java,v retrieving revision 1.1 diff -u -r1.1 TestEncoder.java --- src/test/org/apache/commons/codec/TestEncoder.java 18 Nov 2002 13:00:26 -0000 1.1 +++ src/test/org/apache/commons/codec/TestEncoder.java 1 Dec 2002 22:03:20 -0000 @@ -85,6 +85,7 @@ public void testEncodeNull() { Encoder encoder = makeEncoder(); - encoder.encode(null); + String nullStr = null; + encoder.encode(nullStr); } } Index: src/test/org/apache/commons/codec/TestHex.java =================================================================== RCS file: src/test/org/apache/commons/codec/TestHex.java diff -N src/test/org/apache/commons/codec/TestHex.java --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ src/test/org/apache/commons/codec/TestHex.java 1 Dec 2002 22:03:22 -0000 @@ -0,0 +1,122 @@ +/* + * $Header: +/home/cvspublic/jakarta-commons-sandbox/codec/src/test/org/apache/commons/codec/TestMetaphone.java,v + 1.2 2002/11/18 13:00:26 
rwaldhoff Exp $ + * $Revision: 1.2 $ + * $Date: 2002/11/18 13:00:26 $ + * + * ==================================================================== + * + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2002 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, if + * any, must include the following acknowlegement: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowlegement may appear in the software itself, + * if and wherever such third-party acknowlegements normally appear. + * + * 4. The names "The Jakarta Project", "Commons", and "Apache Software + * Foundation" must not be used to endorse or promote products derived + * from this software without prior written permission. For written + * permission, please contact [EMAIL PROTECTED] + * + * 5. Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their names without prior written + * permission of the Apache Group. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + * + */ +package org.apache.commons.codec; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +/** + * @version $Revision: 1.2 $ $Date: 2002/11/18 13:00:26 $ + * @author Rodney Waldhoff + */ +public class TestHex extends TestEncoder { + + public TestHex(String name) { + super(name); + } + + public static Test suite() { + return (new TestSuite(TestHex.class)); + } + + public void setUp() throws Exception { + super.setUp(); + _hex = new Hex(); + } + + public void tearDown() throws Exception { + super.tearDown(); + _hex = null; + } + + protected Encoder makeEncoder() { + return new Hex(); + } + + // ------------------------------------------------------------------------ + + public void testHex() { + + byte[] b1a = { (byte) 0x00, + (byte) 0x00, + (byte) 0x00 }; + assertEquals("000000",_hex.encode(b1a)); + + byte[] b2a = { (byte) 0x00, + (byte) 0x00, + (byte) 0x01 }; + assertEquals("000001",_hex.encode(b2a)); + + byte[] b3a = { (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF }; + assertEquals("ffffff",_hex.encode(b3a)); + + byte[] b4a = { (byte) 0xCD, + (byte) 0xBB, + 
(byte) 0x35 }; + assertEquals("cdbb35",_hex.encode(b4a)); + + } + + private Hex _hex = null; +}
-- To unsubscribe, e-mail: <mailto:[EMAIL PROTECTED]> For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>
