Here is a patch for codec, which includes the following:

0. Added a function to Encoder - public String encode(byte[])

1. Added that function to all classes that implement Encoder.

2. Added Hex, which encodes a byte[] as a lowercase hex string, e.g. the bytes 0xcb 0xd3 0x42 -> "cbd342"

3. Added TestHex, a JUnit test for the Hex class.

4. Some files in the codec package had CRLF line-ending problems; this patch resolves them.

--------
Tim O'Brien 
Transolutions, Inc.
18 N Waukegan Road
Lake Bluff, Il 60044
W 847-574-2143
F 847-234-3471
M 847-863-7045
Index: src/java/org/apache/commons/codec/Encoder.java
===================================================================
RCS file: 
/home/cvspublic/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/Encoder.java,v
retrieving revision 1.2
diff -u -r1.2 Encoder.java
--- src/java/org/apache/commons/codec/Encoder.java      18 Nov 2002 12:41:24 -0000     
 1.2
+++ src/java/org/apache/commons/codec/Encoder.java      1 Dec 2002 22:03:15 -0000
@@ -1,4 +1,16 @@
-/* ====================================================================
 * The Apache 
Software License, Version 1.1
 *
 * Copyright (c) 2002 The Apache Software Foundation. 
 All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with 
or without
 * modification, are permitted provided that the following conditions
 * 
are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    
notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions 
in binary form must reproduce the above copyright
+/* ====================================================================
+ * The Apache Software License, Version 1.1 *
+ * Copyright (c) 2002 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the
  *    distribution.
@@ -31,4 +43,25 @@
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
- * ====================================================================
 *
 * This 
software consists of voluntary contributions made by many
 * individuals on behalf of 
the Apache Software Foundation.  For more
 * information on the Apache Software 
Foundation, please see
 * <http://www.apache.org/>.
 */
package 
org.apache.commons.codec;

/**
 * Encoder is an interface, which is implemented by 
Soundex,
 * Metaphone, Soundex2, etc.
 *
 * @author [EMAIL PROTECTED]
 * 
@version $Revision: 1.2 $ $Date: 2002/11/18 12:41:24 $
 */
public interface Encoder {
 
   String encode(String str);
}  
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+package org.apache.commons.codec;
+
+/** 
+ * Encoder is an interface, which is implemented by Soundex, 
+ * Metaphone, Soundex2, etc. 
+ * 
+ * @author [EMAIL PROTECTED] 
+ * @version $Revision: 1.2 $ $Date: 2002/11/18 12:41:24 $ 
+ */
+public interface Encoder {
+
+  String encode(String str);
+  String encode(byte[] bytes);
+
+}
Index: src/java/org/apache/commons/codec/EncoderComparator.java
===================================================================
RCS file: 
/home/cvspublic/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/EncoderComparator.java,v
retrieving revision 1.2
diff -u -r1.2 EncoderComparator.java
--- src/java/org/apache/commons/codec/EncoderComparator.java    18 Nov 2002 12:41:24 
-0000      1.2
+++ src/java/org/apache/commons/codec/EncoderComparator.java    1 Dec 2002 22:03:16 
+-0000
@@ -1,2 +1,86 @@
-/* ====================================================================
 * The Apache 
Software License, Version 1.1
 *
 * Copyright (c) 2002 The Apache Software Foundation. 
 All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with 
or without
 * modification, are permitted provided that the following conditions
 * 
are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    
notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions 
in binary form must reproduce the above copyright
 *    notice, this list of 
conditions and the following disclaimer in
 *    the documentation and/or other 
materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation 
included with the redistribution,
 *    if any, must include the following 
acknowledgment:
 *       "This product includes software developed by the
 *        
Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this 
acknowledgment may appear in the software itself,
 *    if and wherever such 
third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache 
Software Foundation" and
 *    "Apache Commons" must not be used to endorse or promote 
products
 *    derived from this software without prior written permission. For
 *    
written permission, please contact [EMAIL PROTECTED]
 *
 * 5. Products derived from 
this software may not be called "Apache",
 *    "Apache Turbine", nor may "Apache" 
appear in their name, without
 *    prior written permission of the Apache Software 
Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * 
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF 
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT 
SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, 
OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF 
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR 
OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 
THE POSSIBILITY OF
 * SUCH DAMAGE.
 * 
====================================================================
 *
 * This 
software consists of voluntary contributions made by many
 * individuals on behalf of 
the Apache Software Foundation.  For more
 * information on the Apache Software 
Foundation, please see
 * <http://www.apache.org/>.
 */
package 
org.apache.commons.codec;
-import java.util.Comparator;

/**
 * Compare using an Encoder.
 *
 * @author 
[EMAIL PROTECTED]
 * @version $Revision: 1.2 $ $Date: 2002/11/18 12:41:24 $
 
*/
public class EncoderComparator implements Comparator {

    private Encoder 
encoder;

    /**
     * Use the default soundex algorithm, US_ENGLISH.
     */
    
public EncoderComparator() {
        this(RefinedSoundex.US_ENGLISH);
    }

    /**
  
   * Use the provided soundex algorithm.
     */
    public EncoderComparator(Encoder 
en) {
        this.encoder = en;
    }

    public int compare(Object o1, Object o2) {
        String s1 = encoder.encode(o1.toString());
        String s2 = 
encoder.encode(o2.toString());
        return s1.compareTo(s2);
    }

}
+/* ====================================================================
+ * The Apache Software License, Version 1.1 *
+ * Copyright (c) 2002 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Commons" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact [EMAIL PROTECTED]
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Turbine", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+package org.apache.commons.codec;
+import java.util.Comparator;
+
+/**
+ * Compare using an Encoder.
+ *
+ * @author [EMAIL PROTECTED]
+ * @version $Revision: 1.2 $ $Date: 2002/11/18 12:41:24 $
+ */
+public class EncoderComparator implements Comparator {
+
+    private Encoder encoder;
+
+    /**
+     * Use the default soundex algorithm, US_ENGLISH.
+     */
+    public EncoderComparator() {
+        this(RefinedSoundex.US_ENGLISH);
+    }
+
+    /**
+     * Use the provided soundex algorithm.
+     */
+    public EncoderComparator(Encoder en) {
+        this.encoder = en;
+    }
+
+    public int compare(Object o1, Object o2) {
+        String s1 = encoder.encode(o1.toString());
+        String s2 = encoder.encode(o2.toString());
+        return s1.compareTo(s2);
+    }
+
+}
Index: src/java/org/apache/commons/codec/Hex.java
===================================================================
RCS file: src/java/org/apache/commons/codec/Hex.java
diff -N src/java/org/apache/commons/codec/Hex.java
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ src/java/org/apache/commons/codec/Hex.java  1 Dec 2002 22:03:18 -0000
@@ -0,0 +1,102 @@
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2002 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Commons" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact [EMAIL PROTECTED]
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Turbine", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+ package org.apache.commons.codec;
+
+/**
+ * @author [EMAIL PROTECTED]
+ */
+public class Hex implements Encoder {
+    
+    private char[] hexDigits =  { '0', '1', '2', '3', '4', '5', '6', '7', 
+                                     '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };    
+
+    public String toHex(byte[] pBytes) {
+     
+        StringBuffer sBuf = new StringBuffer();
+        
+        for( int i = 0; i < pBytes.length; i++ ) {
+     
+            sBuf.append( hexDigits[ (((int) (pBytes[i] >> 4)) & 0x0f) ] );
+            sBuf.append( hexDigits[ (((int) (pBytes[i] & 0x0f)) & 0x0f) ] );    
+            
+        }
+     
+        return( sBuf.toString() );   
+        
+    }
+    
+
+       /**
+        * @see org.apache.commons.codec.Encoder#encode(String)
+        */
+       public String encode(String str) {
+        if( str != null ) {
+               return toHex(str.getBytes());
+        } else {
+            return null;
+        }
+       }
+
+       /**
+        * @see org.apache.commons.codec.Encoder#encode(byte[])
+        */
+       public String encode(byte[] bytes) {
+        if( bytes != null ) {
+               return toHex( bytes );
+        } else {
+            return null;
+        }
+       }
+
+}
Index: src/java/org/apache/commons/codec/Metaphone.java
===================================================================
RCS file: 
/home/cvspublic/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/Metaphone.java,v
retrieving revision 1.4
diff -u -r1.4 Metaphone.java
--- src/java/org/apache/commons/codec/Metaphone.java    18 Nov 2002 12:41:24 -0000     
 1.4
+++ src/java/org/apache/commons/codec/Metaphone.java    1 Dec 2002 22:03:19 -0000
@@ -1,2 +1,309 @@
-/* ====================================================================
 * The Apache 
Software License, Version 1.1
 *
 * Copyright (c) 2001-2002 The Apache Software 
Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary 
forms, with or without
 * modification, are permitted provided that the following 
conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above 
copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. 
Redistributions in binary form must reproduce the above copyright
 *    notice, this 
list of conditions and the following disclaimer in
 *    the documentation and/or 
other materials provided with the
 *    distribution.
 *
 * 3. The end-user 
documentation included with the redistribution,
 *    if any, must include the 
following acknowledgment:
 *       "This product includes software developed by the
 * 
       Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this 
acknowledgment may appear in the software itself,
 *    if and wherever such 
third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache 
Software Foundation" and
 *    "Apache Commons" must not be used to endorse or promote 
products
 *    derived from this software without prior written permission. For
 *    
written permission, please contact [EMAIL PROTECTED]
 *
 * 5. Products derived from 
this software may not be called "Apache",
 *    "Apache Turbine", nor may "Apache" 
appear in their name, without
 *    prior written permission of the Apache Software 
Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * 
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF 
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT 
SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, 
OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF 
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR 
OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 
THE POSSIBILITY OF
 * SUCH DAMAGE.
 * 
====================================================================
 *
 * This 
software consists of voluntary contributions made by many
 * individuals on behalf of 
the Apache Software Foundation.  For more
 * information on the Apache Software 
Foundation, please see
 * <http://www.apache.org/>.
 */
package 
org.apache.commons.codec;

/**
 * A class to generate phonetic code.
 * The initial 
Java implementation, William B. Brogden.  December, 1997
 * Permission given by 
wbrogden for code to be used anywhere.
 * 
 * @see "Hanging on the Metaphone" by 
Lawrence Philips
 *      <i>Computer Language</i> of Dec. 1990, p 39   
 * 
 * 
@version $Revision: 1.4 $ $Date: 2002/11/18 12:41:24 $
 * @author [EMAIL PROTECTED]
 * 
@author [EMAIL PROTECTED]
 * @author [EMAIL PROTECTED]
 */
public 
class Metaphone implements Encoder {
-    private String vowels = "AEIOU" ;
    private String frontv = "EIY"   ;
    
private String varson = "CSPTG" ;

    private int maxCodeLen = 4 ;

    public 
Metaphone() {
        super();
    }

    /**
     * Find the metaphone value of a 
String. This is similar to the
     * soundex algorithm, but better at finding similar 
sounding words.
     * All input is converted to upper case.
     * Limitations: Input 
format is expected to be a single ASCII word
     * with only characters in the A - Z 
range, no punctuation or numbers.
     */
    public String metaphone( String txt ){
  
    int mtsz = 0  ;
      boolean hard = false ;
      if(( txt == null ) ||
         
( txt.length() == 0 )) return "" ;
      // single character is itself
      if( 
txt.length() == 1 ) return txt.toUpperCase() ;
      
      char[] inwd = 
txt.toUpperCase().toCharArray() ;
      
      String tmpS ;
      StringBuffer local 
= new StringBuffer( 40 ); // manipulate
      StringBuffer code = new StringBuffer( 10 
) ; //   output
      // handle initial 2 characters exceptions
      switch( inwd[0] 
){
        case 'K': case 'G' : case 'P' : /* looking for KN, etc*/
          if( 
inwd[1] == 'N')local.append(inwd, 1, inwd.length - 1 );
          else local.append( 
inwd );
          break;
        case 'A': /* looking for AE */
          if( inwd[1] 
== 'E' )local.append(inwd, 1, inwd.length - 1 );
          else local.append( inwd );
 
         break;
        case 'W' : /* looking for WR or WH */
          if( inwd[1] == 
'R' ){   // WR -> R
            local.append(inwd, 1, inwd.length - 1 ); break ;
      
    }
          if( inwd[1] == 'H'){
            local.append(inwd, 1, inwd.length - 1 
);
            local.setCharAt( 0,'W'); // WH -> W
          }
          else 
local.append( inwd );
          break;
        case 'X' : /* initial X becomes S */
   
       inwd[0] = 'S' ;local.append( inwd );
          break ;
        default :
       
   local.append( inwd );
      } // now local has working string with initials fixed
  
    int wdsz = local.length();
      int n = 0 ;
      while((mtsz < maxCodeLen ) && 
// max code size of 4 works well
            (n < wdsz ) ){
        char symb = 
local.charAt(n) ;
        // remove duplicate letters except C
        if(( symb != 
'C' ) &&
           (n > 0 ) && ( local.charAt(n - 1 ) == symb )) n++ ;
        else{ 
// not dup
          switch( symb ){
            case 'A' : case 'E' : case 'I' : case 
'O' : case 'U' :
              if( n == 0 ) { code.append(symb );mtsz++;
              
}
              break ; // only use vowel if leading char
            case 'B' :
      
        if( (n > 0 ) &&
                  !(n + 1 == wdsz ) && // not MB at end of 
word
                  ( local.charAt(n - 1) == 'M')) {
                    
code.append(symb);
                  }
              else code.append(symb);
          
    mtsz++ ;
              break ;
            case 'C' : // lots of C special cases
  
            /* discard if SCI, SCE or SCY */
              if( ( n > 0 ) &&
           
       ( local.charAt(n-1) == 'S' ) &&
                  ( n + 1 < wdsz ) &&
          
        ( frontv.indexOf( local.charAt(n + 1)) >= 0 )){ break ;}
              tmpS = 
local.toString();
              if( tmpS.indexOf("CIA", n ) == n ) { // "CIA" -> X
    
             code.append('X' ); mtsz++; break ;
              }
              if( ( n 
+ 1 < wdsz ) &&
                  (frontv.indexOf( local.charAt(n+1) )>= 0 )){
        
         code.append('S');mtsz++; break ; // CI,CE,CY -> S
              }
            
  if(( n > 0) &&
                 ( tmpS.indexOf("SCH",n-1 )== n-1 )){ // SCH->sk
     
            code.append('K') ; mtsz++;break ;
              }
              if( 
tmpS.indexOf("CH", n ) == n ){ // detect CH
                if((n == 0 ) &&
           
        (wdsz >= 3 ) &&    // CH consonant -> K consonant
                   
(vowels.indexOf( local.charAt( 2) ) < 0 )){
                     code.append('K');
    
            }
                else { code.append('X'); // CHvowel -> X
                
}
                mtsz++;
              }
              else { code.append('K' 
);mtsz++;
              }
              break ;
            case 'D' :
              
if(( n + 2 < wdsz )&&  // DGE DGI DGY -> J
                 ( local.charAt(n+1) == 'G' 
)&&
                 (frontv.indexOf( local.charAt(n+2) )>= 0)){
                    
code.append('J' ); n += 2 ;
              }
              else { code.append( 'T' );
  
            }
              mtsz++;
              break ;
            case 'G' : // GH 
silent at end or before consonant
              if(( n + 2 == wdsz )&&
                
 (local.charAt(n+1) == 'H' )) break ;
              if(( n + 2 < wdsz ) &&
            
     (local.charAt(n+1) == 'H' )&&
                 (vowels.indexOf( 
local.charAt(n+2)) < 0 )) break ;
              tmpS = local.toString();
              
if((n > 0) &&
                 ( tmpS.indexOf("GN", n ) == n)||
                 ( 
tmpS.indexOf("GNED",n) == n )) break ; // silent G
              if(( n > 0 ) &&
      
           (local.charAt(n-1) == 'G')) hard = true ;
              else hard = false ;
              if((n+1 < wdsz) &&
                 (frontv.indexOf( local.charAt(n+1) ) 
>= 0 )&&
                 (!hard) ) code.append( 'J' );
              else 
code.append('K');
              mtsz++;
              break ;
            case 'H':
   
           if( n + 1 == wdsz ) break ; // terminal H
              if((n > 0) &&
      
           (varson.indexOf( local.charAt(n-1)) >= 0)) break ;
              if( 
vowels.indexOf( local.charAt(n+1)) >=0 ){
                  code.append('H') ; 
mtsz++;// Hvowel
              }
              break;
            case 'F': case 'J' : 
case 'L' :
            case 'M': case 'N' : case 'R' :
              code.append( symb 
); mtsz++; break ;
            case 'K' :
              if( n > 0 ){ // not initial
   
             if( local.charAt( n -1) != 'C' ) {
                     code.append(symb 
);
                }
              }
              else   code.append( symb ); // 
initial K
              mtsz++ ;
              break ;
            case 'P' :
         
     if((n + 1 < wdsz) &&  // PH -> F
                 (local.charAt( n+1) == 
'H'))code.append('F');
              else code.append( symb );
              mtsz++;
  
            break ;
            case 'Q' :
              code.append('K' );mtsz++; 
break ;
            case 'S' :
              tmpS = local.toString();
              
if((tmpS.indexOf("SH", n )== n) ||
                 (tmpS.indexOf("SIO",n )== n) ||
   
              (tmpS.indexOf("SIA",n )== n)) code.append('X');
              else 
code.append( 'S' );
              mtsz++ ;
              break ;
            case 'T' 
:
              tmpS = local.toString(); // TIA TIO -> X
              
if((tmpS.indexOf("TIA",n )== n)||
                 (tmpS.indexOf("TIO",n )== n) ){
    
                code.append('X'); mtsz++; break;
              }
              if( 
tmpS.indexOf("TCH",n )==n) break;
              // substitute numeral 0 for TH 
(resembles theta after all)
              if( tmpS.indexOf("TH", n )==n) 
code.append('0');
              else code.append( 'T' );
              mtsz++ ;
       
       break ;
            case 'V' :
              code.append('F'); mtsz++;break ;
  
          case 'W' : case 'Y' : // silent if not followed by vowel
              
if((n+1 < wdsz) &&
                 (vowels.indexOf( local.charAt(n+1))>=0)){
         
           code.append( symb );mtsz++;
              }
              break ;
          
  case 'X' :
              code.append('K'); code.append('S');mtsz += 2;
              
break ;
            case 'Z' :
              code.append('S'); mtsz++; break ;
        
  } // end switch
          n++ ;
        } // end else from symb != 'C'
        if( 
mtsz > 4 )code.setLength( 4);
      }
      return code.toString();
    } // end 
static method metaPhone()
    
    public String encode(String pString) {
        
return( metaphone( pString ) );   
    }

    /**
     * Are the metaphones of two 
strings the same.
     */
    public boolean isMetaphoneEqual(String str1, String 
str2) {
        return metaphone(str1).equals(metaphone(str2));
    }

  /**
     * 
Returns the maxCodeLen.
      * @return int
  */
    public int getMaxCodeLen() {
     
      return maxCodeLen;
     }

     /**
     * Sets the maxCodeLen.
         * 
@param maxCodeLen The maxCodeLen to set
      */
    public void setMaxCodeLen(int 
maxCodeLen) {
            this.maxCodeLen = maxCodeLen;
  }

}
\ No newline at end of file
+/* ====================================================================
+ * The Apache Software License, Version 1.1 *
+ * Copyright (c) 2002 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Commons" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact [EMAIL PROTECTED]
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Turbine", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+package org.apache.commons.codec;
+
+/**
+ * A class to generate phonetic code.
+ * The initial Java implementation, William B. Brogden.  December, 1997
+ * Permission given by wbrogden for code to be used anywhere.
+ * 
+ * @see "Hanging on the Metaphone" by Lawrence Philips
+ *      <i>Computer Language</i> of Dec. 1990, p 39   
+ * 
+ * @version $Revision: 1.4 $ $Date: 2002/11/18 12:41:24 $
+ * @author [EMAIL PROTECTED]
+ * @author [EMAIL PROTECTED]
+ * @author [EMAIL PROTECTED]
+ */
+public class Metaphone implements Encoder {
+    private String vowels = "AEIOU" ;
+    private String frontv = "EIY"   ;
+    private String varson = "CSPTG" ;
+
+    private int maxCodeLen = 4 ;
+
+    public Metaphone() {
+        super();
+    }
+
+    /**
+     * Find the metaphone value of a String. This is similar to the
+     * soundex algorithm, but better at finding similar sounding words.
+     * All input is converted to upper case.
+     * Limitations: Input format is expected to be a single ASCII word
+     * with only characters in the A - Z range, no punctuation or numbers.
+     */
+    public String metaphone( String txt ){
+      int mtsz = 0  ;
+      boolean hard = false ;
+      if(( txt == null ) ||
+         ( txt.length() == 0 )) return "" ;
+      // single character is itself
+      if( txt.length() == 1 ) return txt.toUpperCase() ;
+      
+      char[] inwd = txt.toUpperCase().toCharArray() ;
+      
+      String tmpS ;
+      StringBuffer local = new StringBuffer( 40 ); // manipulate
+      StringBuffer code = new StringBuffer( 10 ) ; //   output
+      // handle initial 2 characters exceptions
+      switch( inwd[0] ){
+        case 'K': case 'G' : case 'P' : /* looking for KN, etc*/
+          if( inwd[1] == 'N')local.append(inwd, 1, inwd.length - 1 );
+          else local.append( inwd );
+          break;
+        case 'A': /* looking for AE */
+          if( inwd[1] == 'E' )local.append(inwd, 1, inwd.length - 1 );
+          else local.append( inwd );
+          break;
+        case 'W' : /* looking for WR or WH */
+          if( inwd[1] == 'R' ){   // WR -> R
+            local.append(inwd, 1, inwd.length - 1 ); break ;
+          }
+          if( inwd[1] == 'H'){
+            local.append(inwd, 1, inwd.length - 1 );
+            local.setCharAt( 0,'W'); // WH -> W
+          }
+          else local.append( inwd );
+          break;
+        case 'X' : /* initial X becomes S */
+          inwd[0] = 'S' ;local.append( inwd );
+          break ;
+        default :
+          local.append( inwd );
+      } // now local has working string with initials fixed
+      int wdsz = local.length();
+      int n = 0 ;
+      while((mtsz < maxCodeLen ) && // max code size of 4 works well
+            (n < wdsz ) ){
+        char symb = local.charAt(n) ;
+        // remove duplicate letters except C
+        if(( symb != 'C' ) &&
+           (n > 0 ) && ( local.charAt(n - 1 ) == symb )) n++ ;
+        else{ // not dup
+          switch( symb ){
+            case 'A' : case 'E' : case 'I' : case 'O' : case 'U' :
+              if( n == 0 ) { code.append(symb );mtsz++;
+              }
+              break ; // only use vowel if leading char
+            case 'B' :
+              if( (n > 0 ) &&
+                  !(n + 1 == wdsz ) && // not MB at end of word
+                  ( local.charAt(n - 1) == 'M')) {
+                    code.append(symb);
+                  }
+              else code.append(symb);
+              mtsz++ ;
+              break ;
+            case 'C' : // lots of C special cases
+              /* discard if SCI, SCE or SCY */
+              if( ( n > 0 ) &&
+                  ( local.charAt(n-1) == 'S' ) &&
+                  ( n + 1 < wdsz ) &&
+                  ( frontv.indexOf( local.charAt(n + 1)) >= 0 )){ break ;}
+              tmpS = local.toString();
+              if( tmpS.indexOf("CIA", n ) == n ) { // "CIA" -> X
+                 code.append('X' ); mtsz++; break ;
+              }
+              if( ( n + 1 < wdsz ) &&
+                  (frontv.indexOf( local.charAt(n+1) )>= 0 )){
+                 code.append('S');mtsz++; break ; // CI,CE,CY -> S
+              }
+              if(( n > 0) &&
+                 ( tmpS.indexOf("SCH",n-1 )== n-1 )){ // SCH->sk
+                 code.append('K') ; mtsz++;break ;
+              }
+              if( tmpS.indexOf("CH", n ) == n ){ // detect CH
+                if((n == 0 ) &&
+                   (wdsz >= 3 ) &&    // CH consonant -> K consonant
+                   (vowels.indexOf( local.charAt( 2) ) < 0 )){
+                     code.append('K');
+                }
+                else { code.append('X'); // CHvowel -> X
+                }
+                mtsz++;
+              }
+              else { code.append('K' );mtsz++;
+              }
+              break ;
+            case 'D' :
+              if(( n + 2 < wdsz )&&  // DGE DGI DGY -> J
+                 ( local.charAt(n+1) == 'G' )&&
+                 (frontv.indexOf( local.charAt(n+2) )>= 0)){
+                    code.append('J' ); n += 2 ;
+              }
+              else { code.append( 'T' );
+              }
+              mtsz++;
+              break ;
+            case 'G' : // GH silent at end or before consonant
+              if(( n + 2 == wdsz )&&
+                 (local.charAt(n+1) == 'H' )) break ;
+              if(( n + 2 < wdsz ) &&
+                 (local.charAt(n+1) == 'H' )&&
+                 (vowels.indexOf( local.charAt(n+2)) < 0 )) break ;
+              tmpS = local.toString();
+              if((n > 0) &&
+                 ( tmpS.indexOf("GN", n ) == n)||
+                 ( tmpS.indexOf("GNED",n) == n )) break ; // silent G
+              if(( n > 0 ) &&
+                 (local.charAt(n-1) == 'G')) hard = true ;
+              else hard = false ;
+              if((n+1 < wdsz) &&
+                 (frontv.indexOf( local.charAt(n+1) ) >= 0 )&&
+                 (!hard) ) code.append( 'J' );
+              else code.append('K');
+              mtsz++;
+              break ;
+            case 'H':
+              if( n + 1 == wdsz ) break ; // terminal H
+              if((n > 0) &&
+                 (varson.indexOf( local.charAt(n-1)) >= 0)) break ;
+              if( vowels.indexOf( local.charAt(n+1)) >=0 ){
+                  code.append('H') ; mtsz++;// Hvowel
+              }
+              break;
+            case 'F': case 'J' : case 'L' :
+            case 'M': case 'N' : case 'R' :
+              code.append( symb ); mtsz++; break ;
+            case 'K' :
+              if( n > 0 ){ // not initial
+                if( local.charAt( n -1) != 'C' ) {
+                     code.append(symb );
+                }
+              }
+              else   code.append( symb ); // initial K
+              mtsz++ ;
+              break ;
+            case 'P' :
+              if((n + 1 < wdsz) &&  // PH -> F
+                 (local.charAt( n+1) == 'H'))code.append('F');
+              else code.append( symb );
+              mtsz++;
+              break ;
+            case 'Q' :
+              code.append('K' );mtsz++; break ;
+            case 'S' :
+              tmpS = local.toString();
+              if((tmpS.indexOf("SH", n )== n) ||
+                 (tmpS.indexOf("SIO",n )== n) ||
+                 (tmpS.indexOf("SIA",n )== n)) code.append('X');
+              else code.append( 'S' );
+              mtsz++ ;
+              break ;
+            case 'T' :
+              tmpS = local.toString(); // TIA TIO -> X
+              if((tmpS.indexOf("TIA",n )== n)||
+                 (tmpS.indexOf("TIO",n )== n) ){
+                    code.append('X'); mtsz++; break;
+              }
+              if( tmpS.indexOf("TCH",n )==n) break;
+              // substitute numeral 0 for TH (resembles theta after all)
+              if( tmpS.indexOf("TH", n )==n) code.append('0');
+              else code.append( 'T' );
+              mtsz++ ;
+              break ;
+            case 'V' :
+              code.append('F'); mtsz++;break ;
+            case 'W' : case 'Y' : // silent if not followed by vowel
+              if((n+1 < wdsz) &&
+                 (vowels.indexOf( local.charAt(n+1))>=0)){
+                    code.append( symb );mtsz++;
+              }
+              break ;
+            case 'X' :
+              code.append('K'); code.append('S');mtsz += 2;
+              break ;
+            case 'Z' :
+              code.append('S'); mtsz++; break ;
+          } // end switch
+          n++ ;
+        } // end else from symb != 'C'
+        if( mtsz > 4 )code.setLength( 4);
+      }
+      return code.toString();
+    } // end static method metaPhone()
+    
+    public String encode(String pString) {
+        return( metaphone( pString ) );   
+    }
+    
+    public String encode(byte[] pBytes) {
+        return( metaphone( new String( pBytes ) ) );
+    }
+
+
+    /**
+     * Are the metaphones of two strings the same.
+     */
+    public boolean isMetaphoneEqual(String str1, String str2) {
+        return metaphone(str1).equals(metaphone(str2));
+    }
+
+       /**
+        * Returns the maxCodeLen.
+        * @return int
+        */
+       public int getMaxCodeLen() {
+               return maxCodeLen;
+       }
+
+       /**
+        * Sets the maxCodeLen.
+        * @param maxCodeLen The maxCodeLen to set
+        */
+       public void setMaxCodeLen(int maxCodeLen) {
+               this.maxCodeLen = maxCodeLen;
+       }
+
+}
Index: src/java/org/apache/commons/codec/RefinedSoundex.java
===================================================================
RCS file: 
/home/cvspublic/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/RefinedSoundex.java,v
retrieving revision 1.3
diff -u -r1.3 RefinedSoundex.java
--- src/java/org/apache/commons/codec/RefinedSoundex.java       18 Nov 2002 13:00:25 
-0000      1.3
+++ src/java/org/apache/commons/codec/RefinedSoundex.java       1 Dec 2002 22:03:18 -0000
@@ -1,3 +1,136 @@
-/* ====================================================================
 * The Apache 
Software License, Version 1.1
 *
 * Copyright (c) 2002 The Apache Software Foundation. 
 All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with 
or without
 * modification, are permitted provided that the following conditions
 * 
are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    
notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions 
in binary form must reproduce the above copyright
 *    notice, this list of 
conditions and the following disclaimer in
 *    the documentation and/or other 
materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation 
included with the redistribution,
 *    if any, must include the following 
acknowledgment:
 *       "This product includes software developed by the
 *        
Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this 
acknowledgment may appear in the software itself,
 *    if and wherever such 
third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache 
Software Foundation" and
 *    "Apache Commons" must not be used to endorse or promote 
products
 *    derived from this software without prior written permission. For
 *    
written permission, please contact [EMAIL PROTECTED]
 *
 * 5. Products derived from 
this software may not be called "Apache",
 *    "Apache Turbine", nor may "Apache" 
appear in their name, without
 *    prior written permission of the Apache Software 
Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * 
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF 
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT 
SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, 
OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF 
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR 
OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 
THE POSSIBILITY OF
 * SUCH DAMAGE.
 * 
====================================================================
 *
 * This 
software consists of voluntary contributions made by many
 * individuals on behalf of 
the Apache Software Foundation.  For more
 * information on the Apache Software 
Foundation, please see
 * <http://www.apache.org/>.
 */
package 
org.apache.commons.codec;
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2002 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Commons" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact [EMAIL PROTECTED]
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Turbine", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+package org.apache.commons.codec;
 
-/**
 * Encodes a string into a soundex value.  Sounde is an encoding used to
 * 
relate similar names, but can also be used as a general purpose
 * scheme to find word 
with similar phonemes. 
 * More information may be found at: 
http://www.bluepoof.com/Soundex/info2.html
 * 
 * @todo Needs internationalisation in 
a future release.
 *
 * @author [EMAIL PROTECTED]
 * @version $Revision: 1.3 $ 
$Date: 2002/11/18 13:00:25 $
 */
public class RefinedSoundex implements Encoder {

    
static public final char[] US_ENGLISH_MAPPING =
        
"01360240043788015936020505".toCharArray();

    static public final RefinedSoundex 
US_ENGLISH = new RefinedSoundex();
    
    private char[] soundexMapping;

    public 
RefinedSoundex() {
        this(US_ENGLISH_MAPPING);
    }

    public 
RefinedSoundex(char[] mapping) {
        this.soundexMapping = mapping;
    }

    /**
     * Get the SoundEx value of a string.
     * This implementation is taken from the 
code-snippers on 
     * http://www.sourceforge.net/
     */
    public String 
soundex(String str) {
        if(null == str || str.length() == 0) { return str; }
    
   
        StringBuffer sBuf = new StringBuffer();        
        str = 
str.toUpperCase();

        sBuf.append( str.charAt(0) );

        char last, mapped, 
current;
        last = '*';

        for( int i = 0; i < str.length(); i++ ) {

      
      current = getMappingCode( str.charAt(i) );
            if( current == last ) {
  
              continue;
            } else if( current != 0 ) {
                
sBuf.append( current );   
            }
            
            last = current;      
       
            
        }
        
        return sBuf.toString();
    }

    
public String encode(String pString) {
        return( soundex( pString ) );   
    }

    /**
     * Used internally by the SoundEx algorithm.
     */
    private char 
getMappingCode(char c) {
        if( !Character.isLetter(c) ) {
            return 0;
 
       } else {
            return soundexMapping[Character.toUpperCase(c) - 'A'];
    
    }
    }
}
\ No newline at end of file
+
+/**
+ * Encodes a string into a soundex value.  Soundex is an encoding used to
+ * relate similar names, but can also be used as a general purpose
+ * scheme to find words with similar phonemes. 
+ * More information may be found at: http://www.bluepoof.com/Soundex/info2.html
+ * 
+ * @todo Needs internationalisation in a future release.
+ *
+ * @author [EMAIL PROTECTED]
+ * @version $Revision: 1.3 $ $Date: 2002/11/18 13:00:25 $
+ */
+public class RefinedSoundex implements Encoder {
+
+    static public final char[] US_ENGLISH_MAPPING =
+        "01360240043788015936020505".toCharArray();
+
+    static public final RefinedSoundex US_ENGLISH = new RefinedSoundex();
+    
+    private char[] soundexMapping;
+
+    public RefinedSoundex() {
+        this(US_ENGLISH_MAPPING);
+    }
+
+    public RefinedSoundex(char[] mapping) {
+        this.soundexMapping = mapping;
+    }
+
+    /**
+     * Get the SoundEx value of a string.
+     * This implementation is taken from the code snippets on 
+     * http://www.sourceforge.net/
+     */
+    public String soundex(String str) {
+        if(null == str || str.length() == 0) { return str; }
+       
+        StringBuffer sBuf = new StringBuffer();        
+        str = str.toUpperCase();
+
+        sBuf.append( str.charAt(0) );
+
+        char last, mapped, current;
+        last = '*';
+
+        for( int i = 0; i < str.length(); i++ ) {
+
+            current = getMappingCode( str.charAt(i) );
+            if( current == last ) {
+                continue;
+            } else if( current != 0 ) {
+                sBuf.append( current );   
+            }
+            
+            last = current;             
+            
+        }
+        
+        return sBuf.toString();
+    }
+
+    public String encode(String pString) {
+        return( soundex( pString ) );   
+    }
+    
+    public String encode(byte[] pBytes) {
+        return( soundex( new String( pBytes ) ) );
+    }
+
+
+    /**
+     * Used internally by the SoundEx algorithm.
+     */
+    private char getMappingCode(char c) {
+        if( !Character.isLetter(c) ) {
+            return 0;
+        } else {
+            return soundexMapping[Character.toUpperCase(c) - 'A'];
+        }
+    }
+}
Index: src/java/org/apache/commons/codec/Soundex.java
===================================================================
RCS file: 
/home/cvspublic/jakarta-commons-sandbox/codec/src/java/org/apache/commons/codec/Soundex.java,v
retrieving revision 1.4
diff -u -r1.4 Soundex.java
--- src/java/org/apache/commons/codec/Soundex.java      18 Nov 2002 13:00:26 -0000     
 1.4
+++ src/java/org/apache/commons/codec/Soundex.java      1 Dec 2002 22:03:16 -0000
@@ -1,4 +1,149 @@
-/* ====================================================================
 * The Apache 
Software License, Version 1.1
 *
 * Copyright (c) 2001-2002 The Apache Software 
Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary 
forms, with or without
 * modification, are permitted provided that the following 
conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above 
copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. 
Redistributions in binary form must reproduce the above copyright
 *    notice, this 
list of conditions and the following disclaimer in
 *    the documentation and/or 
other materials provided with the
 *    distribution.
 *
 * 3. The end-user 
documentation included with the redistribution,
 *    if any, must include the 
following acknowledgment:
 *       "This product includes software developed by the
 * 
       Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this 
acknowledgment may appear in the software itself,
 *    if and wherever such 
third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache 
Software Foundation" and
 *    "Apache Commons" must not be used to endorse or promote 
products
 *    derived from this software without prior written permission. For
 *    
written permission, please contact [EMAIL PROTECTED]
 *
 * 5. Products derived from 
this software may not be called "Apache",
 *    "Apache Turbine", nor may "Apache" 
appear in their name, without
 *    prior written permission of the Apache Software 
Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * 
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF 
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT 
SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, 
OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF 
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR 
OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 
THE POSSIBILITY OF
 * SUCH DAMAGE.
 * 
====================================================================
 *
 * This 
software consists of voluntary contributions made by many
 * individuals on behalf of 
the Apache Software Foundation.  For more
 * information on the Apache Software 
Foundation, please see
 * <http://www.apache.org/>.
 */
package 
org.apache.commons.codec;
-/**
 * Encodes a string into a refined soundex value.  
 * A refined soundex code is 
optimized for spell checking word. 
 * "Soundex" method originally developed by 
Margaret Odell and 
 *          Robert Russell
 * 
 * 
http://www.bluepoof.com/Soundex/info2.html
 * 
 * @todo Needs internationalisation in 
a future release.
 *
 * @author [EMAIL PROTECTED]
 * @author 
[EMAIL PROTECTED]
 * @version $Revision: 1.4 $ $Date: 2002/11/18 13:00:26 $
 
*/
public class Soundex implements Encoder {

    static public final char[] 
US_ENGLISH_MAPPING =
        "01230120022455012623010202".toCharArray();

    static 
public final Soundex US_ENGLISH = new Soundex();
    
    private char[] 
soundexMapping;
    private int maxLength = 4;-   public Soundex() {
        
this(US_ENGLISH_MAPPING);
    }
-    public Soundex(char[] mapping) {
        this.soundexMapping = mapping;
    }

   
 /**
     * Get the SoundEx value of a string.
     * This implementation is taken 
from the code-snippers on 
     * http://www.sourceforge.net/
     */
    public 
String soundex(String str) {
        if(null == str || str.length() == 0) { return 
str; }
        
        char out[] = { '0', '0', '0', '0' };
        char last, 
mapped;
        int incount = 1, count = 1;
        out[0] = Character.toUpperCase( 
str.charAt(0) );
        last = getMappingCode( str.charAt(0) );
        while( 
(incount < str.length() ) && 
               (mapped = 
getMappingCode(str.charAt(incount++))) != 0 &&
               (count < maxLength) )
   
     {
            if( (mapped != '0') && (mapped != last) ) {
                
out[count++] = mapped;
            }
            last = mapped;
        }
        
return new String(out);
    }

    public String encode(String pString) {
        
return( soundex( pString ) );   
    }

    /**
     * Used internally by the SoundEx 
algorithm.
     */
    private char getMappingCode(char c) {
        if( 
!Character.isLetter(c) ) {
            return 0;
        } else {
            return 
soundexMapping[Character.toUpperCase(c) - 'A'];
        }
    }

        /**
     * 
Returns the maxLength.  Standard Soundex
     * @return int
  */
    public int 
getMaxLength() {
            return maxLength;
      }

     /**
     * Sets the 
maxLength.
  * @param maxLength The maxLength to set
        */
    public void 
setMaxLength(int maxLength) {
              this.maxLength = maxLength;
    }

}
\ No newline at end of file
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001-2002 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Commons" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact [EMAIL PROTECTED]
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Turbine", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+package org.apache.commons.codec;
+
+/**
+ * Encodes a string into a refined soundex value.  
+ * A refined soundex code is optimized for spell checking words. 
+ * "Soundex" method originally developed by Margaret Odell and 
+ *          Robert Russell
+ * 
+ * http://www.bluepoof.com/Soundex/info2.html
+ * 
+ * @todo Needs internationalisation in a future release.
+ *
+ * @author [EMAIL PROTECTED]
+ * @author [EMAIL PROTECTED]
+ * @version $Revision: 1.4 $ $Date: 2002/11/18 13:00:26 $
+ */
+public class Soundex implements Encoder {
+
+    static public final char[] US_ENGLISH_MAPPING =
+        "01230120022455012623010202".toCharArray();
+
+    static public final Soundex US_ENGLISH = new Soundex();
+    
+    private char[] soundexMapping;
+    private int maxLength = 4;
+
+
+   public Soundex() {
+        this(US_ENGLISH_MAPPING);
+    }
+
+    public Soundex(char[] mapping) {
+        this.soundexMapping = mapping;
+    }
+
+    /**
+     * Get the SoundEx value of a string.
+     * This implementation is taken from the code snippets on 
+     * http://www.sourceforge.net/
+     */
+    public String soundex(String str) {
+        if(null == str || str.length() == 0) { return str; }
+        
+        char out[] = { '0', '0', '0', '0' };
+        char last, mapped;
+        int incount = 1, count = 1;
+        out[0] = Character.toUpperCase( str.charAt(0) );
+        last = getMappingCode( str.charAt(0) );
+        while( (incount < str.length() ) && 
+               (mapped = getMappingCode(str.charAt(incount++))) != 0 &&
+               (count < maxLength) )
+        {
+            if( (mapped != '0') && (mapped != last) ) {
+                out[count++] = mapped;
+            }
+            last = mapped;
+        }
+        return new String(out);
+    }
+
+    public String encode(String pString) {
+        return( soundex( pString ) );   
+    }
+    
+    public String encode(byte[] pBytes) {
+        return( soundex( new String( pBytes ) ) );
+    }
+
+    /**
+     * Used internally by the SoundEx algorithm.
+     */
+    private char getMappingCode(char c) {
+        if( !Character.isLetter(c) ) {
+            return 0;
+        } else {
+            return soundexMapping[Character.toUpperCase(c) - 'A'];
+        }
+    }
+
+       /**
+        * Returns the maxLength.  Standard Soundex
+        * @return int
+        */
+       public int getMaxLength() {
+               return maxLength;
+       }
+
+       /**
+        * Sets the maxLength.
+        * @param maxLength The maxLength to set
+        */
+       public void setMaxLength(int maxLength) {
+               this.maxLength = maxLength;
+       }
+
+}
Index: src/test/org/apache/commons/codec/TestAll.java
===================================================================
RCS file: 
/home/cvspublic/jakarta-commons-sandbox/codec/src/test/org/apache/commons/codec/TestAll.java,v
retrieving revision 1.2
diff -u -r1.2 TestAll.java
--- src/test/org/apache/commons/codec/TestAll.java      18 Nov 2002 13:00:26 -0000     
 1.2
+++ src/test/org/apache/commons/codec/TestAll.java      1 Dec 2002 22:03:22 -0000
@@ -78,6 +78,7 @@
     public static Test suite() {
         TestSuite suite = new TestSuite();
         suite.addTest(org.apache.commons.codec.base64.TestAll.suite());
+        suite.addTest(TestHex.suite());
         suite.addTest(TestMetaphone.suite());
         suite.addTest(TestSoundex.suite());
         suite.addTest(TestRefinedSoundex.suite());
Index: src/test/org/apache/commons/codec/TestEncoder.java
===================================================================
RCS file: /home/cvspublic/jakarta-commons-sandbox/codec/src/test/org/apache/commons/codec/TestEncoder.java,v
retrieving revision 1.1
diff -u -r1.1 TestEncoder.java
--- src/test/org/apache/commons/codec/TestEncoder.java  18 Nov 2002 13:00:26 -0000      1.1
+++ src/test/org/apache/commons/codec/TestEncoder.java  1 Dec 2002 22:03:20 -0000
@@ -85,6 +85,7 @@
 
     public void testEncodeNull() {
         Encoder encoder = makeEncoder();
-        encoder.encode(null);
+        String nullStr = null;
+        encoder.encode(nullStr);
     }        
 }
Index: src/test/org/apache/commons/codec/TestHex.java
===================================================================
RCS file: src/test/org/apache/commons/codec/TestHex.java
diff -N src/test/org/apache/commons/codec/TestHex.java
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ src/test/org/apache/commons/codec/TestHex.java      1 Dec 2002 22:03:22 -0000
@@ -0,0 +1,122 @@
+/*
+ * $Header: /home/cvspublic/jakarta-commons-sandbox/codec/src/test/org/apache/commons/codec/TestMetaphone.java,v 1.2 2002/11/18 13:00:26 rwaldhoff Exp $
+ * $Revision: 1.2 $
+ * $Date: 2002/11/18 13:00:26 $
+ *
+ * ====================================================================
+ *
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2002 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution, if
+ *    any, must include the following acknowlegement:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowlegement may appear in the software itself,
+ *    if and wherever such third-party acknowlegements normally appear.
+ *
+ * 4. The names "The Jakarta Project", "Commons", and "Apache Software
+ *    Foundation" must not be used to endorse or promote products derived
+ *    from this software without prior written permission. For written
+ *    permission, please contact [EMAIL PROTECTED]
+ *
+ * 5. Products derived from this software may not be called "Apache"
+ *    nor may "Apache" appear in their names without prior written
+ *    permission of the Apache Group.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ *
+ */
+package org.apache.commons.codec;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * JUnit tests for the Hex encoder. In addition to the tests declared here,
+ * the tests defined in TestEncoder (such as testEncodeNull) are inherited
+ * and run against the encoder supplied by makeEncoder().
+ *
+ * @version $Revision: 1.2 $ $Date: 2002/11/18 13:00:26 $
+ * @author Rodney Waldhoff
+ */
+public class TestHex extends TestEncoder {
+
+    /** Encoder under test; created in setUp() and released in tearDown(). */
+    private Hex _hex = null;
+
+    public TestHex(String name) {
+        super(name);
+    }
+
+    /** Builds a suite containing every test method of this class. */
+    public static Test suite() {
+        TestSuite all = new TestSuite(TestHex.class);
+        return all;
+    }
+
+    /** Supplies the encoder instance used by the inherited tests. */
+    protected Encoder makeEncoder() {
+        return new Hex();
+    }
+
+    public void setUp() throws Exception {
+        super.setUp();
+        _hex = new Hex();
+    }
+
+    public void tearDown() throws Exception {
+        super.tearDown();
+        _hex = null;
+    }
+
+    // ------------------------------------------------------------------------
+
+    /** Checks the hex string produced for several three-byte inputs. */
+    public void testHex() {
+        assertHex("000000", 0x00, 0x00, 0x00);
+        assertHex("000001", 0x00, 0x00, 0x01);
+        assertHex("ffffff", 0xFF, 0xFF, 0xFF);
+        assertHex("cdbb35", 0xCD, 0xBB, 0x35);
+    }
+
+    /**
+     * Encodes the three given byte values with the encoder under test and
+     * compares the result with the expected string.
+     */
+    private void assertHex(String expected, int first, int second, int third) {
+        byte[] raw = { (byte) first, (byte) second, (byte) third };
+        assertEquals(expected, _hex.encode(raw));
+    }
+}


--
To unsubscribe, e-mail:   <mailto:[EMAIL PROTECTED]>
For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>

Reply via email to