language Metaphone.java RefinedSoundex.java Soundex.java

tobrien Fri, 30 May 2003 09:10:50 -0700

tobrien     2003/05/29 16:03:29

  Modified:    codec    checkstyle.properties project.properties
               codec/src/java/org/apache/commons/codec/base64 Base64.java
               codec/src/java/org/apache/commons/codec/binary Base64.java
                        Hex.java
               codec/src/java/org/apache/commons/codec/language
                        Metaphone.java RefinedSoundex.java Soundex.java
  Log:
  Fixed a number of checkstyle problems - from around 270 checkstyle issues to 16
  
  Revision  Changes    Path
  1.2       +15 -10    jakarta-commons/codec/checkstyle.properties
  
  Index: checkstyle.properties
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/codec/checkstyle.properties,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- checkstyle.properties     25 Apr 2003 17:50:55 -0000      1.1
  +++ checkstyle.properties     29 May 2003 23:03:28 -0000      1.2
  @@ -1,14 +1,19 @@
  -#checkstyle.header.file=LICENSE.txt
  +checkstyle.header.file=LICENSE.txt
   # 2-5 = CVS Header in Commons license, 10 = copyright date, 32 = product name
  -checkstyle.header.ignoreline=2,3,4,5,10,32
  +#checkstyle.header.ignoreline=2,3,4,5,10,32
   
  -checkstyle.ignore.maxlinelen=2
  +# Ignore operator wrap, this has the effect of allowing
  +# operators to appear at both the eol and the nl.  This
  +# setting should be eol, but checkstyle had problems 
  +# parsing this property when set to "eol".  "ignore"
  +# was selected as a fallback.
  +checkstyle.wrap.operator = ignore
  +
  +# Ignore padding around parentheses, this allows for both
  +# foo(a,b), and foo( a, b ).
  +checkstyle.paren.pad = ignore
  +
  +# One should not be instantiating a java.lang.Boolean
  +checkstyle.illegal.instantiations = java.lang.Boolean
   
  -checkstyle.excludes=**/parser/*
  -checkstyle.lcurly.type=eol
  -checkstyle.lcurly.method=nlow
  -checkstyle.lcurly.other=eol
  -checkstyle.rcurly=alone
  -checkstyle.javadoc.scope=nothing
   
  -checkstyle.allow.protected=true
  
  
  
  1.2       +0 -1      jakarta-commons/codec/project.properties
  
  Index: project.properties
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/codec/project.properties,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- project.properties        25 Apr 2003 17:50:55 -0000      1.1
  +++ project.properties        29 May 2003 23:03:28 -0000      1.2
  @@ -3,7 +3,6 @@
   ##
   
   maven.checkstyle.properties=${basedir}/checkstyle.properties 
  -maven.checkstyle.excludes=**/parser/*
   maven.test.failure = false
   maven.junit.fork=true
   maven.linkcheck.enable=true 
  
  
  
  1.2       +95 -39    jakarta-commons/codec/src/java/org/apache/commons/codec/base64/Base64.java
  
  Index: Base64.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/codec/src/java/org/apache/commons/codec/base64/Base64.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- Base64.java       25 Apr 2003 17:50:56 -0000      1.1
  +++ Base64.java       29 May 2003 23:03:28 -0000      1.2
  @@ -84,17 +84,56 @@
   public final class Base64 {
   
       protected static final String DEFAULT_CHAR_ENCODING = "ISO-8859-1";
  -    private static final int BASELENGTH = 255;
  -    private static final int LOOKUPLENGTH = 64;
  -    private static final int TWENTYFOURBITGROUP = 24;
  -    private static final int EIGHTBIT = 8;
  -    private static final int SIXTEENBIT = 16;
  -    private static final int SIXBIT = 6;
  -    private static final int FOURBYTE = 4;
  -    private static final int SIGN = -128;
  -    private static final byte PAD = (byte) '=';
  -    private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
  +
  +    /**
  +     * The base length
  +     */
  +    static final int BASELENGTH = 255;
  +
  +    /**
  +     * Lookup length
  +     */
  +    static final int LOOKUPLENGTH = 64;
  +
  +    /**
  +     * Used to calculate the number of bits in a byte.
  +     */
  +    static final int EIGHTBIT = 8;
  +
  +    /**
  +     * Used when encoding something which has fewer than 24 bits
  +     */
  +    static final int SIXTEENBIT = 16;
  +
  +    /**
  +     * Constant used to determine how many bits data contains
  +     */
  +    static final int TWENTYFOURBITGROUP = 24;
  +
  +    /**
  +     * Used to get the number of Quadruples
  +     */
  +    static final int FOURBYTE = 4;
  +
  +    /**
  +     * Used to test the sign of a byte
  +     */
  +    static final int SIGN = -128;
  +    
  +    /**
  +     * Byte used to pad output
  +     */
  +    static final byte PAD = (byte) '=';
  +
  +    // Create arrays to hold the base64 characters and a 
  +    // lookup for base64 chars
       private static byte[] base64Alphabet = new byte[BASELENGTH];
  +
  +    private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
  +    
  +    /**
  +     * Lookup table
  +     */
       private static byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH];
   
       static {
  @@ -133,17 +172,37 @@
   
       }
   
  +    /**
  +     * Tests to see whether the bytes of this string are
  +     * Base64
  +     *
  +     * @param isValidString String to test
  +     * @return true if String is base64
  +     */
       public static boolean isBase64(String isValidString) {
           return (isBase64(isValidString.getBytes()));
       }
   
  -
  +    /**
  +     * Tests a byte to see whether it falls within the Base64
  +     * alphabet (or if it is a padding character).
  +     *
  +     * @param octect byte to test
  +     * @return true if byte is in alphabet or padding
  +     */
       public static boolean isBase64(byte octect) {
           // Should we ignore white space?
           return (octect == PAD || base64Alphabet[octect] != -1);
       }
   
  -
  +    /**
  +     * Tests byte array to see if all characters are within the
  +     * Base64 alphabet
  +     *
  +     * @param arrayOctect A byte[] to test
  +     * @return true if all data falls within the Base64 alphabet OR if the
  +     *         array is empty.
  +     */
       public static boolean isBase64(byte[] arrayOctect) {
           int length = arrayOctect.length;
           if (length == 0) {
  @@ -177,8 +236,7 @@
           if (fewerThan24bits != 0) {
               //data not divisible by 24 bit
               encodedData = new byte[(numberTriplets + 1) * 4];
  -        }
  -        else {
  +        } else {
               // 16 or 8 bit 
               encodedData = new byte[numberTriplets * 4];
           }
  @@ -212,8 +270,10 @@
                   : (byte) ((b3) >> 6 ^ 0xfc);
   
               encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
  -            encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)];
  -            encodedData[encodedIndex + 2] = lookUpBase64Alphabet[(l << 2) | val3];
  +            encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 
  +                                                                 | (k << 4)];
  +            encodedData[encodedIndex + 2] = lookUpBase64Alphabet[(l << 2) 
  +                                                                 | val3];
               encodedData[encodedIndex + 3] = lookUpBase64Alphabet[b3 & 0x3f];
           }
   
  @@ -231,8 +291,7 @@
               encodedData[encodedIndex + 1] = lookUpBase64Alphabet[k << 4];
               encodedData[encodedIndex + 2] = PAD;
               encodedData[encodedIndex + 3] = PAD;
  -        } 
  -        else if (fewerThan24bits == SIXTEENBIT) {
  +        } else if (fewerThan24bits == SIXTEENBIT) {
               b1 = binaryData[dataIndex];
               b2 = binaryData[dataIndex + 1];
               l = (byte) (b2 & 0x0f);
  @@ -247,7 +306,8 @@
                   : (byte) ((b2) >> 4 ^ 0xf0);
   
               encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
  -            encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)];
  +            encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 
  +                                                                 | (k << 4)];
               encodedData[encodedIndex + 2] = lookUpBase64Alphabet[l << 2];
               encodedData[encodedIndex + 3] = PAD;
           }
  @@ -266,8 +326,7 @@
       public static String encode(String data) {
            try {
                return encode(data, DEFAULT_CHAR_ENCODING);
  -         }
  -         catch (UnsupportedEncodingException uee) {
  +         } catch (UnsupportedEncodingException uee) {
                throw new IllegalStateException(uee.toString());
            }
        }
  @@ -281,12 +340,11 @@
        *
        * @param data String of data to convert
        * @param charEncoding the character encoding to use when converting
  -     * a String to a byte[]
  +     *                     a String to a byte[]
        * @return Base64-encoded String
        */
       public static String encode(String data, String charEncoding)
  -        throws UnsupportedEncodingException 
  -    {
  +        throws UnsupportedEncodingException {
   
           // Check arguments
           if (data == null) {
  @@ -301,8 +359,7 @@
           OutputStreamWriter osw = new OutputStreamWriter(bos, charEncoding);
           try {
               osw.write(data);
  -        }
  -        catch (IOException ioe) {
  +        } catch (IOException ioe) {
               throw new RuntimeException(ioe.toString());
           }
   
  @@ -316,8 +373,7 @@
           bos = new ByteArrayOutputStream(encodedData.length);
           try {
               bos.write(encodedData);
  -        }
  -        catch (IOException ioe) {
  +        } catch (IOException ioe) {
               throw new RuntimeException(ioe.toString());
           }
   
  @@ -327,7 +383,7 @@
       /**
        * Decodes Base64 data into octects
        *
  -     * @param binaryData Byte array containing Base64 data
  +     * @param base64Data Byte array containing Base64 data
        * @return Array containing decoded data.
        */
       public static byte[] decode(byte[] base64Data) {
  @@ -370,19 +426,19 @@
                   b4 = base64Alphabet[marker1];
   
                   decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
  -                decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
  +                decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) 
  +                                                        | ((b3 >> 2) & 0xf));
                   decodedData[encodedIndex + 2] = (byte) (b3 << 6 | b4);
  -            } 
  -            else if (marker0 == PAD) {
  +            } else if (marker0 == PAD) {
                   //Two PAD e.g. 3c[Pad][Pad]
                   decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
  -            } 
  -            else if (marker1 == PAD) {
  +            } else if (marker1 == PAD) {
                   //One PAD e.g. 3cQ[Pad]
                   b3 = base64Alphabet[marker0];
   
                   decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
  -                decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
  +                decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) 
  +                                                        | ((b3 >> 2) & 0xf));
               }
               encodedIndex += 3;
           }
  
  
  
  1.4       +153 -63   jakarta-commons/codec/src/java/org/apache/commons/codec/binary/Base64.java
  
  Index: Base64.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/codec/src/java/org/apache/commons/codec/binary/Base64.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- Base64.java       14 May 2003 02:40:18 -0000      1.3
  +++ Base64.java       29 May 2003 23:03:28 -0000      1.4
  @@ -78,25 +78,60 @@
     * @author <a href="[EMAIL PROTECTED]">Daniel Rall</a>
     * @author <a href="[EMAIL PROTECTED]">Martin Redington</a>
     * @author <a href="mailto:[EMAIL PROTECTED]">Gary Gregory</a>
  +  * @author <a href="mailto:[EMAIL PROTECTED]">Tim O'Brien</a>
     * @since 1.0-dev
  -  *
  -  * @todo Add more documentation
     */
   public class Base64 implements BinaryEncoder, BinaryDecoder {
   
  -    // Create constants pertaining to the chunk requirement
  +    /**
  +     * Chunk size according to RFC 2045
  +     */
       static final int CHUNK_SIZE = 76;
  +
  +    /**
  +     * Chunk separator, we use a newline to separate chunks
  +     * of encoded data (if you ask for it to be chunked)
  +     */
       static final byte[] CHUNK_SEPARATOR = "\n".getBytes();
   
  -    // Create numerical and byte constants 
  +    /**
  +     * The base length
  +     */
       static final int BASELENGTH = 255;
  +
  +    /**
  +     * Lookup length
  +     */
       static final int LOOKUPLENGTH = 64;
  -    static final int TWENTYFOURBITGROUP = 24;
  +
  +    /**
  +     * Used to calculate the number of bits in a byte.
  +     */
       static final int EIGHTBIT = 8;
  +
  +    /**
  +     * Used when encoding something which has fewer than 24 bits
  +     */
       static final int SIXTEENBIT = 16;
  -    static final int SIXBIT = 6;
  +
  +    /**
  +     * Constant used to determine how many bits data contains
  +     */
  +    static final int TWENTYFOURBITGROUP = 24;
  +
  +    /**
  +     * Used to get the number of Quadruples
  +     */
       static final int FOURBYTE = 4;
  +
  +    /**
  +     * Used to test the sign of a byte
  +     */
       static final int SIGN = -128;
  +    
  +    /**
  +     * Byte used to pad output
  +     */
       static final byte PAD = (byte) '=';
   
       // Create arrays to hold the base64 characters and a 
  @@ -141,15 +176,21 @@
       private static boolean isBase64(byte octect) {
           if (octect == PAD) {
               return true;
  -        } 
  -        else if (base64Alphabet[octect] == -1) {
  +        } else if (base64Alphabet[octect] == -1) {
               return false;
  -        } 
  -        else {
  +        } else {
               return true;
           }
       }
   
  +    /**
  +     * This array tests a given byte array to see if it contains
  +     * only valid characters within the Base64 alphabet.
  +     *
  +     * @param arrayOctect byte array to test
  +     * @return true if all bytes are valid characters in the Base64
  +     *         alphabet or if the byte array is empty; false, otherwise
  +     */
       public static boolean isArrayByteBase64(byte[] arrayOctect) {
   
           arrayOctect = discardWhitespace(arrayOctect);
  @@ -168,15 +209,41 @@
           return true;
       }
   
  -
  +    /**
  +     * Encodes binary data using the base64 algorithm (this
  +     * does not "chunk" the output).
  +     *
  +     * @param binaryData binary data to encode
  +     * @return Base64 characters
  +     */
       public static byte[] encodeBase64(byte[] binaryData) {
           return (encodeBase64(binaryData, false));
       }
   
  +    /**
  +     * Encodes binary data using the base64 algorithm and chunks
  +     * the encoded output into 76 character blocks
  +     *
  +     * @param binaryData binary data to encode
  +     * @return Base64 characters chunked in 76 character blocks
  +     */
       public static byte[] encodeBase64Chunked(byte[] binaryData) {
           return (encodeBase64(binaryData, true));
       }
   
  +
  +    /**
  +     * Decodes an Object using the base64 algorithm.  This method
  +     * is provided in order to satisfy the requirements of the
  +     * Decoder interface, and will throw a DecoderException if the
  +     * supplied object is not of type byte[].
  +     *
  +     * @param pObject Object to decode
  +     * @return An object (of type byte[]) containing the 
  +     *         binary data which corresponds to the byte[] supplied.
  +     * @throws DecoderException if the parameter supplied is not
  +     *                          of type byte[]
  +     */
       public Object decode(Object pObject) throws DecoderException {
   
           Object result;
  @@ -186,8 +253,7 @@
                   "Parameter supplied to "
                       + "Base64 "
                       + "decode is not a byte[]");
  -        } 
  -        else {
  +        } else {
               result = decode((byte[]) pObject);
           }
   
  @@ -195,6 +261,15 @@
   
       }
   
  +    /**
  +     * Decodes a byte[] containing
  +     * characters in the Base64 alphabet.
  +     *
  +     * @param pArray A byte array containing Base64 character data
  +     * @return a byte array containing binary data
  +     * @throws DecoderException if there is a Decoder-specific exception
  +     *                          during the decoding process
  +     */
       public byte[] decode(byte[] pArray) throws DecoderException {
           byte[] result;
           result = decodeBase64((byte[]) pArray);
  @@ -205,6 +280,8 @@
        * Encodes hex octects into Base64.
        *
        * @param binaryData Array containing binary data to encode.
  +     * @param isChunked if isChunked is true this encoder will chunk
  +     *                  the base64 output into 76 character blocks
        * @return Base64-encoded data.
        */
       public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) {
  @@ -218,8 +295,7 @@
           if (fewerThan24bits != 0) {
               //data not divisible by 24 bit
               encodedDataLength = (numberTriplets + 1) * 4;
  -        } 
  -        else {
  +        } else {
               // 16 or 8 bit
               encodedDataLength = numberTriplets * 4;
           }
  @@ -318,8 +394,7 @@
               encodedData[encodedIndex + 1] = lookUpBase64Alphabet[k << 4];
               encodedData[encodedIndex + 2] = PAD;
               encodedData[encodedIndex + 3] = PAD;
  -        } 
  -        else if (fewerThan24bits == SIXTEENBIT) {
  +        } else if (fewerThan24bits == SIXTEENBIT) {
   
               b1 = binaryData[dataIndex];
               b2 = binaryData[dataIndex + 1];
  @@ -360,7 +435,7 @@
       /**
        * Decodes Base64 data into octects
        *
  -     * @param binaryData Byte array containing Base64 data
  +     * @param base64Data Byte array containing Base64 data
        * @return Array containing decoded data.
        */
       public static byte[] decodeBase64(byte[] base64Data) {
  @@ -391,33 +466,31 @@
               }
               decodedData = new byte[lastData - numberQuadruple];
           }
  -
  +        
           for (int i = 0; i < numberQuadruple; i++) {
               dataIndex = i * 4;
               marker0 = base64Data[dataIndex + 2];
               marker1 = base64Data[dataIndex + 3];
  -
  +            
               b1 = base64Alphabet[base64Data[dataIndex]];
               b2 = base64Alphabet[base64Data[dataIndex + 1]];
  -
  +            
               if (marker0 != PAD && marker1 != PAD) {
                   //No PAD e.g 3cQl
                   b3 = base64Alphabet[marker0];
                   b4 = base64Alphabet[marker1];
  -
  +                
                   decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
                   decodedData[encodedIndex + 1] =
                       (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
                   decodedData[encodedIndex + 2] = (byte) (b3 << 6 | b4);
  -            } 
  -            else if (marker0 == PAD) {
  +            } else if (marker0 == PAD) {
                   //Two PAD e.g. 3c[Pad][Pad]
                   decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
  -            } 
  -            else if (marker1 == PAD) {
  +            } else if (marker1 == PAD) {
                   //One PAD e.g. 3cQ[Pad]
                   b3 = base64Alphabet[marker0];
  -
  +                
                   decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
                   decodedData[encodedIndex + 1] =
                       (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
  @@ -426,7 +499,7 @@
           }
           return decodedData;
       }
  -
  +    
       /**
        * Discards any whitespace from a base-64 encoded block.
        *
  @@ -437,15 +510,15 @@
       static byte[] discardWhitespace(byte[] data) {
           byte groomedData[] = new byte[data.length];
           int bytesCopied = 0;
  -
  +        
           for (int i = 0; i < data.length; i++) {
               switch (data[i]) {
  -                case (byte) ' ' :
  -                case (byte) '\n' :
  -                case (byte) '\r' :
  -                case (byte) '\t' :
  +            case (byte) ' ' :
  +            case (byte) '\n' :
  +            case (byte) '\r' :
  +            case (byte) '\t' :
                       break;
  -                default:
  +            default:
                       groomedData[bytesCopied++] = data[i];
               }
           }
  @@ -456,38 +529,47 @@
   
           return packedData;
       }
  -    
  -     /**
  -      * Discards any characters outside of the base64 alphabet, per
  -      * the requirements on page 25 of RFC 2045 - "Any characters
  -      * outside of the base64 alphabet are to be ignored in base64
  -      * encoded data."
  -      *
  -      * @param data The base-64 encoded data to groom
  -      * @return The data, less non-base64 characters (see RFC 2045).
  -      */
  -     static byte[] discardNonBase64(byte[] data) {
  -             byte groomedData[] = new byte[data.length];
  -             int bytesCopied = 0;
  -
  -             for (int i = 0; i < data.length; i++) {
  -                     if( isBase64(data[i]) ) {
  -                       groomedData[bytesCopied++] = data[i];
  -                     }
  -             }
   
  -             byte packedData[] = new byte[bytesCopied];
  +    /**
  +     * Discards any characters outside of the base64 alphabet, per
  +     * the requirements on page 25 of RFC 2045 - "Any characters
  +     * outside of the base64 alphabet are to be ignored in base64
  +     * encoded data."
  +     *
  +     * @param data The base-64 encoded data to groom
  +     * @return The data, less non-base64 characters (see RFC 2045).
  +     */
  +    static byte[] discardNonBase64(byte[] data) {
  +        byte groomedData[] = new byte[data.length];
  +        int bytesCopied = 0;
  +
  +        for (int i = 0; i < data.length; i++) {
  +            if (isBase64(data[i])) {
  +                groomedData[bytesCopied++] = data[i];
  +            }
  +        }
   
  -             System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
  +        byte packedData[] = new byte[bytesCopied];
  +
  +        System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
   
  -             return packedData;
  -     }
  +        return packedData;
  +    }
   
   
       // Implementation of the Encoder Interface
   
       /**
  -     * encode an Object
  +     * Encodes an Object using the base64 algorithm.  This method
  +     * is provided in order to satisfy the requirements of the
  +     * Encoder interface, and will throw an EncoderException if the
  +     * supplied object is not of type byte[].
  +     *
  +     * @param pObject Object to encode
  +     * @return An object (of type byte[]) containing the 
  +     *         base64 encoded data which corresponds to the byte[] supplied.
  +     * @throws EncoderException if the parameter supplied is not
  +     *                          of type byte[]
        */
       public Object encode(Object pObject) throws EncoderException {
   
  @@ -498,8 +580,7 @@
                   "Parameter supplied to "
                       + "Base64 "
                       + "encode is not a byte[]");
  -        } 
  -        else {
  +        } else {
               result = encode((byte[]) pObject);
           }
   
  @@ -507,6 +588,15 @@
   
       }
   
  +    /**
  +     * Encodes a byte[] containing binary data, into a byte[] containing
  +     * characters in the Base64 alphabet.
  +     *
  +     * @param pArray a byte array containing binary data
  +     * @return A byte array containing only Base64 character data
  +     * @throws EncoderException if there is an Encoder specific exception
  +     *                          during the encoding process
  +     */
       public byte[] encode(byte[] pArray) throws EncoderException {
           return (encodeBase64(pArray, false));
       }
  
  
  
  1.2       +20 -11    jakarta-commons/codec/src/java/org/apache/commons/codec/binary/Hex.java
  
  Index: Hex.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/codec/src/java/org/apache/commons/codec/binary/Hex.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- Hex.java  25 Apr 2003 17:50:56 -0000      1.1
  +++ Hex.java  29 May 2003 23:03:28 -0000      1.2
  @@ -72,11 +72,14 @@
   
   
       /**
  -       Converts an array of bytes into an array of characters representing the
  -       hexidecimal values of each byte in order. The returned array will be
  -       double the length of the passed array, as it takes two characters to
  -       represent any given byte.
  -    */
  +     * Converts an array of bytes into an array of characters representing the
  +     * hexidecimal values of each byte in order. The returned array will be
  +     * double the length of the passed array, as it takes two characters to
  +     * represent any given byte.
  +     *
  +     * @param data array of byte to convert to Hex characters
  +     * @return A char[] containing hexidecimal characters
  +     */
       public static char[] encodeHex(byte[] data) {
   
           int l = data.length;
  @@ -95,12 +98,18 @@
   
   
       /**
  -       Converts an array of characters representing hexidecimal values into an
  -       array of bytes of those same values. The returned array will be half the
  -       length of the passed array, as it takes two characters to represent any
  -       given byte. An exception is thrown if the passed char array has an odd
  -       number of elements.
  -    */
  +     * Converts an array of characters representing hexidecimal values into an
  +     * array of bytes of those same values. The returned array will be half the
  +     * length of the passed array, as it takes two characters to represent any
  +     * given byte. An exception is thrown if the passed char array has an odd
  +     * number of elements.
  +     * 
  +     * @param data An array of characters containing hexidecimal digits
  +     * @return A byte array containing binary data decoded from
  +     *         the supplied char array.
  +     * @throws Exception Thrown if an odd number of characters is supplied
  +     *                   to this function
  +     */
       public static byte[] decodeHex(char[] data) throws Exception {
   
           int l = data.length;
  
  
  
  1.2       +218 -111  jakarta-commons/codec/src/java/org/apache/commons/codec/language/Metaphone.java
  
  Index: Metaphone.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/codec/src/java/org/apache/commons/codec/language/Metaphone.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- Metaphone.java    25 Apr 2003 17:50:56 -0000      1.1
  +++ Metaphone.java    29 May 2003 23:03:29 -0000      1.2
  @@ -71,12 +71,29 @@
    */
   public class Metaphone implements StringEncoder {
   
  +    /**
  +     * Five vowels in the English language 
  +     */
       private String vowels = "AEIOU" ;
  +
  +    /**
  +     * Variable used in Metaphone algorithm
  +     */
       private String frontv = "EIY"   ;
  +
  +    /**
  +     * Variable used in Metaphone algorithm
  +     */
       private String varson = "CSPTG" ;
   
  +    /**
  +     * The max code length for metaphone is 4
  +     */
       private int maxCodeLen = 4 ;
   
  +    /**
  +     * Creates an instance of the Metaphone encoder
  +     */
       public Metaphone() {
           super();
       }
  @@ -87,14 +104,21 @@
        * All input is converted to upper case.
        * Limitations: Input format is expected to be a single ASCII word
        * with only characters in the A - Z range, no punctuation or numbers.
  +     *
  +     * @param txt String to find the metaphone code for
  +     * @return A metaphone code corresponding to the String supplied
        */
  -    public String metaphone(String txt){
  +    public String metaphone(String txt) {
           int mtsz = 0  ;
           boolean hard = false ;
  -        if ((txt == null) ||
  -            (txt.length() == 0)) return "" ;
  +        if ((txt == null) 
  +            || (txt.length() == 0)) {
  +            return "" ;
  +        }
           // single character is itself
  -        if (txt.length() == 1) return txt.toUpperCase() ;
  +        if (txt.length() == 1) {
  +            return txt.toUpperCase() ;
  +        }
         
           char[] inwd = txt.toUpperCase().toCharArray() ;
         
  @@ -102,167 +126,231 @@
           StringBuffer local = new StringBuffer(40); // manipulate
           StringBuffer code = new StringBuffer(10) ; //   output
           // handle initial 2 characters exceptions
  -        switch(inwd[0]){
  -        case 'K': case 'G' : case 'P' : /* looking for KN, etc*/
  -            if (inwd[1] == 'N')local.append(inwd, 1, inwd.length - 1);
  -            else local.append(inwd);
  +        switch(inwd[0]) {
  +        case 'K' : 
  +        case 'G' : 
  +        case 'P' : /* looking for KN, etc*/
  +            if (inwd[1] == 'N') {
  +                local.append(inwd, 1, inwd.length - 1);
  +            } else {
  +                local.append(inwd);
  +            }
               break;
           case 'A': /* looking for AE */
  -            if (inwd[1] == 'E')local.append(inwd, 1, inwd.length - 1);
  -            else local.append(inwd);
  +            if (inwd[1] == 'E') {
  +                local.append(inwd, 1, inwd.length - 1);
  +            } else {
  +                local.append(inwd);
  +            }
               break;
           case 'W' : /* looking for WR or WH */
  -            if (inwd[1] == 'R'){   // WR -> R
  -                local.append(inwd, 1, inwd.length - 1); break ;
  +            if (inwd[1] == 'R') {   // WR -> R
  +                local.append(inwd, 1, inwd.length - 1); 
  +                break ;
               }
  -            if (inwd[1] == 'H'){
  +            if (inwd[1] == 'H') {
                   local.append(inwd, 1, inwd.length - 1);
  -                local.setCharAt(0,'W'); // WH -> W
  +                local.setCharAt(0, 'W'); // WH -> W
  +            } else {
  +                local.append(inwd);
               }
  -            else local.append(inwd);
               break;
           case 'X' : /* initial X becomes S */
  -            inwd[0] = 'S' ;local.append(inwd);
  +            inwd[0] = 'S';
  +            local.append(inwd);
               break ;
           default :
               local.append(inwd);
           } // now local has working string with initials fixed
  +
           int wdsz = local.length();
           int n = 0 ;
  -        while((mtsz < maxCodeLen) && // max code size of 4 works well
  -              (n < wdsz)){
  +
  +        while ((mtsz < maxCodeLen) // max code size of 4 works well
  +               && (n < wdsz)) {
               char symb = local.charAt(n) ;
               // remove duplicate letters except C
  -            if ((symb != 'C') &&
  -                (n > 0) && (local.charAt(n - 1) == symb)) n++ ;
  -            else{ // not dup
  -                switch(symb){
  +            if ((symb != 'C') 
  +                && (n > 0) && (local.charAt(n - 1) == symb)) {
  +                n++ ;
  +            } else { // not dup
  +                switch(symb) {
                   case 'A' : case 'E' : case 'I' : case 'O' : case 'U' :
  -                    if (n == 0) { code.append(symb);mtsz++;
  +                    if (n == 0) { 
  +                        code.append(symb);
  +                        mtsz++;
                       }
                       break ; // only use vowel if leading char
                   case 'B' :
  -                    if ((n > 0) &&
  -                        !(n + 1 == wdsz) && // not MB at end of word
  -                        (local.charAt(n - 1) == 'M')) {
  +                    if ((n > 0) 
  +                        && !(n + 1 == wdsz) // not MB at end of word
  +                        && (local.charAt(n - 1) == 'M')) {
  +                        code.append(symb);
  +                    } else {
                           code.append(symb);
                       }
  -                    else code.append(symb);
  -                    mtsz++ ;
  -                    break ;
  +                    mtsz++;
  +                    break;
                   case 'C' : // lots of C special cases
                       /* discard if SCI, SCE or SCY */
  -                    if ((n > 0) &&
  -                        (local.charAt(n-1) == 'S') &&
  -                        (n + 1 < wdsz) &&
  -                        (frontv.indexOf(local.charAt(n + 1)) >= 0)){ break ;}
  +                    if ((n > 0) 
  +                        && (local.charAt(n - 1) == 'S') 
  +                        && (n + 1 < wdsz) 
  +                        && (frontv.indexOf(local.charAt(n + 1)) >= 0)) { 
  +                        break ;
  +                    }
                       tmpS = local.toString();
                       if (tmpS.indexOf("CIA", n) == n) { // "CIA" -> X
                           code.append('X'); mtsz++; break ;
                       }
  -                    if ((n + 1 < wdsz) &&
  -                        (frontv.indexOf(local.charAt(n+1))>= 0)){
  -                        code.append('S');mtsz++; break ; // CI,CE,CY -> S
  -                    }
  -                    if ((n > 0) &&
  -                        (tmpS.indexOf("SCH",n-1)== n-1)){ // SCH->sk
  -                        code.append('K') ; mtsz++;break ;
  -                    }
  -                    if (tmpS.indexOf("CH", n) == n){ // detect CH
  -                        if ((n == 0) &&
  -                            (wdsz >= 3) &&    // CH consonant -> K consonant
  -                            (vowels.indexOf(local.charAt(2)) < 0)){
  +                    if ((n + 1 < wdsz) 
  +                        && (frontv.indexOf(local.charAt(n + 1)) >= 0)) {
  +                        code.append('S');
  +                        mtsz++; 
  +                        break ; // CI,CE,CY -> S
  +                    }
  +                    if ((n > 0) 
  +                        && (tmpS.indexOf("SCH", n - 1) == n - 1)) { // SCH->sk
  +                        code.append('K') ; 
  +                        mtsz++;
  +                        break ;
  +                    }
  +                    if (tmpS.indexOf("CH", n) == n) { // detect CH
  +                        if ((n == 0) 
  +                            && (wdsz >= 3)     // CH consonant -> K consonant
  +                            && (vowels.indexOf(local.charAt(2)) < 0)) {
                               code.append('K');
  -                        }
  -                        else { code.append('X'); // CHvowel -> X
  +                        } else { 
  +                            code.append('X'); // CHvowel -> X
                           }
                           mtsz++;
  -                    }
  -                    else { code.append('K');mtsz++;
  +                    } else { 
  +                        code.append('K');
  +                        mtsz++;
                       }
                       break ;
                   case 'D' :
  -                    if ((n + 2 < wdsz)&&  // DGE DGI DGY -> J
  -                        (local.charAt(n+1) == 'G')&&
  -                        (frontv.indexOf(local.charAt(n+2))>= 0)){
  +                    if ((n + 2 < wdsz)   // DGE DGI DGY -> J
  +                        && (local.charAt(n + 1) == 'G')
  +                        && (frontv.indexOf(local.charAt(n + 2)) >= 0)) {
                           code.append('J'); n += 2 ;
  -                    }
  -                    else { code.append('T');
  +                    } else { 
  +                        code.append('T');
                       }
                       mtsz++;
                       break ;
                   case 'G' : // GH silent at end or before consonant
  -                    if ((n + 2 == wdsz)&&
  -                        (local.charAt(n+1) == 'H')) break ;
  -                    if ((n + 2 < wdsz) &&
  -                        (local.charAt(n+1) == 'H')&&
  -                        (vowels.indexOf(local.charAt(n+2)) < 0)) break ;
  +                    if ((n + 2 == wdsz)
  +                        && (local.charAt(n + 1) == 'H')) {
  +                        break;
  +                    }
  +                    if ((n + 2 < wdsz) 
  +                        && (local.charAt(n + 1) == 'H')
  +                        && (vowels.indexOf(local.charAt(n + 2)) < 0)) {
  +                        break;
  +                    }
                       tmpS = local.toString();
  -                    if ((n > 0) &&
  -                        (tmpS.indexOf("GN", n) == n)||
  -                        (tmpS.indexOf("GNED",n) == n)) break ; // silent G
  -                    if ((n > 0) &&
  -                        (local.charAt(n-1) == 'G')) hard = true ;
  -                    else hard = false ;
  -                    if ((n+1 < wdsz) &&
  -                        (frontv.indexOf(local.charAt(n+1)) >= 0)&&
  -                        (!hard)) code.append('J');
  -                    else code.append('K');
  +                    if ((n > 0) 
  +                        && (tmpS.indexOf("GN", n) == n)
  +                        || (tmpS.indexOf("GNED", n) == n)) {
  +                        break; // silent G
  +                    }
  +                    if ((n > 0) 
  +                        && (local.charAt(n - 1) == 'G')) {
  +                        hard = true ;
  +                    } else {
  +                        hard = false ;
  +                    }
  +                    if ((n + 1 < wdsz) 
  +                        && (frontv.indexOf(local.charAt(n + 1)) >= 0)
  +                        && (!hard)) {
  +                        code.append('J');
  +                    } else {
  +                        code.append('K');
  +                    }
                       mtsz++;
                       break ;
                   case 'H':
  -                    if (n + 1 == wdsz) break ; // terminal H
  -                    if ((n > 0) &&
  -                        (varson.indexOf(local.charAt(n-1)) >= 0)) break ;
  -                    if (vowels.indexOf(local.charAt(n+1)) >=0){
  -                        code.append('H') ; mtsz++;// Hvowel
  +                    if (n + 1 == wdsz) {
  +                        break ; // terminal H
                       }
  +                    if ((n > 0) 
  +                        && (varson.indexOf(local.charAt(n - 1)) >= 0)) {
  +                        break;
  +                    }
  +                    if (vowels.indexOf(local.charAt(n + 1)) >= 0) {
  +                        code.append('H'); 
  +                        mtsz++;// Hvowel
  +                    }
  +                    break;
  +                case 'F': 
  +                case 'J' : 
  +                case 'L' :
  +                case 'M': 
  +                case 'N' : 
  +                case 'R' :
  +                    code.append(symb); 
  +                    mtsz++; 
                       break;
  -                case 'F': case 'J' : case 'L' :
  -                case 'M': case 'N' : case 'R' :
  -                    code.append(symb); mtsz++; break ;
                   case 'K' :
  -                    if (n > 0){ // not initial
  -                        if (local.charAt(n -1) != 'C') {
  +                    if (n > 0) { // not initial
  +                        if (local.charAt(n - 1) != 'C') {
                               code.append(symb);
                           }
  +                    } else {
  +                        code.append(symb); // initial K
                       }
  -                    else   code.append(symb); // initial K
                       mtsz++ ;
                       break ;
                   case 'P' :
  -                    if ((n + 1 < wdsz) &&  // PH -> F
  -                        (local.charAt(n+1) == 'H'))code.append('F');
  -                    else code.append(symb);
  +                    if ((n + 1 < wdsz) 
  +                        && (local.charAt(n + 1) == 'H')) {
  +                        // PH -> F
  +                        code.append('F');
  +                    } else {
  +                        code.append(symb);
  +                    }
                       mtsz++;
                       break ;
                   case 'Q' :
  -                    code.append('K');mtsz++; break ;
  +                    code.append('K');
  +                    mtsz++; 
  +                    break;
                   case 'S' :
                       tmpS = local.toString();
  -                    if ((tmpS.indexOf("SH", n)== n) ||
  -                        (tmpS.indexOf("SIO",n)== n) ||
  -                        (tmpS.indexOf("SIA",n)== n)) code.append('X');
  -                    else code.append('S');
  -                    mtsz++ ;
  -                    break ;
  +                    if ((tmpS.indexOf("SH", n) == n) 
  +                        || (tmpS.indexOf("SIO", n) == n) 
  +                        || (tmpS.indexOf("SIA", n) == n)) {
  +                        code.append('X');
  +                    } else {
  +                        code.append('S');
  +                    }
  +                    mtsz++;
  +                    break;
                   case 'T' :
                       tmpS = local.toString(); // TIA TIO -> X
  -                    if ((tmpS.indexOf("TIA",n)== n)||
  -                        (tmpS.indexOf("TIO",n)== n)){
  -                        code.append('X'); mtsz++; break;
  +                    if ((tmpS.indexOf("TIA", n) == n) 
  +                        || (tmpS.indexOf("TIO", n) == n)) {
  +                        code.append('X'); 
  +                        mtsz++; 
  +                        break;
  +                    }
  +                    if (tmpS.indexOf("TCH", n) == n) {
  +                        break;
                       }
  -                    if (tmpS.indexOf("TCH",n)==n) break;
                       // substitute numeral 0 for TH (resembles theta after all)
  -                    if (tmpS.indexOf("TH", n)==n) code.append('0');
  -                    else code.append('T');
  +                    if (tmpS.indexOf("TH", n) == n) {
  +                        code.append('0');
  +                    } else {
  +                        code.append('T');
  +                    }
                       mtsz++ ;
                       break ;
                   case 'V' :
                       code.append('F'); mtsz++;break ;
                   case 'W' : case 'Y' : // silent if not followed by vowel
  -                    if ((n+1 < wdsz) &&
  -                        (vowels.indexOf(local.charAt(n + 1)) >= 0)) {
  +                    if ((n + 1 < wdsz) 
  +                     && (vowels.indexOf(local.charAt(n + 1)) >= 0)) {
                           code.append(symb);
                           mtsz++;
                       }
  @@ -278,29 +366,52 @@
               if (mtsz > 4) { code.setLength(4); }
           }
           return code.toString();
  -    } // end static method metaPhone()
  +    } 
  +    
       
  +    /**
  +     * Encodes an Object using the metaphone algorithm.  This method
  +     * is provided in order to satisfy the requirements of the
  +     * Encoder interface, and will throw an EncoderException if the
  +     * supplied object is not of type java.lang.String.
  +     *
  +     * @param pObject Object to encode
  +     * @return An object (of type java.lang.String) containing the
  +     *         metaphone code which corresponds to the String supplied.
  +     * @throws EncoderException if the parameter supplied is not
  +     *                          of type java.lang.String
  +     */
       public Object encode(Object pObject) throws EncoderException {
           Object result;
  -
           if (!(pObject instanceof java.lang.String)) {
               throw new EncoderException("Parameter supplied to Metaphone " 
                                          + "encode is not of type " 
                                          + "java.lang.String"); 
  -     } 
  -        else {
  +        } else {
               result = metaphone((String) pObject);
           }
  -
           return result;
       }
   
  +    /**
  +     * Encodes a String using the Metaphone algorithm. 
  +     *
  +     * @param pString String object to encode
  +     * @return The metaphone code corresponding to the String supplied
  +     * @throws EncoderException thrown if a Metaphone specific exception
  +     *                          is encountered.
  +     */
       public String encode(String pString) throws EncoderException {
           return (metaphone(pString));   
       }
   
       /**
  -     * Are the metaphones of two strings the same.
  +     * Tests if the metaphones of two strings are identical.
  +     *
  +     * @param str1 First of two strings to compare
  +     * @param str2 Second of two strings to compare
  +     * @return true if the metaphones of these strings are identical, 
  +     *         false otherwise.
        */
       public boolean isMetaphoneEqual(String str1, String str2) {
           return metaphone(str1).equals(metaphone(str2));
  @@ -310,16 +421,12 @@
        * Returns the maxCodeLen.
        * @return int
        */
  -    public int getMaxCodeLen() {
  -        return maxCodeLen;
  -    }
  +    public int getMaxCodeLen() { return maxCodeLen; }
   
       /**
        * Sets the maxCodeLen.
        * @param maxCodeLen The maxCodeLen to set
        */
  -    public void setMaxCodeLen(int maxCodeLen) {
  -        this.maxCodeLen = maxCodeLen;
  -    }
  +    public void setMaxCodeLen(int maxCodeLen) { this.maxCodeLen = maxCodeLen; }
   
   }
  
  
  
  1.5       +63 -16    
jakarta-commons/codec/src/java/org/apache/commons/codec/language/RefinedSoundex.java
  
  Index: RefinedSoundex.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-commons/codec/src/java/org/apache/commons/codec/language/RefinedSoundex.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- RefinedSoundex.java       29 May 2003 21:14:59 -0000      1.4
  +++ RefinedSoundex.java       29 May 2003 23:03:29 -0000      1.5
  @@ -68,25 +68,55 @@
    */
   public class RefinedSoundex implements StringEncoder {
   
  +    /**
  +     * RefinedSoundex is *refined* for a number of
  +     * reasons, one being that the mappings have been
  +     * altered.  This implementation contains default
  +     * mappings for US English.
  +     */
       public static final char[] US_ENGLISH_MAPPING =
           "01360240043788015936020505".toCharArray();
   
  +    /**
  +     * This static variable contains an instance of the
  +     * RefinedSoundex using the US_ENGLISH mapping.
  +     */
       public static final RefinedSoundex US_ENGLISH = new RefinedSoundex();
       
  +    /**
  +     * Every letter of the alphabet is "mapped" to a numerical 
  +     * value.  This char array holds the values to which each
  +     * letter is mapped.  This implementation contains a default
  +     * map for US_ENGLISH
  +     */
       private char[] soundexMapping;
   
  +    /**
  +     * Creates an instance of the RefinedSoundex object using the
  +     * default US English mapping.
  +     */
       public RefinedSoundex() {
           this(US_ENGLISH_MAPPING);
       }
   
  +    /**
  +     * Creates a refined soundex instance using a custom mapping.  This
  +     * constructor can be used to customize the mapping, and/or possibly
  +     * provide an internationalized mapping for a non-Western character
  +     * set.
  +     *
  +     * @param mapping Mapping array to use when finding the corresponding
  +     *                code for a given character
  +     */
       public RefinedSoundex(char[] mapping) {
           this.soundexMapping = mapping;
       }
   
       /**
  -     * Get the SoundEx value of a string.
  -     * This implementation is taken from the code-snippers on 
  -     * http://www.sourceforge.net/
  +     * Retrieves the Refined Soundex code for a given String object.
  +     *
  +     * @param str String to encode using the Refined Soundex algorithm
  +     * @return A soundex code for the String supplied
        */
       public String soundex(String str) {
           if (null == str || str.length() == 0) { return str; }
  @@ -104,8 +134,7 @@
               current = getMappingCode(str.charAt(i));
               if (current == last) {
                   continue;
  -            } 
  -            else if (current != 0) {
  +            } else if (current != 0) {
                   sBuf.append(current);   
               }
               
  @@ -116,37 +145,55 @@
           return sBuf.toString();
       }
   
  +    /**
  +     * Encodes a String using the refined soundex algorithm. 
  +     *
  +     * @param pString A String object to encode
  +     * @return A Soundex code corresponding to the String supplied
  +     * @throws EncoderException throws exception if there is an
  +     *                          encoding-specific problem
  +     */
       public String encode(String pString) throws EncoderException {
           return (soundex(pString));   
       }
   
  +    /**
  +     * Encodes an Object using the refined soundex algorithm.  This method
  +     * is provided in order to satisfy the requirements of the
  +     * Encoder interface, and will throw an EncoderException if the
  +     * supplied object is not of type java.lang.String.
  +     *
  +     * @param pObject Object to encode
  +     * @return An object (of type java.lang.String) containing the
  +     *         refined soundex code which corresponds to the String supplied.
  +     * @throws EncoderException if the parameter supplied is not
  +     *                          of type java.lang.String
  +     */
       public Object encode(Object pObject) throws EncoderException {
  -
           Object result;
  -
           if (!(pObject instanceof java.lang.String)) {
               throw new EncoderException("Parameter supplied to " 
                                          + "RefinedSoundex " 
                                          + "encode is not of type " 
                                          + "java.lang.String"); 
  -        } 
  -        else {
  +        } else {
               result = soundex((String) pObject);
           }
  -
           return result;
  -
       }
   
  -
       /**
  -     * Used internally by the SoundEx algorithm.
  +     * Returns the mapping code for a given character.  The mapping
  +     * codes are maintained in an internal char array named soundexMapping,
  +     * and the default values of these mappings are US English.
  +     *
  +     * @param c char to get mapping for
  +     * @return A character (really a numeral) to return for the given char
        */
       private char getMappingCode(char c) {
           if (!Character.isLetter(c)) {
               return 0;
  -        } 
  -        else {
  +        } else {
               return soundexMapping[Character.toUpperCase(c) - 'A'];
           }
       }
  
  
  
  1.4       +62 -15    
jakarta-commons/codec/src/java/org/apache/commons/codec/language/Soundex.java
  
  Index: Soundex.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-commons/codec/src/java/org/apache/commons/codec/language/Soundex.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- Soundex.java      12 May 2003 17:17:24 -0000      1.3
  +++ Soundex.java      29 May 2003 23:03:29 -0000      1.4
  @@ -61,39 +61,66 @@
    * relate similar names, but can also be used as a general purpose
    * scheme to find word with similar phonemes. 
    * 
  - * <!-- This link is broken: -->
  - * <!-- @see <a 
href="http://www.bluepoof.com/Soundex/info2.html";>http://www.bluepoof.com/Soundex/info2.html</a>
 -->
  - * 
    * @author [EMAIL PROTECTED]
    * @author [EMAIL PROTECTED]
    * @author [EMAIL PROTECTED]
    * @version $Revision$ $Date$
  - *
  - * @todo Internationalize Exception Messages
    */
   public class Soundex implements StringEncoder {
   
  +    /**
  +     * This is a default mapping of the 26 letters used
  +     * in US English.
  +     */
       public static final char[] US_ENGLISH_MAPPING =
           "01230120022455012623010202".toCharArray();
   
  +    /**
  +     * This static variable contains an instance of the
  +     * Soundex using the US_ENGLISH mapping.
  +     */
       public static final Soundex US_ENGLISH = new Soundex();
       
  +    /**
  +     * Every letter of the alphabet is "mapped" to a numerical 
  +     * value.  This char array holds the values to which each
  +     * letter is mapped.  This implementation contains a default
  +     * map for US_ENGLISH
  +     */
       private char[] soundexMapping;
  -    private int maxLength = 4;
   
  +    /**
  +     * The maximum length of a Soundex code - Soundex codes are
  +     * only four characters by definition.
  +     */
  +    private int maxLength = 4;
   
  +    /**
  +     * Creates an instance of the Soundex object using the default
  +     * US_ENGLISH mapping.
  +     */
       public Soundex() {
           this(US_ENGLISH_MAPPING);
       }
   
  +    /**
  +     * Creates a soundex instance using a custom mapping.  This
  +     * constructor can be used to customize the mapping, and/or possibly
  +     * provide an internationalized mapping for a non-Western character
  +     * set.
  +     *
  +     * @param mapping Mapping array to use when finding the corresponding
  +     *                code for a given character
  +     */
       public Soundex(char[] mapping) {
           this.soundexMapping = mapping;
       }
   
       /**
  -     * Get the SoundEx value of a string.
  -     * This implementation is taken from the code-snippers on 
  -     * http://www.sourceforge.net/
  +     * Retrieves the Soundex code for a given String object.
  +     *
  +     * @param str String to encode using the Soundex algorithm
  +     * @return A soundex code for the String supplied
        */
       public String soundex(String str) {
           if (null == str || str.length() == 0) { return str; }
  @@ -114,6 +141,18 @@
           return new String(out);
       }
   
  +    /**
  +     * Encodes an Object using the soundex algorithm.  This method
  +     * is provided in order to satisfy the requirements of the
  +     * Encoder interface, and will throw an EncoderException if the
  +     * supplied object is not of type java.lang.String.
  +     *
  +     * @param pObject Object to encode
  +     * @return An object (of type java.lang.String) containing the
  +     *         soundex code which corresponds to the String supplied.
  +     * @throws EncoderException if the parameter supplied is not
  +     *                          of type java.lang.String
  +     */
       public Object encode(Object pObject) throws EncoderException {
   
           Object result;
  @@ -123,8 +162,7 @@
                                          + "Soundex " 
                                          + "encode is not of type " 
                                          + "java.lang.String"); 
  -        } 
  -        else {
  +        } else {
               result = soundex((String) pObject);
           }
   
  @@ -132,19 +170,28 @@
   
       }
   
  -
  +    /**
  +     * Encodes a String using the soundex algorithm. 
  +     *
  +     * @param pString A String object to encode
  +     * @return A Soundex code corresponding to the String supplied
  +     * @throws EncoderException throws exception if there is an
  +     *                          encoding-specific problem
  +     */
       public String encode(String pString) throws EncoderException {
           return (soundex(pString));   
       }
   
       /**
        * Used internally by the SoundEx algorithm.
  +     *
  +     * @param c character to use to retrieve mapping code
  +     * @return Mapping code for a particular character
        */
       private char getMappingCode(char c) {
           if (!Character.isLetter(c)) {
               return 0;
  -        } 
  -        else {
  +        } else {
               return soundexMapping[Character.toUpperCase(c) - 'A'];
           }
       }


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

cvs commit: jakarta-commons/codec/src/java/org/apache/commons/codec/language Metaphone.java RefinedSoundex.java Soundex.java

Reply via email to