bayard      02/02/21 21:13:18

  Modified:    util/src/java/org/apache/commons/util StringUtils.java
  Log:
  Removed metaphone, isMetaphoneEqual and soundex methods from StringUtils.
  These methods were in StringUtils only because PHP had made the same decision.
  They will now live in the codec package.
  
  Revision  Changes    Path
  1.26      +1 -262    jakarta-commons-sandbox/util/src/java/org/apache/commons/util/StringUtils.java
  
  Index: StringUtils.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/util/src/java/org/apache/commons/util/StringUtils.java,v
  retrieving revision 1.25
  retrieving revision 1.26
  diff -u -r1.25 -r1.26
  --- StringUtils.java  14 Feb 2002 05:03:31 -0000      1.25
  +++ StringUtils.java  22 Feb 2002 05:13:18 -0000      1.26
  @@ -85,7 +85,7 @@
    * @author <a href="mailto:[EMAIL PROTECTED]">Greg Coladonato</a>
    * @author <a href="mailto:[EMAIL PROTECTED]">Bayard</a>
    * @author <a href="mailto:[EMAIL PROTECTED]">Ed Korthof</a>
  - * @version $Id: StringUtils.java,v 1.25 2002/02/14 05:03:31 bayard Exp $
  + * @version $Id: StringUtils.java,v 1.26 2002/02/22 05:13:18 bayard Exp $
    */
   public class StringUtils
   {
  @@ -1234,38 +1234,6 @@
           return d[n][m];
       }
   
  -
  -    /**
  -     * Get the SoundEx value of a string.
  -     * This implementation is taken from the code-snippers on 
  -     * http://www.sourceforge.net/
  -     */
  -    static public String soundex(String str) {
  -        return Soundex.US_ENGLISH.soundex(str);
  -    }
  -
  -    // contact [EMAIL PROTECTED]
  -    /**
  -     * Find the metaphone value of a String. This is similar to the 
  -     * soundex algorithm, but better at finding similar sounding words.
  -     */
  -    static public String metaphone(String str) {
  -        return Metaphone.metaPhone(str);
  -    }
  -    /**
  -     * Are the metaphones of two strings the same.
  -     */
  -    static public boolean isMetaphoneEqual(String str1, String str2) {
  -        return metaphone(str1).equals(metaphone(str2));
  -    }
  -
  -    /*
  -    // cciteseer.nj.nec.com/oliver93decision.html
  -    static public boolean isSimilarText(String st1, String str2) {
  -        return false;
  -    }
  -    */
  -
       /**
        * Quote a string so that it may be used in a regular expression 
        * without any parts of the string being considered as a 
  @@ -2040,232 +2008,3 @@
   
   
   
  -// Permission given by wbrogden for code to be used anywhere.
  -//package com.bga.wbrogden.metaphone;
  -
  -/*                                                     Metaphone.java
  - * A class to generate phonetic code and keep lists of objects
  - *  retrievable by a phonetic code.
  - * reference: Computer Language of Dec. 1990, p 39
  - *  "Hanging on the Metaphone" by Lawrence Philips
  - *
  - * This Java implementation, Copyright 1997, William B. Brogden
  - * is hereby released for all uses. I would appreciate hearing about it
  - * if you find a good use for the class.  December, 1997
  - *
  - * @author [EMAIL PROTECTED]    CompuServe  75415,610
  - */
  -
  -/*
  - * List functionality removed: 2001-06-21 [EMAIL PROTECTED]
  - */
  -
  -/* 
  - * Notes:
  - * The static method metaPhone converts an input String into a code.
  - *   All input is converted to upper case.
  - *   Limitations: Input format is expected to be a single ASCII word
  - *   with only characters in the A - Z range, no punctuation or numbers.
  - *
  - */
  -
  -class Metaphone {
  -
  -    static String vowels = "AEIOU" ;
  -    static String frontv = "EIY"   ;
  -    static String varson = "CSPTG" ;
  -
  -    static final int maxCodeLen = 4 ;
  -
  -    static public String metaPhone( String txt ){
  -      int mtsz = 0  ;
  -      boolean hard = false ;
  -      if(( txt == null ) ||
  -         ( txt.length() == 0 )) return "" ;
  -      // single character is itself
  -      if( txt.length() == 1 ) return txt.toUpperCase() ;
  -      
  -      char[] inwd = txt.toUpperCase().toCharArray() ;
  -      
  -      String tmpS ;
  -      StringBuffer local = new StringBuffer( 40 ); // manipulate
  -      StringBuffer code = new StringBuffer( 10 ) ; //   output
  -      // handle initial 2 characters exceptions
  -      switch( inwd[0] ){
  -        case 'K': case 'G' : case 'P' : /* looking for KN, etc*/
  -          if( inwd[1] == 'N')local.append(inwd, 1, inwd.length - 1 );
  -          else local.append( inwd );
  -          break;
  -        case 'A': /* looking for AE */
  -          if( inwd[1] == 'E' )local.append(inwd, 1, inwd.length - 1 );
  -          else local.append( inwd );
  -          break;
  -        case 'W' : /* looking for WR or WH */
  -          if( inwd[1] == 'R' ){   // WR -> R
  -            local.append(inwd, 1, inwd.length - 1 ); break ;
  -          }
  -          if( inwd[1] == 'H'){
  -            local.append(inwd, 1, inwd.length - 1 );
  -            local.setCharAt( 0,'W'); // WH -> W
  -          }
  -          else local.append( inwd );
  -          break;
  -        case 'X' : /* initial X becomes S */
  -          inwd[0] = 'S' ;local.append( inwd );
  -          break ;
  -        default :
  -          local.append( inwd );
  -      } // now local has working string with initials fixed
  -      int wdsz = local.length();
  -      int n = 0 ;
  -      while((mtsz < maxCodeLen ) && // max code size of 4 works well
  -            (n < wdsz ) ){
  -        char symb = local.charAt(n) ;
  -        // remove duplicate letters except C
  -        if(( symb != 'C' ) &&
  -           (n > 0 ) && ( local.charAt(n - 1 ) == symb )) n++ ;
  -        else{ // not dup
  -          switch( symb ){
  -            case 'A' : case 'E' : case 'I' : case 'O' : case 'U' :
  -              if( n == 0 ) { code.append(symb );mtsz++;
  -              }
  -              break ; // only use vowel if leading char
  -            case 'B' :
  -              if( (n > 0 ) &&
  -                  !(n + 1 == wdsz ) && // not MB at end of word
  -                  ( local.charAt(n - 1) == 'M')) {
  -                    code.append(symb);
  -                  }
  -              else code.append(symb);
  -              mtsz++ ;
  -              break ;
  -            case 'C' : // lots of C special cases
  -              /* discard if SCI, SCE or SCY */
  -              if( ( n > 0 ) &&
  -                  ( local.charAt(n-1) == 'S' ) &&
  -                  ( n + 1 < wdsz ) &&
  -                  ( frontv.indexOf( local.charAt(n + 1)) >= 0 )){ break ;}
  -              tmpS = local.toString();
  -              if( tmpS.indexOf("CIA", n ) == n ) { // "CIA" -> X
  -                 code.append('X' ); mtsz++; break ;
  -              }
  -              if( ( n + 1 < wdsz ) &&
  -                  (frontv.indexOf( local.charAt(n+1) )>= 0 )){
  -                 code.append('S');mtsz++; break ; // CI,CE,CY -> S
  -              }
  -              if(( n > 0) &&
  -                 ( tmpS.indexOf("SCH",n-1 )== n-1 )){ // SCH->sk
  -                 code.append('K') ; mtsz++;break ;
  -              }
  -              if( tmpS.indexOf("CH", n ) == n ){ // detect CH
  -                if((n == 0 ) &&
  -                   (wdsz >= 3 ) &&    // CH consonant -> K consonant
  -                   (vowels.indexOf( local.charAt( 2) ) < 0 )){
  -                     code.append('K');
  -                }
  -                else { code.append('X'); // CHvowel -> X
  -                }
  -                mtsz++;
  -              }
  -              else { code.append('K' );mtsz++;
  -              }
  -              break ;
  -            case 'D' :
  -              if(( n + 2 < wdsz )&&  // DGE DGI DGY -> J
  -                 ( local.charAt(n+1) == 'G' )&&
  -                 (frontv.indexOf( local.charAt(n+2) )>= 0)){
  -                    code.append('J' ); n += 2 ;
  -              }
  -              else { code.append( 'T' );
  -              }
  -              mtsz++;
  -              break ;
  -            case 'G' : // GH silent at end or before consonant
  -              if(( n + 2 == wdsz )&&
  -                 (local.charAt(n+1) == 'H' )) break ;
  -              if(( n + 2 < wdsz ) &&
  -                 (local.charAt(n+1) == 'H' )&&
  -                 (vowels.indexOf( local.charAt(n+2)) < 0 )) break ;
  -              tmpS = local.toString();
  -              if((n > 0) &&
  -                 ( tmpS.indexOf("GN", n ) == n)||
  -                 ( tmpS.indexOf("GNED",n) == n )) break ; // silent G
  -              if(( n > 0 ) &&
  -                 (local.charAt(n-1) == 'G')) hard = true ;
  -              else hard = false ;
  -              if((n+1 < wdsz) &&
  -                 (frontv.indexOf( local.charAt(n+1) ) >= 0 )&&
  -                 (!hard) ) code.append( 'J' );
  -              else code.append('K');
  -              mtsz++;
  -              break ;
  -            case 'H':
  -              if( n + 1 == wdsz ) break ; // terminal H
  -              if((n > 0) &&
  -                 (varson.indexOf( local.charAt(n-1)) >= 0)) break ;
  -              if( vowels.indexOf( local.charAt(n+1)) >=0 ){
  -                  code.append('H') ; mtsz++;// Hvowel
  -              }
  -              break;
  -            case 'F': case 'J' : case 'L' :
  -            case 'M': case 'N' : case 'R' :
  -              code.append( symb ); mtsz++; break ;
  -            case 'K' :
  -              if( n > 0 ){ // not initial
  -                if( local.charAt( n -1) != 'C' ) {
  -                     code.append(symb );
  -                }
  -              }
  -              else   code.append( symb ); // initial K
  -              mtsz++ ;
  -              break ;
  -            case 'P' :
  -              if((n + 1 < wdsz) &&  // PH -> F
  -                 (local.charAt( n+1) == 'H'))code.append('F');
  -              else code.append( symb );
  -              mtsz++;
  -              break ;
  -            case 'Q' :
  -              code.append('K' );mtsz++; break ;
  -            case 'S' :
  -              tmpS = local.toString();
  -              if((tmpS.indexOf("SH", n )== n) ||
  -                 (tmpS.indexOf("SIO",n )== n) ||
  -                 (tmpS.indexOf("SIA",n )== n)) code.append('X');
  -              else code.append( 'S' );
  -              mtsz++ ;
  -              break ;
  -            case 'T' :
  -              tmpS = local.toString(); // TIA TIO -> X
  -              if((tmpS.indexOf("TIA",n )== n)||
  -                 (tmpS.indexOf("TIO",n )== n) ){
  -                    code.append('X'); mtsz++; break;
  -              }
  -              if( tmpS.indexOf("TCH",n )==n) break;
  -              // substitute numeral 0 for TH (resembles theta after all)
  -              if( tmpS.indexOf("TH", n )==n) code.append('0');
  -              else code.append( 'T' );
  -              mtsz++ ;
  -              break ;
  -            case 'V' :
  -              code.append('F'); mtsz++;break ;
  -            case 'W' : case 'Y' : // silent if not followed by vowel
  -              if((n+1 < wdsz) &&
  -                 (vowels.indexOf( local.charAt(n+1))>=0)){
  -                    code.append( symb );mtsz++;
  -              }
  -              break ;
  -            case 'X' :
  -              code.append('K'); code.append('S');mtsz += 2;
  -              break ;
  -            case 'Z' :
  -              code.append('S'); mtsz++; break ;
  -          } // end switch
  -          n++ ;
  -        } // end else from symb != 'C'
  -        if( mtsz > 4 )code.setLength( 4);
  -      }
  -      return code.toString();
  -    } // end static method metaPhone()
  -
  -}
  
  
  

--
To unsubscribe, e-mail:   <mailto:[EMAIL PROTECTED]>
For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>

Reply via email to