Sorry, what is this for? What does it have to do with URIs? I know that in
German, characters with umlauts are sometimes replaced with two characters,
but is it like this in every language?

Thanks for explaining.

Reto

On Thu, Jun 10, 2010 at 11:24 AM, <[email protected]> wrote:

> Author: mir
> Date: Thu Jun 10 09:24:31 2010
> New Revision: 953259
>
> URL: http://svn.apache.org/viewvc?rev=953259&view=rev
> Log:
> added removeAccents()-method to UriUtil (implemented by Andre)
>
> Modified:
>
>  
> incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.utils/src/main/java/org/apache/clerezza/utils/UriUtil.java
>
> Modified:
> incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.utils/src/main/java/org/apache/clerezza/utils/UriUtil.java
> URL:
> http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.utils/src/main/java/org/apache/clerezza/utils/UriUtil.java?rev=953259&r1=953258&r2=953259&view=diff
>
> ==============================================================================
> ---
> incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.utils/src/main/java/org/apache/clerezza/utils/UriUtil.java
> (original)
> +++
> incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.utils/src/main/java/org/apache/clerezza/utils/UriUtil.java
> Thu Jun 10 09:24:31 2010
> @@ -687,5 +687,153 @@ public class UriUtil {
>             return result.toString();
>         }
>     }
> +
> +    /**
> +     * Removes the accents from the given string and replaces them by
> 2-charaters equivalents.
> +     * <p>
> +     * ex.
> +     * 'ä' gets 'ae'
> +     *
> +     * @param s the s
> +     * @return the string
> +     */
> +    public static String removeAccents(String s) {
> +        if ( s == null ) return s;
> +        StringBuffer chars = new StringBuffer();
>
> +        // Loop over the characters, replace those that need to be.
> +        for (int i = 0; i < s.length(); i++) {
> +            switch (s.charAt(i)) {
> +                       case '\u00C0':
> +                case '\u00C1':
> +                case '\u00C2':
> +                case '\u00C3':
> +                case '\u00C5':
> +                    chars.append("A");
> +                    break;
> +                case '\u00C4':
> +                    chars.append("AE");
> +                    break;
> +                case '\u00C6':
> +                    chars.append("AE");
> +                    break;
> +                case '\u00C7':
> +                    chars.append("C");
> +                    break;
> +                case '\u00C8':
> +                case '\u00C9':
> +                case '\u00CA':
> +                case '\u00CB':
> +                    chars.append("E");
> +                    break;
> +                case '\u00CC':
> +                case '\u00CD':
> +                case '\u00CE':
> +                case '\u00CF':
> +                    chars.append("I");
> +                    break;
> +                case '\u00D0':
> +                    chars.append("D");
> +                    break;
> +                case '\u00D1':
> +                    chars.append("N");
> +                    break;
> +                case '\u00D2':
> +                case '\u00D3':
> +                case '\u00D4':
> +                case '\u00D5':
> +                case '\u00D8':
> +                    chars.append("O");
> +                    break;
> +                case '\u00D6':
> +                    chars.append("OE");
> +                    break;
> +                case '\u0152':
> +                    chars.append("OE");
> +                    break;
> +                case '\u00DE':
> +                    chars.append("TH");
> +                    break;
> +                case '\u00D9':
> +                case '\u00DA':
> +                case '\u00DB':
> +                    chars.append("U");
> +                    break;
> +                case '\u00DC':
> +                    chars.append("UE");
> +                    break;
> +                case '\u00DD':
> +                case '\u0178':
> +                    chars.append("Y");
> +                    break;
> +                case '\u00E0':
> +                case '\u00E1':
> +                case '\u00E2':
> +                case '\u00E3':
> +                case '\u00E4':
> +                case '\u00E5':
> +                    chars.append("a");
> +                    break;
> +                case '\u00E6':
> +                    chars.append("ae");
> +                    break;
> +                case '\u00E7':
> +                    chars.append("c");
> +                    break;
> +                case '\u00E8':
> +                case '\u00E9':
> +                case '\u00EA':
> +                case '\u00EB':
> +                    chars.append("e");
> +                    break;
> +                case '\u00EC':
> +                case '\u00ED':
> +                case '\u00EE':
> +                case '\u00EF':
> +                    chars.append("i");
> +                    break;
> +                case '\u00F0':
> +                    chars.append("d");
> +                    break;
> +                case '\u00F1':
> +                    chars.append("n");
> +                    break;
> +                case '\u00F2':
> +                case '\u00F3':
> +                case '\u00F4':
> +                case '\u00F5':
> +                case '\u00F8':
> +                    chars.append("o");
> +                    break;
> +                case '\u00F6':
> +                    chars.append("oe");
> +                       break;
> +                case '\u0153':
> +                    chars.append("oe");
> +                    break;
> +                case '\u00DF':
> +                    chars.append("ss");
> +                    break;
> +                case '\u00FE':
> +                    chars.append("th");
> +                    break;
> +                case '\u00F9':
> +                   case '\u00FA':
> +                   case '\u00FB':
> +                    chars.append("u");
> +                    break;
> +                   case '\u00FC':
> +                    chars.append("ue");
> +                    break;
> +                case '\u00FD':
> +                case '\u00FF':
> +                    chars.append("y");
> +                    break;
> +                default:
> +                    chars.append(s.charAt(i));
> +                    break;
> +            }
> +        }
> +        return chars.toString();
> +    }
>  }
> \ No newline at end of file
>
>
>

Reply via email to