remm        01/04/30 11:25:52

  Added:       httpclient/src/java/org/apache/commons/httpclient
                        URIUtil.java
  Log:
  - Add a prototype for a new helper class for URL escaping / unescaping.
    Warning : still needs a bit of work.
  
  Revision  Changes    Path
  1.1                  
jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/URIUtil.java
  
  Index: URIUtil.java
  ===================================================================
  /*
   * $Header: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/URIUtil.java,v 1.1 2001/04/30 18:25:50 remm Exp $
   * $Revision: 1.1 $
   * $Date: 2001/04/30 18:25:50 $
   *
   * ====================================================================
   *
   * The Apache Software License, Version 1.1
   *
   * Copyright (c) 1999 The Apache Software Foundation.  All rights 
   * reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution, if
   *    any, must include the following acknowlegement:  
   *       "This product includes software developed by the 
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowlegement may appear in the software itself,
   *    if and wherever such third-party acknowlegements normally appear.
   *
   * 4. The names "The Jakarta Project", "Tomcat", and "Apache Software
   *    Foundation" must not be used to endorse or promote products derived
   *    from this software without prior written permission. For written 
   *    permission, please contact [EMAIL PROTECTED]
   *
   * 5. Products derived from this software may not be called "Apache"
   *    nor may "Apache" appear in their names without prior written
   *    permission of the Apache Group.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   *
   * [Additional notices, if required by prior licensing conditions]
   *
   */ 
  
  package org.apache.commons.httpclient;
  
  import java.io.UnsupportedEncodingException;
  import java.io.ByteArrayOutputStream;
  import java.io.OutputStreamWriter;
  import java.io.IOException;
  import java.util.BitSet;
  
  
  /**
   * General purpose URI escaping and unescaping utility methods.
   *
   * <p>"Escaping" (replacing a byte with <code>%HH</code>) is distinct from
   * "character encoding" (turning characters into bytes). The methods of this
   * class operate on bytes; the overloads accepting an encoding name only use
   * it to convert between <code>String</code> and <code>byte[]</code>.</p>
   *
   * <p>NOTICE: the per-component character sets defined in this class are
   * generic; using them as-is for the escaping rules of one specific protocol
   * is not recommended.</p>
   *
   * <p>The character sets are exposed as public, mutable <code>BitSet</code>
   * objects. Modifying them changes the behaviour of every caller that passes
   * them to <code>escape</code>, so treat them as read-only.</p>
   *
   * @author Craig R. McClanahan
   * @author Tim Tye
   * @author Remy Maucherat
   * @author Park, Sung-Gu
   * @version $Revision: 1.1 $ $Date: 2001/04/30 18:25:50 $
   * @see <a href=http://www.ietf.org/rfc/rfc2396.txt?number=2396>RFC 2396</a>
   */
  public class URIUtil {

      // -------------------------------------------------------------- Constants


      /**
       * Upper-case hexadecimal digits, indexed by nibble value (0 to 15).
       */
      private static final char[] hexadecimal =
      {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
       'A', 'B', 'C', 'D', 'E', 'F'};


      // ------------------------------------------------------- Static Variables


      /**
       * The alphanumeric characters (a-z, A-Z, 0-9). These are never escaped
       * by the <code>escape</code> methods.
       */
      public static final BitSet alphanum = new BitSet(128);


      /**
       * Characters of the scheme component that <code>escape</code> leaves
       * untouched when this set is passed as its <code>reserved</code>
       * argument.
       */
      public static final BitSet schemeReserved = new BitSet(128);


      /**
       * Characters of the authority component that <code>escape</code> leaves
       * untouched when this set is passed as its <code>reserved</code>
       * argument.
       */
      public static final BitSet authorityReserved = new BitSet(128);


      /**
       * Characters of the userinfo component that <code>escape</code> leaves
       * untouched when this set is passed as its <code>reserved</code>
       * argument.
       */
      public static final BitSet userinfoReserved = new BitSet(128);


      /**
       * Characters of the host component that <code>escape</code> leaves
       * untouched when this set is passed as its <code>reserved</code>
       * argument.
       */
      public static final BitSet hostReserved = new BitSet(128);


      /**
       * Characters of the path component that <code>escape</code> leaves
       * untouched when this set is passed as its <code>reserved</code>
       * argument.
       */
      public static final BitSet pathReserved = new BitSet(128);


      /**
       * Characters of the query component that <code>escape</code> leaves
       * untouched when this set is passed as its <code>reserved</code>
       * argument.
       */
      public static final BitSet queryReserved = new BitSet(128);


      // ----------------------------------------------------- Static Initializer


      static {

          // The alphanumeric characters, common to every kind of URI escaping.
          for (int i = 'a'; i <= 'z'; i++) {
              alphanum.set(i);
          }
          for (int i = 'A'; i <= 'Z'; i++) {
              alphanum.set(i);
          }
          for (int i = '0'; i <= '9'; i++) {
              alphanum.set(i);
          }

          // Scheme component.
          // Actually, a scheme should be any combination of lower case letters,
          // digits, plus ("+"), period ("."), or hyphen ("-"). The upper case
          // letters should be treated as equivalent to lower case in scheme
          // names.
          schemeReserved.set('+');
          schemeReserved.set('.');
          schemeReserved.set('-');

          // Authority component.
          authorityReserved.set(';');
          authorityReserved.set(':');
          authorityReserved.set('@');
          authorityReserved.set('?');
          authorityReserved.set('/');

          // Userinfo component.
          userinfoReserved.set(';');
          userinfoReserved.set(':');
          userinfoReserved.set('&');
          userinfoReserved.set('=');
          userinfoReserved.set('+');
          userinfoReserved.set('$');
          userinfoReserved.set(',');

          // Host component.
          hostReserved.set('.');
          hostReserved.set('-');

          // Path component.
          pathReserved.set('/');
          pathReserved.set(';');
          pathReserved.set('=');
          pathReserved.set('?');

          // Query component.
          queryReserved.set(';');
          queryReserved.set('/');
          queryReserved.set('?');
          queryReserved.set(':');
          queryReserved.set('@');
          queryReserved.set('&');
          queryReserved.set('=');
          queryReserved.set('+');
          queryReserved.set(',');
          queryReserved.set('$');

      }


      // ------------------------------------------------------------ Properties


      /**
       * Get the alphanumeric URI character set.
       *
       * @return the shared (mutable) {@link #alphanum} set
       */
      public static BitSet alphanum() {
          return alphanum;
      }


      /**
       * Get the reserved URI character set of the scheme component.
       *
       * @return the shared (mutable) {@link #schemeReserved} set
       */
      public static BitSet schemeReserved() {
          return schemeReserved;
      }


      /**
       * Get the reserved URI character set of the authority component.
       *
       * @return the shared (mutable) {@link #authorityReserved} set
       */
      public static BitSet authorityReserved() {
          return authorityReserved;
      }


      /**
       * Get the reserved URI character set of the userinfo component.
       *
       * @return the shared (mutable) {@link #userinfoReserved} set
       */
      public static BitSet userinfoReserved() {
          return userinfoReserved;
      }


      /**
       * Get the reserved URI character set of the host component.
       *
       * @return the shared (mutable) {@link #hostReserved} set
       */
      public static BitSet hostReserved() {
          return hostReserved;
      }


      /**
       * Get the reserved URI character set of the path component.
       *
       * @return the shared (mutable) {@link #pathReserved} set
       */
      public static BitSet pathReserved() {
          return pathReserved;
      }


      /**
       * Get the reserved URI character set of the query component.
       *
       * @return the shared (mutable) {@link #queryReserved} set
       */
      public static BitSet queryReserved() {
          return queryReserved;
      }


      // -------------------------------------------------------- Private Methods


      /**
       * Convert the ASCII code of a hexadecimal digit to its numeric value.
       *
       * @param b the ASCII code of the digit ('0'-'9', 'a'-'f' or 'A'-'F')
       * @return the value of the digit, between 0 and 15
       * @exception IllegalArgumentException if <code>b</code> is not a
       * hexadecimal digit
       */
      private static int convertHexDigit(byte b) {
          if ((b >= '0') && (b <= '9')) {
              return b - '0';
          }
          if ((b >= 'a') && (b <= 'f')) {
              return b - 'a' + 10;
          }
          if ((b >= 'A') && (b <= 'F')) {
              return b - 'A' + 10;
          }
          throw new IllegalArgumentException
              ("Invalid hexadecimal digit in escape sequence: 0x"
               + Integer.toHexString(b & 0xff));
      }


      /**
       * Verify that <code>off</code> and <code>len</code> designate a range
       * lying entirely inside <code>bytes</code>.
       *
       * @param bytes the array being addressed (must not be null)
       * @param off the start offset of the range
       * @param len the number of bytes in the range
       * @exception IndexOutOfBoundsException if the range is not valid
       */
      private static void checkRange(byte[] bytes, int off, int len) {
          // Written as a subtraction so that off + len cannot overflow.
          if ((off < 0) || (len < 0) || (off > bytes.length - len)) {
              throw new IndexOutOfBoundsException
                  ("off=" + off + ", len=" + len
                   + ", array length=" + bytes.length);
          }
      }


      // --------------------------------------------------------- Public Methods


      /**
       * Unescape the escaped URI string. The string is converted to bytes
       * and the result back to a string using the platform default encoding.
       *
       * @param str The escaped URI string.
       * @return the unescaped string, or null if <code>str</code> is null
       * @exception IllegalArgumentException if a '%' character is not followed
       * by a valid 2-digit hexadecimal number
       */
      public static String unescape(String str) {
          return (str == null) ? null : unescape(str.getBytes());
      }


      /**
       * Unescape the escaped URI byte array, producing a string in the
       * platform default encoding.
       *
       * @param bytes The escaped URI byte array. It is not modified.
       * @return the unescaped string, or null if <code>bytes</code> is null
       * @exception IllegalArgumentException if a '%' character is not followed
       * by a valid 2-digit hexadecimal number
       */
      public static String unescape(byte[] bytes) {
          return unescape(bytes, null);
      }


      /**
       * Unescape a range of the escaped URI byte array, producing a string in
       * the platform default encoding.
       *
       * @param bytes The escaped URI byte array. It is not modified.
       * @param off The index of the first byte to unescape.
       * @param len The number of bytes to unescape.
       * @return the unescaped string, or null if <code>bytes</code> is null
       * @exception IllegalArgumentException if a '%' character is not followed
       * by a valid 2-digit hexadecimal number
       * @exception IndexOutOfBoundsException if <code>off</code> and
       * <code>len</code> do not designate a range inside <code>bytes</code>
       */
      public static String unescape(byte[] bytes, int off, int len) {
          return unescape(bytes, off, len, null);
      }


      /**
       * Unescape the escaped URI byte array with character encoding.
       *
       * @param bytes The escaped URI byte array. It is not modified.
       * @param enc The encoding to use.
       *            If null or wrong, the default encoding is used.
       * @return the unescaped string, or null if <code>bytes</code> is null
       * @exception IllegalArgumentException if a '%' character is not followed
       * by a valid 2-digit hexadecimal number
       */
      public static String unescape(byte[] bytes, String enc) {
          if (bytes == null) {
              return null;
          }
          return unescape(bytes, 0, bytes.length, enc);
      }


      /**
       * Unescape a range of the escaped URI byte array with character
       * encoding. Each '+' becomes a space and each <code>%HH</code> sequence
       * becomes the byte with hexadecimal value <code>HH</code>; every other
       * byte is copied unchanged. The resulting bytes are then converted to a
       * string.
       *
       * @param bytes The escaped URI byte array. It is not modified.
       * @param off The index of the first byte to unescape.
       * @param len The number of bytes to unescape.
       * @param enc The encoding to use.
       *            If null or wrong, the default encoding is used.
       * @return the unescaped string, or null if <code>bytes</code> is null
       * @exception IllegalArgumentException if a '%' character is not followed
       * by a valid 2-digit hexadecimal number within the given range
       * @exception IndexOutOfBoundsException if <code>off</code> and
       * <code>len</code> do not designate a range inside <code>bytes</code>
       */
      public static String unescape(byte[] bytes, int off, int len, String enc) {

          if (bytes == null) {
              return null;
          }
          checkRange(bytes, off, len);

          // Decode into a scratch buffer so that the caller's array is left
          // untouched. Unescaping never grows the data, so len bytes suffice.
          byte[] decoded = new byte[len];
          int end = off + len;
          int ix = off;
          int ox = 0;
          while (ix < end) {
              byte b = bytes[ix++];     // Get byte to test
              if (b == '+') {
                  b = (byte) ' ';
              } else if (b == '%') {
                  // Both hex digits must lie inside the requested range.
                  if (end - ix < 2) {
                      throw new IllegalArgumentException
                          ("Incomplete escape sequence at offset " + (ix - 1));
                  }
                  int high = convertHexDigit(bytes[ix++]);
                  int low = convertHexDigit(bytes[ix++]);
                  b = (byte) ((high << 4) + low);
              }
              decoded[ox++] = b;
          }

          if (enc != null) {
              try {
                  return new String(decoded, 0, ox, enc);
              } catch (UnsupportedEncodingException e) {
                  // Documented best effort: report the problem and fall back
                  // to the default encoding below.
                  e.printStackTrace();
              }
          }

          return new String(decoded, 0, ox);

      }


      /**
       * Escape the unescaped URI string, leaving only alphanumeric characters
       * untouched. The string is converted to bytes using the platform
       * default encoding.
       *
       * @param str The unescaped URI string which has to be rewritten.
       * @return the escaped string, or null if <code>str</code> is null
       */
      public static String escape(String str) {
          return escape(str, null);
      }


      /**
       * Escape the unescaped URI string. The string is converted to bytes
       * using the platform default encoding.
       *
       * @param str The unescaped URI string which has to be rewritten.
       * @param reserved Additional characters to leave unescaped; may be null.
       * @return the escaped string, or null if <code>str</code> is null
       */
      public static String escape(String str, BitSet reserved) {
          return (str == null) ? null : escape(str.getBytes(), reserved);
      }


      /**
       * Escape the unescaped URI byte array.
       *
       * @param bytes The unescaped URI byte array which has to be rewritten.
       * @param reserved Additional characters to leave unescaped; may be null.
       * @return the escaped string, or null if <code>bytes</code> is null
       */
      public static String escape(byte[] bytes, BitSet reserved) {
          return (bytes == null) ? null
              : escape(bytes, 0, bytes.length, reserved);
      }


      /**
       * Escape a range of the unescaped URI byte array. Bytes that are
       * alphanumeric, or whose value is set in <code>reserved</code>, are
       * copied as characters; every other byte is written as '%' followed by
       * two upper-case hexadecimal digits.
       *
       * @param bytes The unescaped URI byte array which has to be rewritten.
       * @param off The index of the first byte to escape.
       * @param len The number of bytes to escape.
       * @param reserved Additional characters to leave unescaped; may be null.
       * @return the escaped string, or null if <code>bytes</code> is null
       * @exception IndexOutOfBoundsException if <code>off</code> and
       * <code>len</code> do not designate a range inside <code>bytes</code>
       */
      public static String escape(byte[] bytes, int off,
                                  int len, BitSet reserved) {

          if (bytes == null) {
              return null;
          }
          checkRange(bytes, off, len);

          StringBuffer rewrittenStr = new StringBuffer(len);

          int end = off + len;
          for (int i = off; i < end; i++) {
              byte b = bytes[i];
              // Only non-negative (7-bit) byte values can denote a character
              // that is allowed to pass through unescaped.
              boolean literal = (b >= 0)
                  && (alphanum.get(b)
                      || ((reserved != null) && reserved.get(b)));
              if (literal) {
                  rewrittenStr.append((char) b);
              } else {
                  rewrittenStr.append('%');
                  rewrittenStr.append(hexadecimal[(b & 0xf0) >> 4]);
                  rewrittenStr.append(hexadecimal[b & 0x0f]);
              }
          }

          return rewrittenStr.toString();
      }


      /**
       * Escape the unescaped URI string with character encoding.
       *
       * @param str The string which has to be rewritten.
       * @param reserved Additional characters to leave unescaped; may be null.
       * @param enc The encoding used to convert <code>str</code> to bytes.
       *            If null or wrong, the default encoding is used.
       * @return the escaped string, or null if <code>str</code> is null
       */
      public static String escape(String str, BitSet reserved, String enc) {

          if (str == null) {
              return null;
          }

          if (enc != null) {
              try {
                  return escape(str.getBytes(enc), reserved);
              } catch (UnsupportedEncodingException e) {
                  // Documented best effort: report the problem and fall back
                  // to the default encoding below.
                  e.printStackTrace();
              }
          }

          return escape(str.getBytes(), reserved);
      }

  }
  
  
  
  

Reply via email to