sandygao    2004/01/20 09:01:53

  Modified:    java/src/org/apache/xerces/impl/dv/xs AnyURIDV.java
  Log:
  Fixing a bug in the schema anyURI implementation.
  Now we accept special characters and non-ascii characters in anyURI values.
  This partially fixes bug [18803].
  
  Revision  Changes    Path
  1.5       +100 -2    xml-xerces/java/src/org/apache/xerces/impl/dv/xs/AnyURIDV.java
  
  Index: AnyURIDV.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/dv/xs/AnyURIDV.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- AnyURIDV.java     18 Nov 2002 23:10:10 -0000      1.4
  +++ AnyURIDV.java     20 Jan 2004 17:01:53 -0000      1.5
  @@ -2,7 +2,7 @@
    * The Apache Software License, Version 1.1
    *
    *
  - * Copyright (c) 2001, 2002 The Apache Software Foundation.  All rights
  + * Copyright (c) 2001-2004 The Apache Software Foundation.  All rights
    * reserved.
    *
    * Redistribution and use in source and binary forms, with or without
  @@ -91,6 +91,8 @@
           // check 3.2.17.c0 must: URI (rfc 2396/2732)
           try {
               if( content.length() != 0 ) {
  +                // encode special characters using XLink 5.4 algorithm
  +                content = encode(content);
                   // Support for relative URLs
                   // According to Java 1.1: URLs may also be specified with a
                   // String and the URL object that it is related to.
  @@ -102,6 +104,102 @@
   
           // REVISIT: do we need to return the new URI object?
           return content;
  +    }
  +
  +    // which ASCII characters need to be escaped
  +    private static boolean gNeedEscaping[] = new boolean[128];
  +    // the first hex character if a character needs to be escaped
  +    private static char gAfterEscaping1[] = new char[128];
  +    // the second hex character if a character needs to be escaped
  +    private static char gAfterEscaping2[] = new char[128];
  +    private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7',
  +                                     '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
  +    // initialize the above 3 arrays
  +    static {
  +        for (int i = 0; i <= 0x1f; i++) {
  +            gNeedEscaping[i] = true;
  +            gAfterEscaping1[i] = gHexChs[i >> 4];
  +            gAfterEscaping2[i] = gHexChs[i & 0xf];
  +        }
  +        gNeedEscaping[0x7f] = true;
  +        gAfterEscaping1[0x7f] = '7';
  +        gAfterEscaping2[0x7f] = 'F';
  +        char[] escChs = {' ', '<', '>', '"', '{', '}',
  +                         '|', '\\', '^', '~', '`'};
  +        int len = escChs.length;
  +        char ch;
  +        for (int i = 0; i < len; i++) {
  +            ch = escChs[i];
  +            gNeedEscaping[ch] = true;
  +            gAfterEscaping1[ch] = gHexChs[ch >> 4];
  +            gAfterEscaping2[ch] = gHexChs[ch & 0xf];
  +        }
  +    }
  +
  +    // To encode special characters in anyURI, by using %HH to represent
  +    // special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', etc.
  +    // and non-ASCII characters (whose value >= 128).
  +    private static String encode(String anyURI){
  +        int len = anyURI.length(), ch;
  +        StringBuffer buffer = new StringBuffer(len*3);
  +
  +        // for each character in the anyURI
  +        int i = 0;
  +        for (; i < len; i++) {
  +            ch = anyURI.charAt(i);
  +            // if it's not an ASCII character, break here, and use UTF-8 encoding
  +            if (ch >= 128)
  +                break;
  +            if (gNeedEscaping[ch]) {
  +                buffer.append('%');
  +                buffer.append(gAfterEscaping1[ch]);
  +                buffer.append(gAfterEscaping2[ch]);
  +            }
  +            else {
  +                buffer.append((char)ch);
  +            }
  +        }
  +
  +        // we saw some non-ascii character
  +        if (i < len) {
  +            // get UTF-8 bytes for the remaining sub-string
  +            byte[] bytes = null;
  +            byte b;
  +            try {
  +                bytes = anyURI.substring(i).getBytes("UTF-8");
  +            } catch (java.io.UnsupportedEncodingException e) {
  +                // should never happen
  +                return anyURI;
  +            }
  +            len = bytes.length;
  +
  +            // for each byte
  +            for (i = 0; i < len; i++) {
  +                b = bytes[i];
  +                // for non-ascii character: make it positive, then escape
  +                if (b < 0) {
  +                    ch = b + 256;
  +                    buffer.append('%');
  +                    buffer.append(gHexChs[ch >> 4]);
  +                    buffer.append(gHexChs[ch & 0xf]);
  +                }
  +                else if (gNeedEscaping[b]) {
  +                    buffer.append('%');
  +                    buffer.append(gAfterEscaping1[b]);
  +                    buffer.append(gAfterEscaping2[b]);
  +                }
  +                else {
  +                    buffer.append((char)b);
  +                }
  +            }
  +        }
  +
  +        // If encoding happened, create a new string;
  +        // otherwise, return the original one.
  +        if (buffer.length() != len)
  +            return buffer.toString();
  +        else
  +            return anyURI;
       }
   
   } // class AnyURIDV
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to