mrglavas    2004/09/30 20:06:14

  Modified:    java/src/org/apache/xerces/xinclude XIncludeHandler.java
               java/src/org/apache/xerces/impl/msg
                        XIncludeMessages.properties
  Log:
  Updates for changes in PR draft:
  http://www.w3.org/TR/2004/PR-xinclude-20040930/
  
  Report a fatal error if the value of the href attribute [1]
  contains a fragment identifier or, after escaping has
  been applied, results in a syntactically invalid URI or IRI.
  
  Apply the escaping rules described in section 4.1.1 [2]
  when processing href attributes.
  
  [1] http://www.w3.org/TR/2004/PR-xinclude-20040930/#include_element
  [2] http://www.w3.org/TR/2004/PR-xinclude-20040930/#IRIs
  
  Revision  Changes    Path
  1.38      +159 -1    
xml-xerces/java/src/org/apache/xerces/xinclude/XIncludeHandler.java
  
  Index: XIncludeHandler.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/xinclude/XIncludeHandler.java,v
  retrieving revision 1.37
  retrieving revision 1.38
  diff -u -r1.37 -r1.38
  --- XIncludeHandler.java      30 Sep 2004 04:26:30 -0000      1.37
  +++ XIncludeHandler.java      1 Oct 2004 03:06:13 -0000       1.38
  @@ -1354,6 +1354,38 @@
               parse = XINCLUDE_PARSE_XML;
           }
           
  +        URI hrefURI = null;
  +        
  +        // Check whether href is correct and perform escaping as per section 4.1.1 
of the XInclude spec.
  +        // Report fatal error if the href value contains a fragment identifier or 
if the value after
  +        // escaping is a syntactically invalid URI or IRI.
  +        if (href != null) {
  +            try {
  +                hrefURI = new URI(href, true);
  +                if (hrefURI.getFragment() != null) {
  +                    reportFatalError("HrefFragmentIdentifierIllegal", new Object[] 
{href});
  +                }
  +            }
  +            catch (URI.MalformedURIException exc) {
  +                String newHref = escapeHref(href);
  +                if (href != newHref) {
  +                    href = newHref;
  +                    try {
  +                        hrefURI = new URI(href, true);
  +                        if (hrefURI.getFragment() != null) {
  +                            reportFatalError("HrefFragmentIdentifierIllegal", new 
Object[] {href});
  +                        }
  +                    }
  +                    catch (URI.MalformedURIException exc2) {
  +                        reportFatalError("HrefSyntacticallyInvalid", new Object[] 
{href});
  +                    }
  +                }
  +                else {
  +                    reportFatalError("HrefSyntacticallyInvalid", new Object[] 
{href});
  +                }
  +            }
  +        }
  +        
           // Verify that if an accept and/or an accept-language attribute exist
           // that the value(s) don't contain disallowed characters.
           if (accept != null && !isValidInHTTPHeader(accept)) {
  @@ -2583,5 +2615,131 @@
               httpSource.setHTTPRequestProperty(XIncludeHandler.HTTP_ACCEPT_LANGUAGE, 
acceptLanguage);
           }
           return httpSource;
  +    }
  +    
  +    // which ASCII characters need to be escaped
  +    private static boolean gNeedEscaping[] = new boolean[128];
  +    // the first hex character if a character needs to be escaped
  +    private static char gAfterEscaping1[] = new char[128];
  +    // the second hex character if a character needs to be escaped
  +    private static char gAfterEscaping2[] = new char[128];
  +    private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7',
  +                                     '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
  +    // initialize the above 3 arrays
  +    static {
  +        char[] escChs = {' ', '<', '>', '"', '{', '}', '|', '\\', '^', '`'};
  +        int len = escChs.length;
  +        char ch;
  +        for (int i = 0; i < len; i++) {
  +            ch = escChs[i];
  +            gNeedEscaping[ch] = true;
  +            gAfterEscaping1[ch] = gHexChs[ch >> 4];
  +            gAfterEscaping2[ch] = gHexChs[ch & 0xf];
  +        }
  +    }
  +    
  +    //
  +    // Escape an href value according to (4.1.1):
  +    //
  +    // To convert the value of the href attribute to an IRI reference, the 
following characters must be escaped:
  +    // space #x20
  +    // the delimiters < #x3C, > #x3E and " #x22
  +    // the unwise characters { #x7B, } #x7D, | #x7C, \ #x5C, ^ #x5E and ` #x60
  +    //
  +    // To convert an IRI reference to a URI reference, the following characters 
must also be escaped:
  +    // the Unicode plane 0 characters #xA0 - #xD7FF, #xF900-#xFDCF, #xFDF0-#xFFEF
  +    // the Unicode plane 1-14 characters #x10000-#x1FFFD ... #xE0000-#xEFFFD
  +    //
  +    private String escapeHref(String href) {
  +        int len = href.length();
  +        int ch;
  +        StringBuffer buffer = new StringBuffer(len*3);
  +
  +        // for each character in the href
  +        int i = 0;
  +        for (; i < len; i++) {
  +            ch = href.charAt(i);
  +            // if it's not an ASCII character (excluding 0x7F), break here, and use 
UTF-8 encoding
  +            if (ch > 0x7E) {
  +                break;
  +            }
  +            // abort: href does not allow this character
  +            if (ch < 0x20) {
  +                return href;
  +            }
  +            if (gNeedEscaping[ch]) {
  +                buffer.append('%');
  +                buffer.append(gAfterEscaping1[ch]);
  +                buffer.append(gAfterEscaping2[ch]);
  +            }
  +            else {
  +                buffer.append((char)ch);
  +            }
  +        }
  +
  +        // we saw some non-ascii character
  +        if (i < len) {
  +            // check if remainder of href contains any illegal characters before 
proceeding
  +            for (int j = i; j < len; ++j) {
  +                ch = href.charAt(j);
  +                if ((ch >= 0x20 && ch <= 0x7E) || 
  +                    (ch >= 0xA0 && ch <= 0xD7FF) ||
  +                    (ch >= 0xF900 && ch <= 0xFDCF) ||
  +                    (ch >= 0xFDF0 && ch <= 0xFFEF)) {
  +                    continue;
  +                }
  +                if (XMLChar.isHighSurrogate(ch) && ++j < len) {
  +                    int ch2 = href.charAt(j);
  +                    if (XMLChar.isLowSurrogate(ch2)) {
  +                        ch2 = XMLChar.supplemental((char)ch, (char)ch2);
  +                        if (ch2 < 0xF0000 && (ch2 & 0xFFFF) <= 0xFFFD) {
  +                            continue;
  +                        }
  +                    }
  +                }
  +                // abort: href does not allow this character
  +                return href;
  +            }
  +            
  +            // get UTF-8 bytes for the remaining sub-string
  +            byte[] bytes = null;
  +            byte b;
  +            try {
  +                bytes = href.substring(i).getBytes("UTF-8");
  +            } catch (java.io.UnsupportedEncodingException e) {
  +                // should never happen
  +                return href;
  +            }
  +            len = bytes.length;
  +
  +            // for each byte
  +            for (i = 0; i < len; i++) {
  +                b = bytes[i];
  +                // for non-ascii character: make it positive, then escape
  +                if (b < 0) {
  +                    ch = b + 256;
  +                    buffer.append('%');
  +                    buffer.append(gHexChs[ch >> 4]);
  +                    buffer.append(gHexChs[ch & 0xf]);
  +                }
  +                else if (gNeedEscaping[b]) {
  +                    buffer.append('%');
  +                    buffer.append(gAfterEscaping1[b]);
  +                    buffer.append(gAfterEscaping2[b]);
  +                }
  +                else {
  +                    buffer.append((char)b);
  +                }
  +            }
  +        }
  +
  +        // If escaping happened, create a new string;
  +        // otherwise, return the orginal one.
  +        if (buffer.length() != len) {
  +            return buffer.toString();
  +        }
  +        else {
  +            return href;
  +        }
       }
   }
  
  
  
  1.11      +2 -0      
xml-xerces/java/src/org/apache/xerces/impl/msg/XIncludeMessages.properties
  
  Index: XIncludeMessages.properties
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/impl/msg/XIncludeMessages.properties,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- XIncludeMessages.properties       16 Aug 2004 01:05:01 -0000      1.10
  +++ XIncludeMessages.properties       1 Oct 2004 03:06:13 -0000       1.11
  @@ -23,6 +23,8 @@
   MultipleRootElements = A well-formed document must not contain multiple root 
elements.
   ContentIllegalAtTopLevel = The replacement of an 'include' element appearing as the 
document element in the top-level source infoset cannot contain characters.
   UnexpandedEntityReferenceIllegal = The replacement of an 'include' element 
appearing as the document element in the top-level source infoset cannot contain 
unexpanded entity references.
  +HrefFragmentIdentifierIllegal = Fragment identifiers must not be used. The ''href'' 
attribute value ''{0}'' is not permitted.
  +HrefSyntacticallyInvalid = ''href'' attribute value ''{0}'' is syntactically 
invalid. After applying escaping rules the value is neither a syntactically correct 
URI or IRI.
   
   # Messages from erroneous set-up
   IncompatibleNamespaceContext = The type of the NamespaceContext is incompatible 
with using XInclude; it must be an instance of XIncludeNamespaceSupport
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to