mrglavas 2004/09/30 20:06:14 Modified: java/src/org/apache/xerces/xinclude XIncludeHandler.java java/src/org/apache/xerces/impl/msg XIncludeMessages.properties Log: Updates for changes in PR draft:
http://www.w3.org/TR/2004/PR-xinclude-20040930/ Report a fatal error if the value of the href attribute [1] contains a fragment identifier or, after escaping has been applied, results in a syntactically invalid URI or IRI. Apply the escaping rules described in section 4.1.1 [2] when processing href attributes. [1] http://www.w3.org/TR/2004/PR-xinclude-20040930/#include_element [2] http://www.w3.org/TR/2004/PR-xinclude-20040930/#IRIs Revision Changes Path 1.38 +159 -1 xml-xerces/java/src/org/apache/xerces/xinclude/XIncludeHandler.java Index: XIncludeHandler.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/xinclude/XIncludeHandler.java,v retrieving revision 1.37 retrieving revision 1.38 diff -u -r1.37 -r1.38 --- XIncludeHandler.java 30 Sep 2004 04:26:30 -0000 1.37 +++ XIncludeHandler.java 1 Oct 2004 03:06:13 -0000 1.38 @@ -1354,6 +1354,38 @@ parse = XINCLUDE_PARSE_XML; } + URI hrefURI = null; + + // Check whether href is correct and perform escaping as per section 4.1.1 of the XInclude spec. + // Report fatal error if the href value contains a fragment identifier or if the value after + // escaping is a syntactically invalid URI or IRI. 
+ if (href != null) { + try { + hrefURI = new URI(href, true); + if (hrefURI.getFragment() != null) { + reportFatalError("HrefFragmentIdentifierIllegal", new Object[] {href}); + } + } + catch (URI.MalformedURIException exc) { + String newHref = escapeHref(href); + if (href != newHref) { + href = newHref; + try { + hrefURI = new URI(href, true); + if (hrefURI.getFragment() != null) { + reportFatalError("HrefFragmentIdentifierIllegal", new Object[] {href}); + } + } + catch (URI.MalformedURIException exc2) { + reportFatalError("HrefSyntacticallyInvalid", new Object[] {href}); + } + } + else { + reportFatalError("HrefSyntacticallyInvalid", new Object[] {href}); + } + } + } + // Verify that if an accept and/or an accept-language attribute exist // that the value(s) don't contain disallowed characters. if (accept != null && !isValidInHTTPHeader(accept)) { @@ -2583,5 +2615,131 @@ httpSource.setHTTPRequestProperty(XIncludeHandler.HTTP_ACCEPT_LANGUAGE, acceptLanguage); } return httpSource; + } + + // which ASCII characters need to be escaped + private static boolean gNeedEscaping[] = new boolean[128]; + // the first hex character if a character needs to be escaped + private static char gAfterEscaping1[] = new char[128]; + // the second hex character if a character needs to be escaped + private static char gAfterEscaping2[] = new char[128]; + private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; + // initialize the above 3 arrays + static { + char[] escChs = {' ', '<', '>', '"', '{', '}', '|', '\\', '^', '`'}; + int len = escChs.length; + char ch; + for (int i = 0; i < len; i++) { + ch = escChs[i]; + gNeedEscaping[ch] = true; + gAfterEscaping1[ch] = gHexChs[ch >> 4]; + gAfterEscaping2[ch] = gHexChs[ch & 0xf]; + } + } + + // + // Escape an href value according to (4.1.1): + // + // To convert the value of the href attribute to an IRI reference, the following characters must be escaped: + // space #x20 + // the 
delimiters < #x3C, > #x3E and " #x22 + // the unwise characters { #x7B, } #x7D, | #x7C, \ #x5C, ^ #x5E and ` #x60 + // + // To convert an IRI reference to a URI reference, the following characters must also be escaped: + // the Unicode plane 0 characters #xA0 - #xD7FF, #xF900-#xFDCF, #xFDF0-#xFFEF + // the Unicode plane 1-14 characters #x10000-#x1FFFD ... #xE0000-#xEFFFD + // + private String escapeHref(String href) { + int len = href.length(); + int ch; + StringBuffer buffer = new StringBuffer(len*3); + + // for each character in the href + int i = 0; + for (; i < len; i++) { + ch = href.charAt(i); + // if it's not an ASCII character (excluding 0x7F), break here, and use UTF-8 encoding + if (ch > 0x7E) { + break; + } + // abort: href does not allow this character + if (ch < 0x20) { + return href; + } + if (gNeedEscaping[ch]) { + buffer.append('%'); + buffer.append(gAfterEscaping1[ch]); + buffer.append(gAfterEscaping2[ch]); + } + else { + buffer.append((char)ch); + } + } + + // we saw some non-ascii character + if (i < len) { + // check if remainder of href contains any illegal characters before proceeding + for (int j = i; j < len; ++j) { + ch = href.charAt(j); + if ((ch >= 0x20 && ch <= 0x7E) || + (ch >= 0xA0 && ch <= 0xD7FF) || + (ch >= 0xF900 && ch <= 0xFDCF) || + (ch >= 0xFDF0 && ch <= 0xFFEF)) { + continue; + } + if (XMLChar.isHighSurrogate(ch) && ++j < len) { + int ch2 = href.charAt(j); + if (XMLChar.isLowSurrogate(ch2)) { + ch2 = XMLChar.supplemental((char)ch, (char)ch2); + if (ch2 < 0xF0000 && (ch2 & 0xFFFF) <= 0xFFFD) { + continue; + } + } + } + // abort: href does not allow this character + return href; + } + + // get UTF-8 bytes for the remaining sub-string + byte[] bytes = null; + byte b; + try { + bytes = href.substring(i).getBytes("UTF-8"); + } catch (java.io.UnsupportedEncodingException e) { + // should never happen + return href; + } + len = bytes.length; + + // for each byte + for (i = 0; i < len; i++) { + b = bytes[i]; + // for non-ascii 
character: make it positive, then escape + if (b < 0) { + ch = b + 256; + buffer.append('%'); + buffer.append(gHexChs[ch >> 4]); + buffer.append(gHexChs[ch & 0xf]); + } + else if (gNeedEscaping[b]) { + buffer.append('%'); + buffer.append(gAfterEscaping1[b]); + buffer.append(gAfterEscaping2[b]); + } + else { + buffer.append((char)b); + } + } + } + + // If escaping happened, create a new string; + // otherwise, return the orginal one. + if (buffer.length() != len) { + return buffer.toString(); + } + else { + return href; + } } } 1.11 +2 -0 xml-xerces/java/src/org/apache/xerces/impl/msg/XIncludeMessages.properties Index: XIncludeMessages.properties =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/msg/XIncludeMessages.properties,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- XIncludeMessages.properties 16 Aug 2004 01:05:01 -0000 1.10 +++ XIncludeMessages.properties 1 Oct 2004 03:06:13 -0000 1.11 @@ -23,6 +23,8 @@ MultipleRootElements = A well-formed document must not contain multiple root elements. ContentIllegalAtTopLevel = The replacement of an 'include' element appearing as the document element in the top-level source infoset cannot contain characters. UnexpandedEntityReferenceIllegal = The replacement of an 'include' element appearing as the document element in the top-level source infoset cannot contain unexpanded entity references. +HrefFragmentIdentifierIllegal = Fragment identifiers must not be used. The ''href'' attribute value ''{0}'' is not permitted. +HrefSyntacticallyInvalid = ''href'' attribute value ''{0}'' is syntactically invalid. After applying escaping rules the value is neither a syntactically correct URI or IRI. 
# Messages from erroneous set-up IncompatibleNamespaceContext = The type of the NamespaceContext is incompatible with using XInclude; it must be an instance of XIncludeNamespaceSupport --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]