sandygao 2004/01/20 09:01:53
Modified: java/src/org/apache/xerces/impl/dv/xs AnyURIDV.java
Log:
Fixing a bug in the schema anyURI implementation.
Now we accept special characters and non-ascii characters in anyURI values.
This partially fixes bug [18803].
Revision Changes Path
1.5 +100 -2 xml-xerces/java/src/org/apache/xerces/impl/dv/xs/AnyURIDV.java
Index: AnyURIDV.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/dv/xs/AnyURIDV.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- AnyURIDV.java 18 Nov 2002 23:10:10 -0000 1.4
+++ AnyURIDV.java 20 Jan 2004 17:01:53 -0000 1.5
@@ -2,7 +2,7 @@
* The Apache Software License, Version 1.1
*
*
- * Copyright (c) 2001, 2002 The Apache Software Foundation. All rights
+ * Copyright (c) 2001-2004 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -91,6 +91,8 @@
// check 3.2.17.c0 must: URI (rfc 2396/2723)
try {
if( content.length() != 0 ) {
+ // encode special characters using XLink 5.4 algorithm
+ content = encode(content);
// Support for relative URLs
// According to Java 1.1: URLs may also be specified with a
// String and the URL object that it is related to.
@@ -102,6 +104,102 @@
// REVISIT: do we need to return the new URI object?
return content;
+ }
+
+ // gNeedEscaping[c] is true when the ASCII character with code c (0..127) must be written as %HH
+ private static boolean gNeedEscaping[] = new boolean[128];
+ // gAfterEscaping1[c]: the first (high-nibble) hex digit of the %HH form of c; only set for escaped characters
+ private static char gAfterEscaping1[] = new char[128];
+ // gAfterEscaping2[c]: the second (low-nibble) hex digit of the %HH form of c; only set for escaped characters
+ private static char gAfterEscaping2[] = new char[128];
+ private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7',
+ '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
+ // fill the three lookup arrays above: codes 0x00-0x1F, then 0x7F, then the characters listed in escChs
+ static {
+ for (int i = 0; i <= 0x1f; i++) {
+ gNeedEscaping[i] = true;
+ gAfterEscaping1[i] = gHexChs[i >> 4];
+ gAfterEscaping2[i] = gHexChs[i & 0xf];
+ }
+ gNeedEscaping[0x7f] = true;
+ gAfterEscaping1[0x7f] = '7';
+ gAfterEscaping2[0x7f] = 'F';
+ char[] escChs = {' ', '<', '>', '"', '{', '}',
+ '|', '\\', '^', '~', '`'};
+ int len = escChs.length;
+ char ch;
+ for (int i = 0; i < len; i++) {
+ ch = escChs[i];
+ gNeedEscaping[ch] = true;
+ gAfterEscaping1[ch] = gHexChs[ch >> 4];
+ gAfterEscaping2[ch] = gHexChs[ch & 0xf];
+ }
+ }
+
+ // Encodes special characters in anyURI using %HH escapes (HH = two upper-case hex digits):
+ // special ASCII characters (0x00~0x1F, 0x7F, ' ', '<', '>', etc., as flagged in gNeedEscaping) are escaped directly,
+ // and from the first non-ASCII character (value >= 128) onward the rest is converted to UTF-8 and each byte >= 0x80 is escaped.
+ private static String encode(String anyURI){
+ int len = anyURI.length(), ch;
+ StringBuffer buffer = new StringBuffer(len*3);
+
+ // phase 1: copy/escape the leading run of ASCII characters in the anyURI
+ int i = 0;
+ for (; i < len; i++) {
+ ch = anyURI.charAt(i);
+ // if it's not an ASCII character, stop here (i marks its index) and let phase 2 handle the rest via UTF-8
+ if (ch >= 128)
+ break;
+ if (gNeedEscaping[ch]) {
+ buffer.append('%');
+ buffer.append(gAfterEscaping1[ch]);
+ buffer.append(gAfterEscaping2[ch]);
+ }
+ else {
+ buffer.append((char)ch);
+ }
+ }
+
+ // phase 2: the loop above stopped early, i.e. we saw some non-ASCII character at index i
+ if (i < len) {
+ // get UTF-8 bytes for the remaining sub-string (from index i to the end)
+ byte[] bytes = null;
+ byte b;
+ try {
+ bytes = anyURI.substring(i).getBytes("UTF-8");
+ } catch (java.io.UnsupportedEncodingException e) {
+ // should never happen; if it does, give up and hand back the input unescaped
+ return anyURI;
+ }
+ len = bytes.length;
+
+ // for each UTF-8 byte of the remaining sub-string (i and len are reused for the byte array here)
+ for (i = 0; i < len; i++) {
+ b = bytes[i];
+ // a negative byte has its high bit set (non-ASCII): add 256 to get its unsigned value 128..255, then escape
+ if (b < 0) {
+ ch = b + 256;
+ buffer.append('%');
+ buffer.append(gHexChs[ch >> 4]);
+ buffer.append(gHexChs[ch & 0xf]);
+ }
+ else if (gNeedEscaping[b]) {
+ buffer.append('%');
+ buffer.append(gAfterEscaping1[b]);
+ buffer.append(gAfterEscaping2[b]);
+ }
+ else {
+ buffer.append((char)b);
+ }
+ }
+ }
+
+ // If the buffer length differs from len, treat it as "encoding happened" and create a new string;
+ // otherwise, return the original one. NOTE(review): len was reassigned to bytes.length above when phase 2 ran - confirm this comparison is intended.
+ if (buffer.length() != len)
+ return buffer.toString();
+ else
+ return anyURI;
}
} // class AnyURIDV
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]