neilg 2003/01/13 14:21:48
Modified: java/src/org/apache/xerces/impl XMLEntityManager.java
Log:
fixing bug# 15574. Thanks to Michael Glavassevich for both pointing out these bugs
and providing an excellent fix for them!
Revision Changes Path
1.56 +70 -1 xml-xerces/java/src/org/apache/xerces/impl/XMLEntityManager.java
Index: XMLEntityManager.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLEntityManager.java,v
retrieving revision 1.55
retrieving revision 1.56
diff -u -r1.55 -r1.56
--- XMLEntityManager.java 8 Jan 2003 23:04:10 -0000 1.55
+++ XMLEntityManager.java 13 Jan 2003 22:21:47 -0000 1.56
@@ -973,6 +973,75 @@
// use specified encoding
else {
+ encoding = encoding.toUpperCase(Locale.ENGLISH);
+
+ // If encoding is UTF-8, consume BOM if one is present.
+ if (encoding.equals("UTF-8")) {
+ final int[] b3 = new int[3];
+ int count = 0;
+ for (; count < 3; ++count) {
+ b3[count] = stream.read();
+ if (b3[count] == -1)
+ break;
+ }
+ if (count == 3) {
+ if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) {
+ // First three bytes are not BOM, so reset.
+ stream.reset();
+ }
+ }
+ else {
+ stream.reset();
+ }
+ }
+ // If encoding is UCS-4, we still need to read the first four bytes
+ // in order to discover the byte order.
+ else if (encoding.equals("ISO-10646-UCS-4")) {
+ final int[] b4 = new int[4];
+ int count = 0;
+ for (; count < 4; ++count) {
+ b4[count] = stream.read();
+ if (b4[count] == -1)
+ break;
+ }
+ stream.reset();
+
+ // Ignore unusual octet order for now.
+ if (count == 4) {
+ // UCS-4, big endian (1234)
+ if (b4[0] == 0x00 && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x3C) {
+ isBigEndian = Boolean.TRUE;
+ }
+ // UCS-4, little endian (4321)
+ else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x00) {
+ isBigEndian = Boolean.FALSE;
+ }
+ }
+ }
+ // If encoding is UCS-2, we still need to read the first four bytes
+ // in order to discover the byte order.
+ else if (encoding.equals("ISO-10646-UCS-2")) {
+ final int[] b4 = new int[4];
+ int count = 0;
+ for (; count < 4; ++count) {
+ b4[count] = stream.read();
+ if (b4[count] == -1)
+ break;
+ }
+ stream.reset();
+
+ if (count == 4) {
+ // UCS-2, big endian
+ if (b4[0] == 0x00 && b4[1] == 0x3C && b4[2] == 0x00 && b4[3] == 0x3F) {
+ isBigEndian = Boolean.TRUE;
+ }
+ // UCS-2, little endian
+ else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x3F && b4[3] == 0x00) {
+ isBigEndian = Boolean.FALSE;
+ }
+ }
+ }
+
reader = createReader(stream, encoding, isBigEndian);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]