neilg 2003/01/13 14:21:48 Modified: java/src/org/apache/xerces/impl XMLEntityManager.java Log: fixing bug# 15574. Thanks to Michael Glavassevich for both pointing out these bugs and providing an excellent fix for them! Revision Changes Path 1.56 +70 -1 xml-xerces/java/src/org/apache/xerces/impl/XMLEntityManager.java Index: XMLEntityManager.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLEntityManager.java,v retrieving revision 1.55 retrieving revision 1.56 diff -u -r1.55 -r1.56 --- XMLEntityManager.java 8 Jan 2003 23:04:10 -0000 1.55 +++ XMLEntityManager.java 13 Jan 2003 22:21:47 -0000 1.56 @@ -973,6 +973,75 @@ // use specified encoding else { + encoding = encoding.toUpperCase(Locale.ENGLISH); + + // If encoding is UTF-8, consume BOM if one is present. + if (encoding.equals("UTF-8")) { + final int[] b3 = new int[3]; + int count = 0; + for (; count < 3; ++count) { + b3[count] = stream.read(); + if (b3[count] == -1) + break; + } + if (count == 3) { + if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) { + // First three bytes are not BOM, so reset. + stream.reset(); + } + } + else { + stream.reset(); + } + } + // If encoding is UCS-4, we still need to read the first four bytes + // in order to discover the byte order. + else if (encoding.equals("ISO-10646-UCS-4")) { + final int[] b4 = new int[4]; + int count = 0; + for (; count < 4; ++count) { + b4[count] = stream.read(); + if (b4[count] == -1) + break; + } + stream.reset(); + + // Ignore unusual octet order for now. 
+ if (count == 4) { + // UCS-4, big endian (1234) + if (b4[0] == 0x00 && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x3C) { + isBigEndian = Boolean.TRUE; + } + // UCS-4, little endian (4321) + else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x00) { + isBigEndian = Boolean.FALSE; + } + } + } + // If encoding is UCS-2, we still need to read the first four bytes + // in order to discover the byte order. + else if (encoding.equals("ISO-10646-UCS-2")) { + final int[] b4 = new int[4]; + int count = 0; + for (; count < 4; ++count) { + b4[count] = stream.read(); + if (b4[count] == -1) + break; + } + stream.reset(); + + if (count == 4) { + // UCS-2, big endian + if (b4[0] == 0x00 && b4[1] == 0x3C && b4[2] == 0x00 && b4[3] == 0x3F) { + isBigEndian = Boolean.TRUE; + } + // UCS-2, little endian + else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x3F && b4[3] == 0x00) { + isBigEndian = Boolean.FALSE; + } + } + } + reader = createReader(stream, encoding, isBigEndian); }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]