neilg       2003/01/13 14:21:48

  Modified:    java/src/org/apache/xerces/impl XMLEntityManager.java
  Log:
  Fixing bug #15574.  Thanks to Michael Glavassevich for both pointing out these bugs and providing an excellent fix for them!
  
  Revision  Changes    Path
  1.56      +70 -1     xml-xerces/java/src/org/apache/xerces/impl/XMLEntityManager.java
  
  Index: XMLEntityManager.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLEntityManager.java,v
  retrieving revision 1.55
  retrieving revision 1.56
  diff -u -r1.55 -r1.56
  --- XMLEntityManager.java     8 Jan 2003 23:04:10 -0000       1.55
  +++ XMLEntityManager.java     13 Jan 2003 22:21:47 -0000      1.56
  @@ -973,6 +973,75 @@
   
               // use specified encoding
               else {
  +                encoding = encoding.toUpperCase(Locale.ENGLISH);
  +                
  +                // If encoding is UTF-8, consume BOM if one is present.
  +                if (encoding.equals("UTF-8")) {
  +                    final int[] b3 = new int[3];
  +                    int count = 0;
  +                    for (; count < 3; ++count) {
  +                        b3[count] = stream.read();
  +                        if (b3[count] == -1)
  +                            break;
  +                    }
  +                    if (count == 3) {
  +                        if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) {
  +                            // First three bytes are not BOM, so reset.
  +                            stream.reset();
  +                        }
  +                    }
  +                    else {
  +                        stream.reset();
  +                    }
  +                }
  +                // If encoding is UCS-4, we still need to read the first four bytes
  +                // in order to discover the byte order.
  +                else if (encoding.equals("ISO-10646-UCS-4")) {
  +                    final int[] b4 = new int[4];
  +                    int count = 0;
  +                    for (; count < 4; ++count) {
  +                        b4[count] = stream.read();
  +                        if (b4[count] == -1)
  +                            break;
  +                    }
  +                    stream.reset();
  +
  +                    // Ignore unusual octet order for now.
  +                    if (count == 4) {
  +                        // UCS-4, big endian (1234)
  +                        if (b4[0] == 0x00 && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x3C) {
  +                            isBigEndian = Boolean.TRUE;
  +                        }
  +                        // UCS-4, little endian (4321)
  +                        else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x00) {
  +                            isBigEndian = Boolean.FALSE;
  +                        }
  +                    }
  +                }
  +                // If encoding is UCS-2, we still need to read the first four bytes
  +                // in order to discover the byte order.
  +                else if (encoding.equals("ISO-10646-UCS-2")) {
  +                    final int[] b4 = new int[4];
  +                    int count = 0;
  +                    for (; count < 4; ++count) {
  +                        b4[count] = stream.read();
  +                        if (b4[count] == -1)
  +                            break;
  +                    }
  +                    stream.reset();
  +
  +                    if (count == 4) {
  +                        // UCS-2, big endian
  +                        if (b4[0] == 0x00 && b4[1] == 0x3C && b4[2] == 0x00 && b4[3] == 0x3F) {
  +                            isBigEndian = Boolean.TRUE;
  +                        }
  +                        // UCS-2, little endian
  +                        else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x3F && b4[3] == 0x00) {
  +                            isBigEndian = Boolean.FALSE;
  +                        }
  +                    }
  +                }
  +                
                   reader = createReader(stream, encoding, isBigEndian);
               }
   
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to