This patch fixes the problems encountered parsing larger XML files, and adds some small validation features and fixes. The parser has now been tested successfully with a number of large XML files and external general entities, so I'm reverting the service description for the SAX parser factory.
2005-12-27 Chris Burdess <[EMAIL PROTECTED]>
* gnu/java/net/CRLFInputStream.java,
gnu/xml/stream/SAXParser.java,
gnu/xml/stream/XMLParser.java,
resource/META-INF/services/javax.xml.parsers.SAXParserFactory:
Fix XML parser stream issues. Add support for ignorable whitespace
to SAX parser. Allow validating parser to parse invalid files.
* gnu/xml/stream/BufferedReader.java,
gnu/xml/stream/XMLInputStreamReader.java: Move buffering
functionality of XMLInputStreamReader to BufferedReader.
--
Chris Burdess
"They that can give up essential liberty to obtain a little safety
deserve neither liberty nor safety." - Benjamin Franklin
Index: resource/META-INF/services/javax.xml.parsers.SAXParserFactory
===================================================================
RCS file: /cvsroot/classpath/classpath/resource/META-INF/services/javax.xml.parsers.SAXParserFactory,v
retrieving revision 1.3
diff -u -r1.3 javax.xml.parsers.SAXParserFactory
--- resource/META-INF/services/javax.xml.parsers.SAXParserFactory 25 Dec 2005 09:49:42 -0000 1.3
+++ resource/META-INF/services/javax.xml.parsers.SAXParserFactory 27 Dec 2005 19:42:43 -0000
@@ -1 +1 @@
-gnu.xml.aelfred2.JAXPFactory
+gnu.xml.stream.SAXParserFactory
Index: gnu/java/net/CRLFInputStream.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/net/CRLFInputStream.java,v
retrieving revision 1.4
diff -u -r1.4 CRLFInputStream.java
--- gnu/java/net/CRLFInputStream.java 2 Jul 2005 20:32:13 -0000 1.4
+++ gnu/java/net/CRLFInputStream.java 27 Dec 2005 19:42:43 -0000
@@ -128,7 +128,7 @@
in.reset();
if (i != -1)
{
- l = in.read(b, off, i + 1); // read to CR
+ l = in.read(b, off, (i + 1) - off); // read to CR
in.read(); // skip LF
b[i] = LF; // fix CR as LF
}
Index: gnu/xml/stream/BufferedReader.java
===================================================================
RCS file: gnu/xml/stream/BufferedReader.java
diff -N gnu/xml/stream/BufferedReader.java
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ gnu/xml/stream/BufferedReader.java 27 Dec 2005 19:42:43 -0000
@@ -0,0 +1,208 @@
+/* BufferedReader.java --
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.xml.stream;
+
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * A mark-capable buffered reader.
+ *
+ * @author <a href='mailto:[EMAIL PROTECTED]'>Chris Burdess</a>
+ */
+class BufferedReader
+ extends Reader
+{
+
+ static final int DEFAULT_BUFFER_SIZE = 4096;
+
+ final Reader in;
+ char[] buf;
+ int pos, count, markpos, marklimit, bufferSize;
+
+ BufferedReader(Reader in)
+ {
+ this(in, DEFAULT_BUFFER_SIZE);
+ }
+
+ BufferedReader(Reader in, int bufferSize)
+ {
+ if (bufferSize < 1)
+ throw new IllegalArgumentException();
+ this.in = in;
+ this.bufferSize = bufferSize;
+ buf = new char[bufferSize];
+ pos = count = bufferSize;
+ }
+
+ public void close()
+ throws IOException
+ {
+ buf = null;
+ pos = count = 0;
+ markpos = -1;
+ in.close();
+ }
+
+ public void mark(int readlimit)
+ throws IOException
+ {
+ marklimit = readlimit;
+ markpos = pos;
+ //System.out.println("--mark@"+Integer.toHexString(pos)+":"+marklimit);
+ }
+
+ public boolean markSupported()
+ {
+ return true;
+ }
+
+ public int read()
+ throws IOException
+ {
+ if (pos >= count && !refill())
+ return -1;
+ //System.out.println("--read1@"+Integer.toHexString(pos)+":"+new String(buf, pos, 1));
+ return (int) buf[pos++];
+ }
+
+ public int read(char[] b)
+ throws IOException
+ {
+ return read(b, 0, b.length);
+ }
+
+ public int read(char[] b, int off, int len)
+ throws IOException
+ {
+ if (off < 0 || len < 0 || b.length - off < len)
+ throw new IndexOutOfBoundsException();
+
+ if (len == 0)
+ return 0;
+
+ if (pos >= count && !refill())
+ return -1;
+
+ int ret = Math.min(count - pos, len);
+ System.arraycopy(buf, pos, b, off, ret);
+ //System.out.println("--read2@"+Integer.toHexString(pos)+":"+new String(b, off, ret)+" ("+ret+")");
+ pos += ret;
+ off += ret;
+ len -= ret;
+
+ while (len > 0 && refill())
+ {
+ int remain = Math.min(count - pos, len);
+ System.arraycopy(buf, pos, b, off, remain);
+ //System.out.println("--read3@"+Integer.toHexString(pos)+":"+new String(b, off, remain));
+ pos += remain;
+ off += remain;
+ len -= remain;
+ ret += remain;
+ }
+
+ return ret;
+ }
+
+ public void reset()
+ throws IOException
+ {
+ if (markpos == -1)
+ throw new IOException(buf == null ? "Stream closed." : "Invalid mark.");
+ pos = markpos;
+ //System.out.println("--reset@"+Integer.toHexString(pos));
+ }
+
+ public long skip(long n)
+ throws IOException
+ {
+ if (buf == null)
+ throw new IOException("Stream closed.");
+ //System.out.println("--skip:"+n);
+ final long origN = n;
+ while (n > 0L)
+ {
+ if (pos >= count && !refill())
+ break;
+ int numread = (int) Math.min((long) (count - pos), n);
+ pos += numread;
+ n -= numread;
+ }
+ return origN - n;
+ }
+
+ private boolean refill()
+ throws IOException
+ {
+ if (buf == null)
+ throw new IOException("Stream closed.");
+
+ //System.out.println("--refill:pos="+Integer.toHexString(pos)+" count="+Integer.toHexString(count));
+ int markcount = count - markpos;
+ if (markpos == -1 || markcount >= marklimit)
+ {
+ markpos = -1;
+ pos = count = 0;
+ //System.out.println("--refill1@"+Integer.toHexString(pos));
+ }
+ else
+ {
+ char[] newbuf = buf;
+ if (markpos < bufferSize)
+ {
+ newbuf = new char[count - markpos + bufferSize];
+ }
+ System.arraycopy(buf, markpos, newbuf, 0, markcount);
+ buf = newbuf;
+ count = markcount;
+ pos -= markpos;
+ markpos = 0;
+ //System.out.println("--refill2@"+Integer.toHexString(pos)+":"+Integer.toHexString(count));
+ }
+
+ int numread = in.read(buf, count, bufferSize);
+ if (numread <= 0)
+ return false;
+
+ //System.out.println("--refill3("+Integer.toHexString(numread)+"):"+new String(buf, count, numread));
+ count += numread;
+ return true;
+ }
+
+}
Index: gnu/xml/stream/SAXParser.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/xml/stream/SAXParser.java,v
retrieving revision 1.7
diff -u -r1.7 SAXParser.java
--- gnu/xml/stream/SAXParser.java 24 Dec 2005 17:56:32 -0000 1.7
+++ gnu/xml/stream/SAXParser.java 27 Dec 2005 19:42:43 -0000
@@ -357,8 +357,10 @@
if (contentHandler != null)
{
char[] b = reader.getTextCharacters();
- // TODO determine whether whitespace is ignorable
- contentHandler.characters(b, 0, b.length);
+ if (isIgnorableWhitespace(parser, b, false))
+ contentHandler.ignorableWhitespace(b, 0, b.length);
+ else
+ contentHandler.characters(b, 0, b.length);
}
break;
case XMLStreamConstants.CDATA:
@@ -367,8 +369,10 @@
if (contentHandler != null)
{
char[] b = reader.getTextCharacters();
- // TODO determine whether whitespace and ignorable
- contentHandler.characters(b, 0, b.length);
+ if (isIgnorableWhitespace(parser, b, true))
+ contentHandler.ignorableWhitespace(b, 0, b.length);
+ else
+ contentHandler.characters(b, 0, b.length);
}
if (lexicalHandler != null)
lexicalHandler.endCDATA();
@@ -619,6 +623,31 @@
}
}
+ private boolean isIgnorableWhitespace(XMLParser reader, char[] b,
+ boolean testCharacters)
+ {
+ XMLParser.Doctype doctype = reader.doctype;
+ if (doctype == null)
+ return false;
+ String currentElement = reader.getCurrentElement();
+ XMLParser.ContentModel model = doctype.getElementModel(currentElement);
+ if (model == null || model.type != XMLParser.ContentModel.ELEMENT)
+ return false;
+ boolean white = true;
+ if (testCharacters)
+ {
+ for (int i = 0; i < b.length; i++)
+ {
+ if (b[i] != ' ' && b[i] != '\t' && b[i] != '\n' && b[i] != '\r')
+ {
+ white = false;
+ break;
+ }
+ }
+ }
+ return white;
+ }
+
public void parse(String systemId)
throws IOException, SAXException
{
Index: gnu/xml/stream/XMLInputStreamReader.java
===================================================================
RCS file: gnu/xml/stream/XMLInputStreamReader.java
diff -N gnu/xml/stream/XMLInputStreamReader.java
--- gnu/xml/stream/XMLInputStreamReader.java 12 Dec 2005 11:35:38 -0000 1.1
+++ /dev/null 1 Jan 1970 00:00:00 -0000
@@ -1,211 +0,0 @@
-/* XMLInputStreamReader.java --
- Copyright (C) 2005 Free Software Foundation, Inc.
-
-This file is part of GNU Classpath.
-
-GNU Classpath is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU Classpath is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU Classpath; see the file COPYING. If not, write to the
-Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-02110-1301 USA.
-
-Linking this library statically or dynamically with other modules is
-making a combined work based on this library. Thus, the terms and
-conditions of the GNU General Public License cover the whole
-combination.
-
-As a special exception, the copyright holders of this library give you
-permission to link this library with independent modules to produce an
-executable, regardless of the license terms of these independent
-modules, and to copy and distribute the resulting executable under
-terms of your choice, provided that you also meet, for each linked
-independent module, the terms and conditions of the license of that
-module. An independent module is a module which is not derived from
-or based on this library. If you modify this library, you may extend
-this exception to your version of the library, but you are not
-obligated to do so. If you do not wish to do so, delete this
-exception statement from your version. */
-
-package gnu.xml.stream;
-
-import java.io.FilterReader;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.IOException;
-import java.io.Reader;
-import java.io.UnsupportedEncodingException;
-
-/**
- * A mark-capable reader that can copy its buffer state into another
- * instance with a different encoding.
- *
- * @author <a href='mailto:[EMAIL PROTECTED]'>Chris Burdess</a>
- */
-class XMLInputStreamReader
- extends FilterReader
-{
-
- final InputStream is;
- char[] buf;
- int pos, count, markpos, marklimit, bufferSize;
-
- XMLInputStreamReader(InputStream is, String encoding)
- throws UnsupportedEncodingException
- {
- super(new InputStreamReader(is, encoding));
- this.is = is;
- final int size = 2048;
- buf = new char[size];
- pos = count = bufferSize = size;
- }
-
- XMLInputStreamReader(XMLInputStreamReader reader, String encoding)
- throws UnsupportedEncodingException
- {
- this(reader.is, encoding);
- buf = reader.buf;
- pos = reader.pos;
- count = reader.count;
- markpos = reader.markpos;
- marklimit = reader.marklimit;
- bufferSize = reader.bufferSize;
- }
-
- public void close()
- throws IOException
- {
- buf = null;
- pos = count = 0;
- markpos = -1;
- super.close();
- }
-
- public void mark(int readlimit)
- throws IOException
- {
- marklimit = readlimit;
- markpos = pos;
- }
-
- public boolean markSupported()
- {
- return true;
- }
-
- public int read()
- throws IOException
- {
- if (pos >= count && !refill())
- return -1;
- //System.out.println("read1:"+new String(buf, pos, 1));
- return (int) buf[pos++];
- }
-
- public int read(char[] b)
- throws IOException
- {
- return read(b, 0, b.length);
- }
-
- public int read(char[] b, int off, int len)
- throws IOException
- {
- if (off < 0 || len < 0 || b.length - off < len)
- throw new IndexOutOfBoundsException();
-
- if (len == 0)
- return 0;
-
- if (pos >= count && !refill())
- return -1;
-
- int ret = Math.min(count - pos, len);
- System.arraycopy(buf, pos, b, off, ret);
- //System.out.println("read2:"+new String(b, off, ret));
- pos += ret;
- off += ret;
- len -= ret;
-
- while (len > 0 && refill())
- {
- int remain = Math.min(count - pos, len);
- System.arraycopy(buf, pos, b, off, remain);
- //System.out.println("read3:"+new String(b, off, remain));
- pos += remain;
- off += remain;
- len -= remain;
- ret += remain;
- }
-
- return ret;
- }
-
- public void reset()
- throws IOException
- {
- if (markpos == -1)
- throw new IOException(buf == null ? "Stream closed." : "Invalid mark.");
- pos = markpos;
- }
-
- public long skip(long n)
- throws IOException
- {
- if (buf == null)
- throw new IOException("Stream closed.");
- final long origN = n;
- while (n > 0L)
- {
- if (pos >= count && !refill())
- break;
- int numread = (int) Math.min((long) (count - pos), n);
- pos += numread;
- n -= numread;
- }
- return origN - n;
- }
-
- private boolean refill()
- throws IOException
- {
- if (buf == null)
- throw new IOException("Stream closed.");
-
- if (markpos == -1 || count - markpos >= marklimit)
- {
- markpos = -1;
- pos = count = 0;
- }
- else
- {
- char[] newbuf = buf;
- if (markpos < bufferSize)
- {
- newbuf = new char[count - markpos + bufferSize];
- }
- System.arraycopy(buf, markpos, newbuf, 0, count - markpos);
- buf = newbuf;
- count -= markpos;
- pos -= markpos;
- markpos = 0;
- }
-
- int numread = super.read(buf, count, bufferSize);
-
- if (numread <= 0)
- return false;
-
- count += numread;
- return true;
- }
-
-}
Index: gnu/xml/stream/XMLParser.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/xml/stream/XMLParser.java,v
retrieving revision 1.11
diff -u -r1.11 XMLParser.java
--- gnu/xml/stream/XMLParser.java 24 Dec 2005 17:56:32 -0000 1.11
+++ gnu/xml/stream/XMLParser.java 27 Dec 2005 19:42:44 -0000
@@ -415,8 +415,6 @@
public String getEncoding()
{
- if (input.forceReader)
- return null;
return (input.inputEncoding == null) ? "UTF-8" : input.inputEncoding;
}
@@ -838,7 +836,7 @@
{
reset();
event = readCharData(null);
- if (validating)
+ if (validating && doctype != null)
validatePCData(buf.toString());
}
}
@@ -849,12 +847,13 @@
buf.append(elementName);
state = stack.isEmpty() ? MISC : CONTENT;
event = XMLStreamConstants.END_ELEMENT;
- if (validating)
+ if (validating && doctype != null)
endElementValidationHook();
break;
case INIT: // XMLDecl?
if (tryRead(TEST_XML_DECL))
readXMLDecl();
+ input.finalizeEncoding();
event = XMLStreamConstants.START_DOCUMENT;
state = PROLOG;
break;
@@ -922,6 +921,13 @@
}
}
+ // package private
+
+ String getCurrentElement()
+ {
+ return (String) stack.getLast();
+ }
+
// private
private void mark(int limit)
@@ -989,21 +995,24 @@
char[] chars = delim.toCharArray();
int len = chars.length;
mark(len);
- int l2 = read(tmpBuf, 0, len);
- if (l2 != len)
+ int off = 0;
+ do
{
- reset();
- error("EOF before required string", delim);
+ int l2 = read(tmpBuf, off, len - off);
+ if (l2 == -1)
+ {
+ reset();
+ error("EOF before required string", delim);
+ }
+ off += l2;
}
- else
+ while (off < len);
+ for (int i = 0; i < chars.length; i++)
{
- for (int i = 0; i < chars.length; i++)
+ if (chars[i] != tmpBuf[i])
{
- if (chars[i] != tmpBuf[i])
- {
- reset();
- error("required string", delim);
- }
+ reset();
+ error("required string", delim);
}
}
}
@@ -1035,12 +1044,25 @@
char[] chars = test.toCharArray();
int len = chars.length;
mark(len);
+ int count = 0;
int l2 = read(tmpBuf, 0, len);
- if (l2 < len)
+ if (l2 == -1)
{
reset();
return false;
}
+ count += l2;
+ while (count < len)
+ {
+ // force read
+ int c = read();
+ if (c == -1)
+ {
+ reset();
+ return false;
+ }
+ tmpBuf[count++] = (char) c;
+ }
for (int i = 0; i < len; i++)
{
if (chars[i] != tmpBuf[i])
@@ -1227,6 +1249,7 @@
input.init();
if (tryRead(TEST_XML_DECL))
readTextDecl();
+ input.finalizeEncoding();
}
//System.out.println("pushInput "+name+" "+url);
}
@@ -1360,10 +1383,9 @@
require("encoding");
readEq();
String enc = readLiteral(flags);
- if (!input.forceReader)
- input.setInputEncoding(enc);
skipWhitespace();
require("?>");
+ input.setInputEncoding(enc);
}
/**
@@ -1393,8 +1415,6 @@
error("whitespace required before 'encoding='");
readEq();
xmlEncoding = readLiteral(flags);
- if (!input.forceReader)
- input.setInputEncoding(xmlEncoding);
white = tryWhitespace();
}
@@ -1414,6 +1434,8 @@
skipWhitespace();
require("?>");
+ if (xmlEncoding != null)
+ input.setInputEncoding(xmlEncoding);
}
/**
@@ -2147,7 +2169,7 @@
error("unbound attribute prefix", attr.prefix);
}
}
- if (validating)
+ if (validating && doctype != null)
{
validateStartElement(elementName);
currentContentModel = doctype.getElementModel(elementName);
@@ -2283,7 +2305,7 @@
// Make element name available
buf.setLength(0);
buf.append(expected);
- if (validating)
+ if (validating && doctype != null)
endElementValidationHook();
}
@@ -3309,8 +3331,6 @@
{
if (currentContentModel == null)
return; // root element
- if (doctype == null)
- error("document does not specify a DTD");
switch (currentContentModel.type)
{
case ContentModel.EMPTY:
@@ -3869,7 +3889,7 @@
InputStream in;
Reader reader;
- boolean forceReader, initialized;
+ boolean initialized;
String inputEncoding;
boolean xml11;
@@ -3891,38 +3911,25 @@
}
Input(InputStream in, Reader reader, String publicId, String systemId,
- String name, String defaultEncoding)
+ String name, String inputEncoding)
{
- if (defaultEncoding == null)
- defaultEncoding = "UTF-8";
- if (in != null && !in.markSupported())
- in = new BufferedInputStream(in);
- this.in = in;
+ if (inputEncoding == null)
+ inputEncoding = "UTF-8";
+ this.inputEncoding = inputEncoding;
this.publicId = publicId;
this.systemId = systemId;
this.name = name;
- if (reader == null)
+ if (in != null)
{
- try
- {
- in = new CRLFInputStream(in);
- reader = new XMLInputStreamReader(in, defaultEncoding);
- }
- catch (UnsupportedEncodingException e)
- {
- RuntimeException e2 =
- new RuntimeException(defaultEncoding +
- " charset not supported");
- e2.initCause(e);
- throw e2;
- }
+ if (reader != null)
+ throw new IllegalStateException("both byte and char streams "+
+ "specified");
+ in = new CRLFInputStream(in);
+ in = new BufferedInputStream(in);
+ this.in = in;
}
else
- {
- forceReader = true;
- reader = new CRLFReader(reader);
- }
- this.reader = reader;
+ this.reader = new CRLFReader(reader);
initialized = false;
}
@@ -3953,7 +3960,7 @@
{
if (initialized)
return;
- if (!forceReader && in != null)
+ if (in != null)
detectEncoding();
initialized = true;
}
@@ -3965,7 +3972,10 @@
markOffset = offset;
markLine = line;
markColumn = column;
- reader.mark(len);
+ if (reader != null)
+ reader.mark(len);
+ else
+ in.mark(len);
}
/**
@@ -3975,9 +3985,10 @@
throws IOException
{
offset++;
- int ret = reader.read();
- //System.out.println("read1:"+((char) ret));
- if (ret == 0x0d || (xml11 && ret == 0x85))
+ int ret = (reader != null) ? reader.read() : in.read();
+ //if (ret != -1)
+ // System.out.println(" read1:"+((char) ret));
+ if (ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028)))
ret = 0x0a;
if (ret == 0x0a)
{
@@ -3995,14 +4006,28 @@
int read(char[] b, int off, int len)
throws IOException
{
- int ret = reader.read(b, off, len);
+ int ret;
+ if (reader != null)
+ ret = reader.read(b, off, len);
+ else
+ {
+ byte[] b2 = new byte[len];
+ ret = in.read(b2, 0, len);
+ if (ret != -1)
+ {
+ String s = new String(b2, 0, ret, inputEncoding);
+ char[] c = s.toCharArray();
+ ret = c.length;
+ System.arraycopy(c, 0, b, off, ret);
+ }
+ }
if (ret != -1)
{
- //System.out.println("read:"+new String(b, off, ret));
+ //System.out.println(" read:"+new String(b, off, ret));
for (int i = 0; i < ret; i++)
{
char c = b[off + i];
- if (c == 0x0d || (xml11 && c == 0x85))
+ if (c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028)))
{
c = 0x0a;
b[off + i] = c;
@@ -4023,7 +4048,10 @@
throws IOException
{
//System.out.println(" reset");
- reader.reset();
+ if (reader != null)
+ reader.reset();
+ else
+ in.reset();
offset = markOffset;
line = markLine;
column = markColumn;
@@ -4066,33 +4094,46 @@
// 4-byte encodings
if (equals(SIGNATURE_UCS_4_1234, signature))
- setInputEncoding("UTF-32BE");
+ {
+ in.read();
+ in.read();
+ in.read();
+ in.read();
+ setInputEncoding("UTF-32BE");
+ }
else if (equals(SIGNATURE_UCS_4_4321, signature))
- setInputEncoding("UTF-32LE");
+ {
+ in.read();
+ in.read();
+ in.read();
+ in.read();
+ setInputEncoding("UTF-32LE");
+ }
else if (equals(SIGNATURE_UCS_4_2143, signature) ||
equals(SIGNATURE_UCS_4_3412, signature))
throw new UnsupportedEncodingException("unsupported UCS-4 byte ordering");
+
// 2-byte encodings
else if (equals(SIGNATURE_UCS_2_12, signature))
{
- setInputEncoding("UTF-16BE");
in.read();
in.read();
+ setInputEncoding("UTF-16BE");
}
else if (equals(SIGNATURE_UCS_2_21, signature))
{
- setInputEncoding("UTF-16LE");
in.read();
in.read();
+ setInputEncoding("UTF-16LE");
}
else if (equals(SIGNATURE_UCS_2_12_NOBOM, signature))
{
- setInputEncoding("UTF-16BE");
+ //setInputEncoding("UTF-16BE");
throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
}
else if (equals(SIGNATURE_UCS_2_21_NOBOM, signature))
{
- setInputEncoding("UTF-16LE");
+ //setInputEncoding("UTF-16LE");
throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
}
// ASCII-derived encodings
@@ -4102,10 +4143,10 @@
}
else if (equals(SIGNATURE_UTF_8_BOM, signature))
{
- setInputEncoding("UTF-8");
in.read();
in.read();
in.read();
+ setInputEncoding("UTF-8");
}
}
@@ -4119,37 +4160,27 @@
return true;
}
- private void setInputEncoding(String encoding)
- throws UnsupportedEncodingException
+ void setInputEncoding(String encoding)
+ throws IOException
{
- if (!encoding.equals(inputEncoding) &&
- reader instanceof XMLInputStreamReader)
- {
- if (inputEncoding == "UTF-8" &&
- (encoding.startsWith("UTF-16") ||
- encoding.startsWith("UTF-32")))
- throw new UnsupportedEncodingException("document is not in its " +
- "declared encoding");
- inputEncoding = encoding;
- reader = new XMLInputStreamReader((XMLInputStreamReader) reader,
- encoding);
- }
- else
- {
- /*if (reporter != null)
- {
- try
- {
- reporter.report("unable to set input encoding '" + encoding +
- "': input is specified as reader", "WARNING",
- encoding, this);
- }
- catch (XMLStreamException e)
- {
- // Am I bothered?
- }}*/
- System.err.println("Can't set input encoding "+encoding);
- }
+ if (encoding.equals(inputEncoding))
+ return;
+ if (reader != null)
+ throw new UnsupportedEncodingException("document is not in its " +
+ "declared encoding: " +
+ inputEncoding);
+ inputEncoding = encoding;
+ finalizeEncoding();
+ }
+
+ void finalizeEncoding()
+ throws IOException
+ {
+ if (reader != null)
+ return;
+ //reader = new XMLInputStreamReader(in, inputEncoding);
+ reader = new BufferedReader(new InputStreamReader(in, inputEncoding));
+ mark(1);
}
}
pgpAY8CTH0JJ4.pgp
Description: PGP signature
_______________________________________________ Classpath-patches mailing list [email protected] http://lists.gnu.org/mailman/listinfo/classpath-patches
