mrglavas 2004/03/05 14:17:01 Modified: java/src/org/apache/xerces/xinclude XIncludeTextReader.java java/src/org/apache/xerces/impl/io MalformedByteSequenceException.java java/src/org/apache/xerces/impl XMLDocumentScannerImpl.java XMLEntityManager.java XMLDocumentFragmentScannerImpl.java Log: Fixing Bug #27083 & #27422:
http://nagoya.apache.org/bugzilla/show_bug.cgi?id=27083 http://nagoya.apache.org/bugzilla/show_bug.cgi?id=27422 Malformed UTF-8 and US-ASCII byte sequences encountered while parsing a document are well-formedness errors. Report them to the error reporter like every other well-formedness violation instead of letting the IOException propagate to the user. This fixes a bug in XInclude processing where the processor would treat an IOException thrown because one of our readers detected a bad byte sequence as a ResourceError, causing the XInclude processor to look for a fallback to parse instead of failing. The result of this would be a malformed stream of events. For ASCII, we were only using our built-in reader if the encoding specified was the canonical name US-ASCII. For aliases, the Java InputStreamReader was being picked up, which doesn't seem to report bad ASCII bytes. Now we look into the EncodingMap so that we cover all of ASCII's aliases. Revision Changes Path 1.9 +11 -9 xml-xerces/java/src/org/apache/xerces/xinclude/XIncludeTextReader.java Index: XIncludeTextReader.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/xinclude/XIncludeTextReader.java,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- XIncludeTextReader.java 24 Feb 2004 23:15:52 -0000 1.8 +++ XIncludeTextReader.java 5 Mar 2004 22:17:00 -0000 1.9 @@ -229,19 +229,15 @@ consumeBOM(stream, encoding); // If the document is UTF-8 or US-ASCII use - // the Xerces readers for these encodings. + // the Xerces readers for these encodings. For + // US-ASCII consult the encoding map since + // this encoding has many aliases. 
if (encoding.equals("UTF-8")) { return new UTF8Reader(stream, XMLEntityManager.DEFAULT_BUFFER_SIZE, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); } - else if (encoding.equals("US-ASCII")) { - return new ASCIIReader(stream, - XMLEntityManager.DEFAULT_BUFFER_SIZE, - fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), - fErrorReporter.getLocale() ); - } // Try to use a Java reader. String javaEncoding = EncodingMap.getIANA2JavaMapping(encoding); @@ -256,7 +252,13 @@ throw new IOException( aFormatter.formatMessage( aLocale, "EncodingDeclInvalid", new Object[] {encoding} ) ); - } + } + else if (javaEncoding.equals("ASCII")) { + return new ASCIIReader(stream, + XMLEntityManager.DEFAULT_BUFFER_SIZE, + fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), + fErrorReporter.getLocale() ); + } return new InputStreamReader(stream, javaEncoding); } 1.2 +3 -3 xml-xerces/java/src/org/apache/xerces/impl/io/MalformedByteSequenceException.java Index: MalformedByteSequenceException.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/io/MalformedByteSequenceException.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- MalformedByteSequenceException.java 4 Mar 2004 19:27:13 -0000 1.1 +++ MalformedByteSequenceException.java 5 Mar 2004 22:17:00 -0000 1.2 @@ -16,7 +16,7 @@ package org.apache.xerces.impl.io; -import java.io.IOException; +import java.io.CharConversionException; import java.util.Locale; import org.apache.xerces.util.MessageFormatter; @@ -29,7 +29,7 @@ * * @version $Id$ */ -public class MalformedByteSequenceException extends IOException { +public class MalformedByteSequenceException extends CharConversionException { // // Data 1.41 +22 -4 xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentScannerImpl.java Index: XMLDocumentScannerImpl.java 
=================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentScannerImpl.java,v retrieving revision 1.40 retrieving revision 1.41 diff -u -r1.40 -r1.41 --- XMLDocumentScannerImpl.java 27 Feb 2004 20:36:07 -0000 1.40 +++ XMLDocumentScannerImpl.java 5 Mar 2004 22:17:00 -0000 1.41 @@ -19,6 +19,7 @@ import java.io.EOFException; import java.io.IOException; +import org.apache.xerces.impl.io.MalformedByteSequenceException; import org.apache.xerces.impl.validation.ValidationManager; import org.apache.xerces.util.NamespaceSupport; import org.apache.xerces.util.XMLChar; @@ -667,7 +668,11 @@ // if no XMLDecl, then scan piece of prolog return true; } - + catch (MalformedByteSequenceException e) { + fErrorReporter.reportError(e.getDomain(), e.getKey(), + e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR); + return false; + } // premature end of file catch (EOFException e) { reportFatalError("PrematureEOF", null); @@ -831,7 +836,11 @@ setDispatcher(fContentDispatcher); } } - + catch (MalformedByteSequenceException e) { + fErrorReporter.reportError(e.getDomain(), e.getKey(), + e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR); + return false; + } // premature end of file catch (EOFException e) { reportFatalError("PrematureEOF", null); @@ -940,7 +949,11 @@ } } while (complete || again); } - + catch (MalformedByteSequenceException e) { + fErrorReporter.reportError(e.getDomain(), e.getKey(), + e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR); + return false; + } // premature end of file catch (EOFException e) { reportFatalError("PrematureEOF", null); @@ -1174,6 +1187,11 @@ } } } while (complete || again); + } + catch (MalformedByteSequenceException e) { + fErrorReporter.reportError(e.getDomain(), e.getKey(), + e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR); + return false; } catch (EOFException e) { // NOTE: This is the only place we're allowed to reach 1.78 +9 -7 
xml-xerces/java/src/org/apache/xerces/impl/XMLEntityManager.java Index: XMLEntityManager.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLEntityManager.java,v retrieving revision 1.77 retrieving revision 1.78 diff -u -r1.77 -r1.78 --- XMLEntityManager.java 27 Feb 2004 20:40:00 -0000 1.77 +++ XMLEntityManager.java 5 Mar 2004 22:17:00 -0000 1.78 @@ -1847,12 +1847,6 @@ } return new UTF8Reader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); } - if (ENCODING.equals("US-ASCII")) { - if (DEBUG_ENCODINGS) { - System.out.println("$$$ creating ASCIIReader"); - } - return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); - } if(ENCODING.equals("ISO-10646-UCS-4")) { if(isBigEndian != null) { boolean isBE = isBigEndian.booleanValue(); @@ -1917,6 +1911,14 @@ javaEncoding = "ISO8859_1"; } } + else if (javaEncoding.equals("ASCII")) { + if (DEBUG_ENCODINGS) { + System.out.println("$$$ creating ASCIIReader"); + } + return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); + } + + if (DEBUG_ENCODINGS) { System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); if (javaEncoding == encoding) { 1.49 +7 -2 xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentFragmentScannerImpl.java Index: XMLDocumentFragmentScannerImpl.java =================================================================== RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/XMLDocumentFragmentScannerImpl.java,v retrieving revision 1.48 retrieving revision 1.49 diff -u -r1.48 -r1.49 --- XMLDocumentFragmentScannerImpl.java 27 Feb 2004 20:36:07 -0000 1.48 +++ XMLDocumentFragmentScannerImpl.java 5 Mar 2004 22:17:00 -0000 1.49 @@ -19,6 +19,7 @@ import 
java.io.EOFException; import java.io.IOException; +import org.apache.xerces.impl.io.MalformedByteSequenceException; import org.apache.xerces.impl.msg.XMLMessageFormatter; import org.apache.xerces.util.AugmentationsImpl; import org.apache.xerces.util.XMLAttributesImpl; @@ -1608,7 +1609,11 @@ } } while (complete || again); } - + catch (MalformedByteSequenceException e) { + fErrorReporter.reportError(e.getDomain(), e.getKey(), + e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR); + return false; + } // premature end of file catch (EOFException e) { endOfFileHook(e); --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]