http://git-wip-us.apache.org/repos/asf/struts/blob/775c82a7/plugins/embeddedjsp/src/main/java/org/apache/struts2/jasper/xmlparser/XMLEncodingDetector.java ---------------------------------------------------------------------- diff --git a/plugins/embeddedjsp/src/main/java/org/apache/struts2/jasper/xmlparser/XMLEncodingDetector.java b/plugins/embeddedjsp/src/main/java/org/apache/struts2/jasper/xmlparser/XMLEncodingDetector.java index b251db8..01ecba3 100644 --- a/plugins/embeddedjsp/src/main/java/org/apache/struts2/jasper/xmlparser/XMLEncodingDetector.java +++ b/plugins/embeddedjsp/src/main/java/org/apache/struts2/jasper/xmlparser/XMLEncodingDetector.java @@ -25,21 +25,17 @@ package org.apache.struts2.jasper.xmlparser; -import java.io.EOFException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.IOException; -import java.io.Reader; -import java.util.Locale; -import java.util.jar.JarFile; - import org.apache.struts2.jasper.JasperException; import org.apache.struts2.jasper.JspCompilationContext; import org.apache.struts2.jasper.compiler.ErrorDispatcher; import org.apache.struts2.jasper.compiler.JspUtil; +import java.io.*; +import java.util.Locale; +import java.util.jar.JarFile; + public class XMLEncodingDetector { - + private InputStream stream; private String encoding; private boolean isEncodingSetInProlog; @@ -47,7 +43,7 @@ public class XMLEncodingDetector { private int skip; private Boolean isBigEndian; private Reader reader; - + // org.apache.xerces.impl.XMLEntityManager fields public static final int DEFAULT_BUFFER_SIZE = 2048; public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64; @@ -55,7 +51,7 @@ public class XMLEncodingDetector { private SymbolTable fSymbolTable; private XMLEncodingDetector fCurrentEntity; private int fBufferSize = DEFAULT_BUFFER_SIZE; - + // org.apache.xerces.impl.XMLEntityManager.ScannedEntity fields private int lineNumber = 1; private int columnNumber = 1; @@ -64,15 +60,15 @@ public class 
XMLEncodingDetector { private int position; private int count; private boolean mayReadChunks = false; - + // org.apache.xerces.impl.XMLScanner fields - private XMLString fString = new XMLString(); + private XMLString fString = new XMLString(); private XMLStringBuffer fStringBuffer = new XMLStringBuffer(); private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer(); private final static String fVersionSymbol = "version"; private final static String fEncodingSymbol = "encoding"; private final static String fStandaloneSymbol = "standalone"; - + // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl fields private int fMarkupDepth = 0; private String[] fStrings = new String[3]; @@ -88,25 +84,34 @@ public class XMLEncodingDetector { } /** + * <p> * Autodetects the encoding of the XML document supplied by the given * input stream. + * </p> * + * <p> * Encoding autodetection is done according to the XML 1.0 specification, * Appendix F.1: Detection Without External Encoding Information. + * </p> * + * @param fname file name + * @param jarFile jar file + * @param ctxt compilation context + * @param err error dispatcher * @return Two-element array, where the first element (of type * java.lang.String) contains the name of the (auto)detected encoding, and - * the second element (of type java.lang.Boolean) specifies whether the + * the second element (of type java.lang.Boolean) specifies whether the * encoding was specified using the 'encoding' attribute of an XML prolog * (TRUE) or autodetected (FALSE). 
+ * @throws IOException in case of IO errors + * @throws JasperException in case of Jasper errors */ public static Object[] getEncoding(String fname, JarFile jarFile, JspCompilationContext ctxt, ErrorDispatcher err) - throws IOException, JasperException - { + throws IOException, JasperException { InputStream inStream = JspUtil.getInputStream(fname, jarFile, ctxt, - err); + err); XMLEncodingDetector detector = new XMLEncodingDetector(); Object[] ret = detector.getEncoding(inStream, err); inStream.close(); @@ -115,92 +120,93 @@ public class XMLEncodingDetector { } private Object[] getEncoding(InputStream in, ErrorDispatcher err) - throws IOException, JasperException - { + throws IOException, JasperException { this.stream = in; - this.err=err; + this.err = err; createInitialReader(); scanXMLDecl(); - - return new Object[] { this.encoding, - Boolean.valueOf(this.isEncodingSetInProlog), - Boolean.valueOf(this.isBomPresent), - Integer.valueOf(this.skip) }; + + return new Object[]{this.encoding, + Boolean.valueOf(this.isEncodingSetInProlog), + Boolean.valueOf(this.isBomPresent), + Integer.valueOf(this.skip)}; } - + // stub method void endEntity() { } - + // Adapted from: // org.apache.xerces.impl.XMLEntityManager.startEntity() private void createInitialReader() throws IOException, JasperException { - // wrap this stream in RewindableInputStream - stream = new RewindableInputStream(stream); - - // perform auto-detect of encoding if necessary - if (encoding == null) { - // read first four bytes and determine encoding - final byte[] b4 = new byte[4]; - int count = 0; - for (; count<4; count++ ) { - b4[count] = (byte)stream.read(); - } - if (count == 4) { - Object [] encodingDesc = getEncodingName(b4, count); - encoding = (String)(encodingDesc[0]); - isBigEndian = (Boolean)(encodingDesc[1]); - - if (encodingDesc.length > 3) { - isBomPresent = (Boolean)(encodingDesc[2]); - skip = (Integer)(encodingDesc[3]); - } else { - isBomPresent = true; - skip = (Integer)(encodingDesc[2]); 
- } - - stream.reset(); - // Special case UTF-8 files with BOM created by Microsoft - // tools. It's more efficient to consume the BOM than make - // the reader perform extra checks. -Ac - if (count > 2 && encoding.equals("UTF-8")) { - int b0 = b4[0] & 0xFF; - int b1 = b4[1] & 0xFF; - int b2 = b4[2] & 0xFF; - if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { - // ignore first three bytes... - stream.skip(3); - } - } - reader = createReader(stream, encoding, isBigEndian); - } else { - reader = createReader(stream, encoding, isBigEndian); - } - } + // wrap this stream in RewindableInputStream + stream = new RewindableInputStream(stream); + + // perform auto-detect of encoding if necessary + if (encoding == null) { + // read first four bytes and determine encoding + final byte[] b4 = new byte[4]; + int count = 0; + for (; count < 4; count++) { + b4[count] = (byte) stream.read(); + } + if (count == 4) { + Object[] encodingDesc = getEncodingName(b4, count); + encoding = (String) (encodingDesc[0]); + isBigEndian = (Boolean) (encodingDesc[1]); + + if (encodingDesc.length > 3) { + isBomPresent = (Boolean) (encodingDesc[2]); + skip = (Integer) (encodingDesc[3]); + } else { + isBomPresent = true; + skip = (Integer) (encodingDesc[2]); + } + + stream.reset(); + // Special case UTF-8 files with BOM created by Microsoft + // tools. It's more efficient to consume the BOM than make + // the reader perform extra checks. -Ac + if (count > 2 && encoding.equals("UTF-8")) { + int b0 = b4[0] & 0xFF; + int b1 = b4[1] & 0xFF; + int b2 = b4[2] & 0xFF; + if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { + // ignore first three bytes... + stream.skip(3); + } + } + reader = createReader(stream, encoding, isBigEndian); + } else { + reader = createReader(stream, encoding, isBigEndian); + } + } } // Adapted from: // org.apache.xerces.impl.XMLEntityManager.createReader + /** * Creates a reader capable of reading the given input stream in * the specified encoding. 
* - * @param inputStream The input stream. - * @param encoding The encoding name that the input stream is - * encoded using. If the user has specified that - * Java encoding names are allowed, then the - * encoding name may be a Java encoding name; - * otherwise, it is an ianaEncoding name. - * @param isBigEndian For encodings (like uCS-4), whose names cannot - * specify a byte order, this tells whether the order - * is bigEndian. null means unknown or not relevant. - * + * @param inputStream The input stream. + * @param encoding The encoding name that the input stream is + * encoded using. If the user has specified that + * Java encoding names are allowed, then the + * encoding name may be a Java encoding name; + * otherwise, it is an ianaEncoding name. + * @param isBigEndian For encodings (like uCS-4), whose names cannot + * specify a byte order, this tells whether the order + * is bigEndian. null means unknown or not relevant. * @return Returns a reader. + * @throws IOException in case of IO errors + * @throws JasperException in case of Jasper errors */ private Reader createReader(InputStream inputStream, String encoding, - Boolean isBigEndian) - throws IOException, JasperException { + Boolean isBigEndian) + throws IOException, JasperException { // normalize encoding name if (encoding == null) { @@ -225,7 +231,7 @@ public class XMLEncodingDetector { } } else { err.jspError("jsp.error.xml.encodingByteOrderUnsupported", - encoding); + encoding); } } if (ENCODING.equals("ISO-10646-UCS-2")) { @@ -238,7 +244,7 @@ public class XMLEncodingDetector { } } else { err.jspError("jsp.error.xml.encodingByteOrderUnsupported", - encoding); + encoding); } } @@ -262,7 +268,7 @@ public class XMLEncodingDetector { String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); if (javaEncoding == null) { if (fAllowJavaEncodings) { - javaEncoding = encoding; + javaEncoding = encoding; } else { err.jspError("jsp.error.xml.encodingDeclInvalid", encoding); // see comment above. 
@@ -275,6 +281,7 @@ public class XMLEncodingDetector { // Adapted from: // org.apache.xerces.impl.XMLEntityManager.getEncodingName + /** * Returns the IANA encoding name that is auto-detected from * the bytes specified, with the endian-ness of that encoding where @@ -283,9 +290,9 @@ public class XMLEncodingDetector { * @param b4 The first four bytes of the input. * @param count The number of bytes actually read. * @return a 2-element array: the first element, an IANA-encoding string, - * the second element a Boolean which is true iff the document is big - * endian, false if it's little-endian, and null if the distinction isn't - * relevant. + * the second element a Boolean which is true iff the document is big + * endian, false if it's little-endian, and null if the distinction isn't + * relevant. */ private Object[] getEncodingName(byte[] b4, int count) { @@ -298,356 +305,361 @@ public class XMLEncodingDetector { int b1 = b4[1] & 0xFF; if (b0 == 0xFE && b1 == 0xFF) { // UTF-16, big-endian - return new Object [] {"UTF-16BE", Boolean.TRUE, Integer.valueOf(2)}; + return new Object[]{"UTF-16BE", Boolean.TRUE, Integer.valueOf(2)}; } if (b0 == 0xFF && b1 == 0xFE) { // UTF-16, little-endian - return new Object [] {"UTF-16LE", Boolean.FALSE, Integer.valueOf(2)}; + return new Object[]{"UTF-16LE", Boolean.FALSE, Integer.valueOf(2)}; } // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 3) { - return new Object [] {"UTF-8", null, Boolean.FALSE, Integer.valueOf(0)}; + return new Object[]{"UTF-8", null, Boolean.FALSE, Integer.valueOf(0)}; } // UTF-8 with a BOM int b2 = b4[2] & 0xFF; if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { - return new Object [] {"UTF-8", null, Integer.valueOf(3)}; + return new Object[]{"UTF-8", null, Integer.valueOf(3)}; } // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 4) { - return new Object [] {"UTF-8", null, Integer.valueOf(0)}; + 
return new Object[]{"UTF-8", null, Integer.valueOf(0)}; } // other encodings int b3 = b4[3] & 0xFF; if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { // UCS-4, big endian (1234) - return new Object [] {"ISO-10646-UCS-4", new Boolean(true), Integer.valueOf(4)}; + return new Object[]{"ISO-10646-UCS-4", new Boolean(true), Integer.valueOf(4)}; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { // UCS-4, little endian (4321) - return new Object [] {"ISO-10646-UCS-4", new Boolean(false), Integer.valueOf(4)}; + return new Object[]{"ISO-10646-UCS-4", new Boolean(false), Integer.valueOf(4)}; } if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { // UCS-4, unusual octet order (2143) // REVISIT: What should this be? - return new Object [] {"ISO-10646-UCS-4", null, Integer.valueOf(4)}; + return new Object[]{"ISO-10646-UCS-4", null, Integer.valueOf(4)}; } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { // UCS-4, unusual octect order (3412) // REVISIT: What should this be? - return new Object [] {"ISO-10646-UCS-4", null, Integer.valueOf(4)}; + return new Object[]{"ISO-10646-UCS-4", null, Integer.valueOf(4)}; } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { // UTF-16, big-endian, no BOM // (or could turn out to be UCS-2... // REVISIT: What should this be? - return new Object [] {"UTF-16BE", new Boolean(true), Integer.valueOf(4)}; + return new Object[]{"UTF-16BE", new Boolean(true), Integer.valueOf(4)}; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { // UTF-16, little-endian, no BOM // (or could turn out to be UCS-2... 
- return new Object [] {"UTF-16LE", new Boolean(false), Integer.valueOf(4)}; + return new Object[]{"UTF-16LE", new Boolean(false), Integer.valueOf(4)}; } if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { // EBCDIC // a la xerces1, return CP037 instead of EBCDIC here - return new Object [] {"CP037", null, Integer.valueOf(4)}; + return new Object[]{"CP037", null, Integer.valueOf(4)}; } // default encoding - return new Object [] {"UTF-8", null, Boolean.FALSE, Integer.valueOf(0)}; + return new Object[]{"UTF-8", null, Boolean.FALSE, Integer.valueOf(0)}; } // Adapted from: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.isExternal - /** Returns true if the current entity being scanned is external. */ + + /** + * @return true if the current entity being scanned is external. + */ public boolean isExternal() { - return true; + return true; } // Adapted from: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.peekChar + /** - * Returns the next character on the input. + * @return the next character on the input. * <p> * <strong>Note:</strong> The character is <em>not</em> consumed. - * - * @throws IOException Thrown if i/o error occurs. - * @throws EOFException Thrown on end of file. + * </p> + * @throws IOException Thrown if i/o error occurs. */ public int peekChar() throws IOException { - - // load more characters, if needed - if (fCurrentEntity.position == fCurrentEntity.count) { - load(0, true); - } - - // peek at character - int c = fCurrentEntity.ch[fCurrentEntity.position]; - - // return peeked character - if (fCurrentEntity.isExternal()) { - return c != '\r' ? c : '\n'; - } - else { - return c; - } - + + // load more characters, if needed + if (fCurrentEntity.position == fCurrentEntity.count) { + load(0, true); + } + + // peek at character + int c = fCurrentEntity.ch[fCurrentEntity.position]; + + // return peeked character + if (fCurrentEntity.isExternal()) { + return c != '\r' ? 
c : '\n'; + } else { + return c; + } + } // peekChar():int - + // Adapted from: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanChar + /** - * Returns the next character on the input. + * @return the next character on the input. * <p> * <strong>Note:</strong> The character is consumed. - * - * @throws IOException Thrown if i/o error occurs. - * @throws EOFException Thrown on end of file. + * </p> + * @throws IOException Thrown if i/o error occurs. */ public int scanChar() throws IOException { - // load more characters, if needed - if (fCurrentEntity.position == fCurrentEntity.count) { - load(0, true); - } - - // scan character - int c = fCurrentEntity.ch[fCurrentEntity.position++]; - boolean external = false; - if (c == '\n' || - (c == '\r' && (external = fCurrentEntity.isExternal()))) { - fCurrentEntity.lineNumber++; - fCurrentEntity.columnNumber = 1; - if (fCurrentEntity.position == fCurrentEntity.count) { - fCurrentEntity.ch[0] = (char)c; - load(1, false); - } - if (c == '\r' && external) { - if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') { - fCurrentEntity.position--; - } - c = '\n'; - } - } - - // return character that was scanned - fCurrentEntity.columnNumber++; - return c; - + // load more characters, if needed + if (fCurrentEntity.position == fCurrentEntity.count) { + load(0, true); + } + + // scan character + int c = fCurrentEntity.ch[fCurrentEntity.position++]; + boolean external = false; + if (c == '\n' || + (c == '\r' && (external = fCurrentEntity.isExternal()))) { + fCurrentEntity.lineNumber++; + fCurrentEntity.columnNumber = 1; + if (fCurrentEntity.position == fCurrentEntity.count) { + fCurrentEntity.ch[0] = (char) c; + load(1, false); + } + if (c == '\r' && external) { + if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') { + fCurrentEntity.position--; + } + c = '\n'; + } + } + + // return character that was scanned + fCurrentEntity.columnNumber++; + return c; + } // Adapted from: // 
org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanName + /** - * Returns a string matching the Name production appearing immediately + * @return a string matching the Name production appearing immediately * on the input as a symbol, or null if no Name string is present. * <p> * <strong>Note:</strong> The Name characters are consumed. + * </p> * <p> * <strong>Note:</strong> The string returned must be a symbol. The * SymbolTable can be used for this purpose. - * - * @throws IOException Thrown if i/o error occurs. - * @throws EOFException Thrown on end of file. - * + * </p> + * @throws IOException Thrown if i/o error occurs. * @see SymbolTable * @see XMLChar#isName * @see XMLChar#isNameStart */ public String scanName() throws IOException { - - // load more characters, if needed - if (fCurrentEntity.position == fCurrentEntity.count) { - load(0, true); - } - - // scan name - int offset = fCurrentEntity.position; - if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { - if (++fCurrentEntity.position == fCurrentEntity.count) { - fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; - offset = 0; - if (load(1, false)) { - fCurrentEntity.columnNumber++; - String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, - 0, 1); - return symbol; - } - } - while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) { - if (++fCurrentEntity.position == fCurrentEntity.count) { - int length = fCurrentEntity.position - offset; - if (length == fBufferSize) { - // bad luck we have to resize our buffer - char[] tmp = new char[fBufferSize * 2]; - System.arraycopy(fCurrentEntity.ch, offset, - tmp, 0, length); - fCurrentEntity.ch = tmp; - fBufferSize *= 2; - } else { - System.arraycopy(fCurrentEntity.ch, offset, - fCurrentEntity.ch, 0, length); - } - offset = 0; - if (load(length, false)) { - break; - } - } - } - } - int length = fCurrentEntity.position - offset; - fCurrentEntity.columnNumber += length; - - // return name - String symbol = null; - if (length > 0) { - symbol = 
fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); - } - return symbol; - + + // load more characters, if needed + if (fCurrentEntity.position == fCurrentEntity.count) { + load(0, true); + } + + // scan name + int offset = fCurrentEntity.position; + if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) { + if (++fCurrentEntity.position == fCurrentEntity.count) { + fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; + offset = 0; + if (load(1, false)) { + fCurrentEntity.columnNumber++; + String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, + 0, 1); + return symbol; + } + } + while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) { + if (++fCurrentEntity.position == fCurrentEntity.count) { + int length = fCurrentEntity.position - offset; + if (length == fBufferSize) { + // bad luck we have to resize our buffer + char[] tmp = new char[fBufferSize * 2]; + System.arraycopy(fCurrentEntity.ch, offset, + tmp, 0, length); + fCurrentEntity.ch = tmp; + fBufferSize *= 2; + } else { + System.arraycopy(fCurrentEntity.ch, offset, + fCurrentEntity.ch, 0, length); + } + offset = 0; + if (load(length, false)) { + break; + } + } + } + } + int length = fCurrentEntity.position - offset; + fCurrentEntity.columnNumber += length; + + // return name + String symbol = null; + if (length > 0) { + symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length); + } + return symbol; + } // Adapted from: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanLiteral + /** + * <p> * Scans a range of attribute value data, setting the fields of the * XMLString structure, appropriately. + * </p> + * * <p> * <strong>Note:</strong> The characters are consumed. + * </p> + * * <p> * <strong>Note:</strong> This method does not guarantee to return * the longest run of attribute value data. This method may return * before the quote character due to reaching the end of the input * buffer or any other reason. 
+ * </p> + * * <p> * <strong>Note:</strong> The fields contained in the XMLString * structure are not guaranteed to remain valid upon subsequent calls * to the entity scanner. Therefore, the caller is responsible for * immediately using the returned character data or making a copy of * the character data. + * </p> * * @param quote The quote character that signifies the end of the * attribute value data. * @param content The content structure to fill. - * * @return Returns the next character on the input, if known. This - * value may be -1 but this does <em>note</em> designate - * end of file. - * - * @throws IOException Thrown if i/o error occurs. - * @throws EOFException Thrown on end of file. + * value may be -1 but this does <em>note</em> designate + * end of file. + * @throws IOException Thrown if i/o error occurs. */ public int scanLiteral(int quote, XMLString content) - throws IOException { - - // load more characters, if needed - if (fCurrentEntity.position == fCurrentEntity.count) { - load(0, true); - } else if (fCurrentEntity.position == fCurrentEntity.count - 1) { - fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1]; - load(1, false); - fCurrentEntity.position = 0; - } - - // normalize newlines - int offset = fCurrentEntity.position; - int c = fCurrentEntity.ch[offset]; - int newlines = 0; - boolean external = fCurrentEntity.isExternal(); - if (c == '\n' || (c == '\r' && external)) { - do { - c = fCurrentEntity.ch[fCurrentEntity.position++]; - if (c == '\r' && external) { - newlines++; - fCurrentEntity.lineNumber++; - fCurrentEntity.columnNumber = 1; - if (fCurrentEntity.position == fCurrentEntity.count) { - offset = 0; - fCurrentEntity.position = newlines; - if (load(newlines, false)) { - break; - } - } - if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { - fCurrentEntity.position++; - offset++; - } - /*** NEWLINE NORMALIZATION ***/ - else { - newlines++; - } - /***/ - } - else if (c == '\n') { - newlines++; - 
fCurrentEntity.lineNumber++; - fCurrentEntity.columnNumber = 1; - if (fCurrentEntity.position == fCurrentEntity.count) { - offset = 0; - fCurrentEntity.position = newlines; - if (load(newlines, false)) { - break; - } - } - /*** NEWLINE NORMALIZATION *** - if (fCurrentEntity.ch[fCurrentEntity.position] == '\r' - && external) { - fCurrentEntity.position++; - offset++; - } - /***/ - } - else { - fCurrentEntity.position--; - break; - } - } while (fCurrentEntity.position < fCurrentEntity.count - 1); - for (int i = offset; i < fCurrentEntity.position; i++) { - fCurrentEntity.ch[i] = '\n'; - } - int length = fCurrentEntity.position - offset; - if (fCurrentEntity.position == fCurrentEntity.count - 1) { - content.setValues(fCurrentEntity.ch, offset, length); - return -1; - } - } - - // scan literal value - while (fCurrentEntity.position < fCurrentEntity.count) { - c = fCurrentEntity.ch[fCurrentEntity.position++]; - if ((c == quote && - (!fCurrentEntity.literal || external)) - || c == '%' || !XMLChar.isContent(c)) { - fCurrentEntity.position--; - break; - } - } - int length = fCurrentEntity.position - offset; - fCurrentEntity.columnNumber += length - newlines; - content.setValues(fCurrentEntity.ch, offset, length); - - // return next character - if (fCurrentEntity.position != fCurrentEntity.count) { - c = fCurrentEntity.ch[fCurrentEntity.position]; - // NOTE: We don't want to accidentally signal the - // end of the literal if we're expanding an - // entity appearing in the literal. 
-Ac - if (c == quote && fCurrentEntity.literal) { - c = -1; - } - } - else { - c = -1; - } - return c; + throws IOException { + + // load more characters, if needed + if (fCurrentEntity.position == fCurrentEntity.count) { + load(0, true); + } else if (fCurrentEntity.position == fCurrentEntity.count - 1) { + fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1]; + load(1, false); + fCurrentEntity.position = 0; + } + + // normalize newlines + int offset = fCurrentEntity.position; + int c = fCurrentEntity.ch[offset]; + int newlines = 0; + boolean external = fCurrentEntity.isExternal(); + if (c == '\n' || (c == '\r' && external)) { + do { + c = fCurrentEntity.ch[fCurrentEntity.position++]; + if (c == '\r' && external) { + newlines++; + fCurrentEntity.lineNumber++; + fCurrentEntity.columnNumber = 1; + if (fCurrentEntity.position == fCurrentEntity.count) { + offset = 0; + fCurrentEntity.position = newlines; + if (load(newlines, false)) { + break; + } + } + if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { + fCurrentEntity.position++; + offset++; + } + /*** NEWLINE NORMALIZATION ***/ + else { + newlines++; + } + /***/ + } else if (c == '\n') { + newlines++; + fCurrentEntity.lineNumber++; + fCurrentEntity.columnNumber = 1; + if (fCurrentEntity.position == fCurrentEntity.count) { + offset = 0; + fCurrentEntity.position = newlines; + if (load(newlines, false)) { + break; + } + } + /*** NEWLINE NORMALIZATION *** + if (fCurrentEntity.ch[fCurrentEntity.position] == '\r' + && external) { + fCurrentEntity.position++; + offset++; + } + /***/ + } else { + fCurrentEntity.position--; + break; + } + } while (fCurrentEntity.position < fCurrentEntity.count - 1); + for (int i = offset; i < fCurrentEntity.position; i++) { + fCurrentEntity.ch[i] = '\n'; + } + int length = fCurrentEntity.position - offset; + if (fCurrentEntity.position == fCurrentEntity.count - 1) { + content.setValues(fCurrentEntity.ch, offset, length); + return -1; + } + } + + // scan literal value + 
while (fCurrentEntity.position < fCurrentEntity.count) { + c = fCurrentEntity.ch[fCurrentEntity.position++]; + if ((c == quote && + (!fCurrentEntity.literal || external)) + || c == '%' || !XMLChar.isContent(c)) { + fCurrentEntity.position--; + break; + } + } + int length = fCurrentEntity.position - offset; + fCurrentEntity.columnNumber += length - newlines; + content.setValues(fCurrentEntity.ch, offset, length); + + // return next character + if (fCurrentEntity.position != fCurrentEntity.count) { + c = fCurrentEntity.ch[fCurrentEntity.position]; + // NOTE: We don't want to accidentally signal the + // end of the literal if we're expanding an + // entity appearing in the literal. -Ac + if (c == quote && fCurrentEntity.literal) { + c = -1; + } + } else { + c = -1; + } + return c; } @@ -656,290 +668,284 @@ public class XMLEncodingDetector { * setting the fields of the XMLString structure, appropriately. * <p> * <strong>Note:</strong> The characters are consumed. + * </p> * <p> * <strong>Note:</strong> This assumes that the internal buffer is * at least the same size, or bigger, than the length of the delimiter * and that the delimiter contains at least one character. + * </p> * <p> * <strong>Note:</strong> This method does not guarantee to return * the longest run of character data. This method may return before * the delimiter due to reaching the end of the input buffer or any * other reason. + * </p> * <p> * <strong>Note:</strong> The fields contained in the XMLString * structure are not guaranteed to remain valid upon subsequent calls * to the entity scanner. Therefore, the caller is responsible for * immediately using the returned character data or making a copy of * the character data. + * </p> * * @param delimiter The string that signifies the end of the character * data to be scanned. * @param buffer The data structure to fill. - * * @return Returns true if there is more data to scan, false otherwise. - * - * @throws IOException Thrown if i/o error occurs. 
- * @throws EOFException Thrown on end of file. + * @throws IOException Thrown if i/o error occurs. */ public boolean scanData(String delimiter, XMLStringBuffer buffer) - throws IOException { - - boolean done = false; - int delimLen = delimiter.length(); - char charAt0 = delimiter.charAt(0); - boolean external = fCurrentEntity.isExternal(); - do { - - // load more characters, if needed - - if (fCurrentEntity.position == fCurrentEntity.count) { - load(0, true); - } - else if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) { - System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, - fCurrentEntity.ch, 0, fCurrentEntity.count - fCurrentEntity.position); - load(fCurrentEntity.count - fCurrentEntity.position, false); - fCurrentEntity.position = 0; - } - if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) { - // something must be wrong with the input: e.g., file ends an - // unterminated comment - int length = fCurrentEntity.count - fCurrentEntity.position; - buffer.append (fCurrentEntity.ch, fCurrentEntity.position, - length); - fCurrentEntity.columnNumber += fCurrentEntity.count; - fCurrentEntity.position = fCurrentEntity.count; - load(0,true); - return false; - } - - // normalize newlines - int offset = fCurrentEntity.position; - int c = fCurrentEntity.ch[offset]; - int newlines = 0; - if (c == '\n' || (c == '\r' && external)) { - do { - c = fCurrentEntity.ch[fCurrentEntity.position++]; - if (c == '\r' && external) { - newlines++; - fCurrentEntity.lineNumber++; - fCurrentEntity.columnNumber = 1; - if (fCurrentEntity.position == fCurrentEntity.count) { - offset = 0; - fCurrentEntity.position = newlines; - if (load(newlines, false)) { - break; - } - } - if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { - fCurrentEntity.position++; - offset++; - } - /*** NEWLINE NORMALIZATION ***/ - else { - newlines++; - } - } - else if (c == '\n') { - newlines++; - fCurrentEntity.lineNumber++; - fCurrentEntity.columnNumber = 1; - if 
(fCurrentEntity.position == fCurrentEntity.count) { - offset = 0; - fCurrentEntity.position = newlines; - fCurrentEntity.count = newlines; - if (load(newlines, false)) { - break; - } - } - } - else { - fCurrentEntity.position--; - break; - } - } while (fCurrentEntity.position < fCurrentEntity.count - 1); - for (int i = offset; i < fCurrentEntity.position; i++) { - fCurrentEntity.ch[i] = '\n'; - } - int length = fCurrentEntity.position - offset; - if (fCurrentEntity.position == fCurrentEntity.count - 1) { - buffer.append(fCurrentEntity.ch, offset, length); - return true; - } - } - - // iterate over buffer looking for delimiter - OUTER: while (fCurrentEntity.position < fCurrentEntity.count) { - c = fCurrentEntity.ch[fCurrentEntity.position++]; - if (c == charAt0) { - // looks like we just hit the delimiter - int delimOffset = fCurrentEntity.position - 1; - for (int i = 1; i < delimLen; i++) { - if (fCurrentEntity.position == fCurrentEntity.count) { - fCurrentEntity.position -= i; - break OUTER; - } - c = fCurrentEntity.ch[fCurrentEntity.position++]; - if (delimiter.charAt(i) != c) { - fCurrentEntity.position--; - break; - } - } - if (fCurrentEntity.position == delimOffset + delimLen) { - done = true; - break; - } - } - else if (c == '\n' || (external && c == '\r')) { - fCurrentEntity.position--; - break; - } - else if (XMLChar.isInvalid(c)) { - fCurrentEntity.position--; - int length = fCurrentEntity.position - offset; - fCurrentEntity.columnNumber += length - newlines; - buffer.append(fCurrentEntity.ch, offset, length); - return true; - } - } - int length = fCurrentEntity.position - offset; - fCurrentEntity.columnNumber += length - newlines; - if (done) { - length -= delimLen; - } - buffer.append (fCurrentEntity.ch, offset, length); - - // return true if string was skipped - } while (!done); - return !done; + throws IOException { + + boolean done = false; + int delimLen = delimiter.length(); + char charAt0 = delimiter.charAt(0); + boolean external = 
fCurrentEntity.isExternal(); + do { + + // load more characters, if needed + + if (fCurrentEntity.position == fCurrentEntity.count) { + load(0, true); + } else if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) { + System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, + fCurrentEntity.ch, 0, fCurrentEntity.count - fCurrentEntity.position); + load(fCurrentEntity.count - fCurrentEntity.position, false); + fCurrentEntity.position = 0; + } + if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) { + // something must be wrong with the input: e.g., file ends an + // unterminated comment + int length = fCurrentEntity.count - fCurrentEntity.position; + buffer.append(fCurrentEntity.ch, fCurrentEntity.position, + length); + fCurrentEntity.columnNumber += fCurrentEntity.count; + fCurrentEntity.position = fCurrentEntity.count; + load(0, true); + return false; + } + + // normalize newlines + int offset = fCurrentEntity.position; + int c = fCurrentEntity.ch[offset]; + int newlines = 0; + if (c == '\n' || (c == '\r' && external)) { + do { + c = fCurrentEntity.ch[fCurrentEntity.position++]; + if (c == '\r' && external) { + newlines++; + fCurrentEntity.lineNumber++; + fCurrentEntity.columnNumber = 1; + if (fCurrentEntity.position == fCurrentEntity.count) { + offset = 0; + fCurrentEntity.position = newlines; + if (load(newlines, false)) { + break; + } + } + if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { + fCurrentEntity.position++; + offset++; + } + /*** NEWLINE NORMALIZATION ***/ + else { + newlines++; + } + } else if (c == '\n') { + newlines++; + fCurrentEntity.lineNumber++; + fCurrentEntity.columnNumber = 1; + if (fCurrentEntity.position == fCurrentEntity.count) { + offset = 0; + fCurrentEntity.position = newlines; + fCurrentEntity.count = newlines; + if (load(newlines, false)) { + break; + } + } + } else { + fCurrentEntity.position--; + break; + } + } while (fCurrentEntity.position < fCurrentEntity.count - 1); + for (int i = offset; i < 
fCurrentEntity.position; i++) { + fCurrentEntity.ch[i] = '\n'; + } + int length = fCurrentEntity.position - offset; + if (fCurrentEntity.position == fCurrentEntity.count - 1) { + buffer.append(fCurrentEntity.ch, offset, length); + return true; + } + } + + // iterate over buffer looking for delimiter + OUTER: + while (fCurrentEntity.position < fCurrentEntity.count) { + c = fCurrentEntity.ch[fCurrentEntity.position++]; + if (c == charAt0) { + // looks like we just hit the delimiter + int delimOffset = fCurrentEntity.position - 1; + for (int i = 1; i < delimLen; i++) { + if (fCurrentEntity.position == fCurrentEntity.count) { + fCurrentEntity.position -= i; + break OUTER; + } + c = fCurrentEntity.ch[fCurrentEntity.position++]; + if (delimiter.charAt(i) != c) { + fCurrentEntity.position--; + break; + } + } + if (fCurrentEntity.position == delimOffset + delimLen) { + done = true; + break; + } + } else if (c == '\n' || (external && c == '\r')) { + fCurrentEntity.position--; + break; + } else if (XMLChar.isInvalid(c)) { + fCurrentEntity.position--; + int length = fCurrentEntity.position - offset; + fCurrentEntity.columnNumber += length - newlines; + buffer.append(fCurrentEntity.ch, offset, length); + return true; + } + } + int length = fCurrentEntity.position - offset; + fCurrentEntity.columnNumber += length - newlines; + if (done) { + length -= delimLen; + } + buffer.append(fCurrentEntity.ch, offset, length); + + // return true if string was skipped + } while (!done); + return !done; } // Adapted from: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipChar + /** * Skips a character appearing immediately on the input. * <p> * <strong>Note:</strong> The character is consumed only if it matches * the specified character. + * </p> * * @param c The character to skip. - * * @return Returns true if the character was skipped. - * - * @throws IOException Thrown if i/o error occurs. - * @throws EOFException Thrown on end of file. 
+ * @throws IOException Thrown if i/o error occurs. */ public boolean skipChar(int c) throws IOException { - // load more characters, if needed - if (fCurrentEntity.position == fCurrentEntity.count) { - load(0, true); - } - - // skip character - int cc = fCurrentEntity.ch[fCurrentEntity.position]; - if (cc == c) { - fCurrentEntity.position++; - if (c == '\n') { - fCurrentEntity.lineNumber++; - fCurrentEntity.columnNumber = 1; - } - else { - fCurrentEntity.columnNumber++; - } - return true; - } else if (c == '\n' && cc == '\r' && fCurrentEntity.isExternal()) { - // handle newlines - if (fCurrentEntity.position == fCurrentEntity.count) { - fCurrentEntity.ch[0] = (char)cc; - load(1, false); - } - fCurrentEntity.position++; - if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { - fCurrentEntity.position++; - } - fCurrentEntity.lineNumber++; - fCurrentEntity.columnNumber = 1; - return true; - } - - // character was not skipped - return false; + // load more characters, if needed + if (fCurrentEntity.position == fCurrentEntity.count) { + load(0, true); + } + + // skip character + int cc = fCurrentEntity.ch[fCurrentEntity.position]; + if (cc == c) { + fCurrentEntity.position++; + if (c == '\n') { + fCurrentEntity.lineNumber++; + fCurrentEntity.columnNumber = 1; + } else { + fCurrentEntity.columnNumber++; + } + return true; + } else if (c == '\n' && cc == '\r' && fCurrentEntity.isExternal()) { + // handle newlines + if (fCurrentEntity.position == fCurrentEntity.count) { + fCurrentEntity.ch[0] = (char) cc; + load(1, false); + } + fCurrentEntity.position++; + if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { + fCurrentEntity.position++; + } + fCurrentEntity.lineNumber++; + fCurrentEntity.columnNumber = 1; + return true; + } + + // character was not skipped + return false; } // Adapted from: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipSpaces + /** * Skips space characters appearing immediately on the input. 
* <p> * <strong>Note:</strong> The characters are consumed only if they are * space characters. + * </p> * * @return Returns true if at least one space character was skipped. - * * @throws IOException Thrown if i/o error occurs. * @throws EOFException Thrown on end of file. - * * @see XMLChar#isSpace */ public boolean skipSpaces() throws IOException { - // load more characters, if needed - if (fCurrentEntity.position == fCurrentEntity.count) { - load(0, true); - } - - // skip spaces - int c = fCurrentEntity.ch[fCurrentEntity.position]; - if (XMLChar.isSpace(c)) { - boolean external = fCurrentEntity.isExternal(); - do { - boolean entityChanged = false; - // handle newlines - if (c == '\n' || (external && c == '\r')) { - fCurrentEntity.lineNumber++; - fCurrentEntity.columnNumber = 1; - if (fCurrentEntity.position == fCurrentEntity.count - 1) { - fCurrentEntity.ch[0] = (char)c; - entityChanged = load(1, true); - if (!entityChanged) - // the load change the position to be 1, - // need to restore it when entity not changed - fCurrentEntity.position = 0; - } - if (c == '\r' && external) { - // REVISIT: Does this need to be updated to fix the - // #x0D ^#x0A newline normalization problem? 
-Ac - if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { - fCurrentEntity.position--; - } - } - /*** NEWLINE NORMALIZATION *** - else { - if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r' - && external) { - fCurrentEntity.position++; - } - } - /***/ - } - else { - fCurrentEntity.columnNumber++; - } - // load more characters, if needed - if (!entityChanged) - fCurrentEntity.position++; - if (fCurrentEntity.position == fCurrentEntity.count) { - load(0, true); - } - } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); - return true; - } - - // no spaces were found - return false; + // load more characters, if needed + if (fCurrentEntity.position == fCurrentEntity.count) { + load(0, true); + } + + // skip spaces + int c = fCurrentEntity.ch[fCurrentEntity.position]; + if (XMLChar.isSpace(c)) { + boolean external = fCurrentEntity.isExternal(); + do { + boolean entityChanged = false; + // handle newlines + if (c == '\n' || (external && c == '\r')) { + fCurrentEntity.lineNumber++; + fCurrentEntity.columnNumber = 1; + if (fCurrentEntity.position == fCurrentEntity.count - 1) { + fCurrentEntity.ch[0] = (char) c; + entityChanged = load(1, true); + if (!entityChanged) + // the load change the position to be 1, + // need to restore it when entity not changed + fCurrentEntity.position = 0; + } + if (c == '\r' && external) { + // REVISIT: Does this need to be updated to fix the + // #x0D ^#x0A newline normalization problem? 
-Ac + if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { + fCurrentEntity.position--; + } + } + /*** NEWLINE NORMALIZATION *** + else { + if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r' + && external) { + fCurrentEntity.position++; + } + } + /***/ + } else { + fCurrentEntity.columnNumber++; + } + // load more characters, if needed + if (!entityChanged) + fCurrentEntity.position++; + if (fCurrentEntity.position == fCurrentEntity.count) { + load(0, true); + } + } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position])); + return true; + } + + // no spaces were found + return false; } @@ -948,46 +954,45 @@ public class XMLEncodingDetector { * <p> * <strong>Note:</strong> The characters are consumed only if they are * space characters. + * </p> * * @param s The string to skip. - * * @return Returns true if the string was skipped. - * - * @throws IOException Thrown if i/o error occurs. - * @throws EOFException Thrown on end of file. + * @throws IOException Thrown if i/o error occurs. */ public boolean skipString(String s) throws IOException { - // load more characters, if needed - if (fCurrentEntity.position == fCurrentEntity.count) { - load(0, true); - } - - // skip string - final int length = s.length(); - for (int i = 0; i < length; i++) { - char c = fCurrentEntity.ch[fCurrentEntity.position++]; - if (c != s.charAt(i)) { - fCurrentEntity.position -= i + 1; - return false; - } - if (i < length - 1 && fCurrentEntity.position == fCurrentEntity.count) { - System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - i - 1, fCurrentEntity.ch, 0, i + 1); - // REVISIT: Can a string to be skipped cross an - // entity boundary? 
-Ac - if (load(i + 1, false)) { - fCurrentEntity.position -= i + 1; - return false; - } - } - } - fCurrentEntity.columnNumber += length; - return true; + // load more characters, if needed + if (fCurrentEntity.position == fCurrentEntity.count) { + load(0, true); + } + + // skip string + final int length = s.length(); + for (int i = 0; i < length; i++) { + char c = fCurrentEntity.ch[fCurrentEntity.position++]; + if (c != s.charAt(i)) { + fCurrentEntity.position -= i + 1; + return false; + } + if (i < length - 1 && fCurrentEntity.position == fCurrentEntity.count) { + System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - i - 1, fCurrentEntity.ch, 0, i + 1); + // REVISIT: Can a string to be skipped cross an + // entity boundary? -Ac + if (load(i + 1, false)) { + fCurrentEntity.position -= i + 1; + return false; + } + } + } + fCurrentEntity.columnNumber += length; + return true; } // Adapted from: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.load + /** * Loads a chunk of text. * @@ -998,53 +1003,54 @@ public class XMLEncodingDetector { * the current entity in place and the entity * boundary will be signaled by the return * value. - * * @returns Returns true if the entity changed as a result of this - * load operation. + * load operation. */ final boolean load(int offset, boolean changeEntity) - throws IOException { - - // read characters - int length = fCurrentEntity.mayReadChunks? 
- (fCurrentEntity.ch.length - offset): - (DEFAULT_XMLDECL_BUFFER_SIZE); - int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, - length); - - // reset count and position - boolean entityChanged = false; - if (count != -1) { - if (count != 0) { - fCurrentEntity.count = count + offset; - fCurrentEntity.position = offset; - } - } - - // end of this entity - else { - fCurrentEntity.count = offset; - fCurrentEntity.position = offset; - entityChanged = true; - if (changeEntity) { - endEntity(); - if (fCurrentEntity == null) { - throw new EOFException(); - } - // handle the trailing edges - if (fCurrentEntity.position == fCurrentEntity.count) { - load(0, false); - } - } - } - - return entityChanged; + throws IOException { + + // read characters + int length = fCurrentEntity.mayReadChunks ? + (fCurrentEntity.ch.length - offset) : + (DEFAULT_XMLDECL_BUFFER_SIZE); + int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, + length); + + // reset count and position + boolean entityChanged = false; + if (count != -1) { + if (count != 0) { + fCurrentEntity.count = count + offset; + fCurrentEntity.position = offset; + } + } + + // end of this entity + else { + fCurrentEntity.count = offset; + fCurrentEntity.position = offset; + entityChanged = true; + if (changeEntity) { + endEntity(); + if (fCurrentEntity == null) { + throw new EOFException(); + } + // handle the trailing edges + if (fCurrentEntity.position == fCurrentEntity.count) { + load(0, false); + } + } + } + + return entityChanged; } // Adapted from: // org.apache.xerces.impl.XMLEntityManager.RewindableInputStream + /** + * <p> * This class wraps the byte inputstreams we're presented with. * We need it because java.io.InputStreams don't provide * functionality to reread processed bytes, and they have a habit @@ -1053,13 +1059,16 @@ public class XMLEncodingDetector { * encoding of a document, we can neither backtrack to read the * whole doc again nor start reading where we are with a new * reader. 
- * + * </p> + * <p/> + * <p> * This class allows rewinding an inputStream by allowing a mark * to be set, and the stream reset to that position. <strong>The * class assumes that it needs to read one character per * invocation when it's read() method is inovked, but uses the * underlying InputStream's read(char[], offset length) method--it * won't buffer data read this way!</strong> + * </p> * * @author Neil Graham, IBM * @author Glenn Marcy, IBM @@ -1110,7 +1119,7 @@ public class XMLEncodingDetector { fEndOffset = fOffset; return -1; } - fData[fLength++] = (byte)b; + fData[fLength++] = (byte) b; fOffset++; return b & 0xff; } @@ -1130,15 +1139,14 @@ public class XMLEncodingDetector { fEndOffset = fOffset; return -1; } - b[off] = (byte)returnedVal; + b[off] = (byte) returnedVal; return 1; } if (len < bytesLeft) { if (len <= 0) { return 0; } - } - else { + } else { len = bytesLeft; } if (b != null) { @@ -1149,8 +1157,7 @@ public class XMLEncodingDetector { } public long skip(long n) - throws IOException - { + throws IOException { int bytesLeft; if (n <= 0) { return 0; @@ -1171,8 +1178,8 @@ public class XMLEncodingDetector { return bytesLeft; } n -= bytesLeft; - /* - * In a manner of speaking, when this class isn't permitting more + /* + * In a manner of speaking, when this class isn't permitting more * than one byte at a time to be read, it is "blocking". The * available() method should indicate how much can be read without * blocking, so while we're in this mode, it should only indicate @@ -1189,7 +1196,7 @@ public class XMLEncodingDetector { return -1; } return fCurrentEntity.mayReadChunks ? 
fInputStream.available() - : 0; + : 0; } return bytesLeft; } @@ -1218,34 +1225,35 @@ public class XMLEncodingDetector { // org.apache.xerces.impl.XMLDocumentScannerImpl.dispatch private void scanXMLDecl() throws IOException, JasperException { - if (skipString("<?xml")) { - fMarkupDepth++; - // NOTE: special case where document starts with a PI - // whose name starts with "xml" (e.g. "xmlfoo") - if (XMLChar.isName(peekChar())) { - fStringBuffer.clear(); - fStringBuffer.append("xml"); - while (XMLChar.isName(peekChar())) { - fStringBuffer.append((char)scanChar()); - } - String target = fSymbolTable.addSymbol(fStringBuffer.ch, - fStringBuffer.offset, - fStringBuffer.length); - scanPIData(target, fString); - } - - // standard XML declaration - else { - scanXMLDeclOrTextDecl(false); - } - } + if (skipString("<?xml")) { + fMarkupDepth++; + // NOTE: special case where document starts with a PI + // whose name starts with "xml" (e.g. "xmlfoo") + if (XMLChar.isName(peekChar())) { + fStringBuffer.clear(); + fStringBuffer.append("xml"); + while (XMLChar.isName(peekChar())) { + fStringBuffer.append((char) scanChar()); + } + String target = fSymbolTable.addSymbol(fStringBuffer.ch, + fStringBuffer.offset, + fStringBuffer.length); + scanPIData(target, fString); + } + + // standard XML declaration + else { + scanXMLDeclOrTextDecl(false); + } + } } - + // Adapted from: // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanXMLDeclOrTextDecl + /** * Scans an XML or text declaration. - * <p> + * <p/> * <pre> * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") @@ -1261,8 +1269,8 @@ public class XMLEncodingDetector { * be scanned instead of an XML * declaration. 
*/ - private void scanXMLDeclOrTextDecl(boolean scanningTextDecl) - throws IOException, JasperException { + private void scanXMLDeclOrTextDecl(boolean scanningTextDecl) + throws IOException, JasperException { // scan decl scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); @@ -1274,15 +1282,16 @@ public class XMLEncodingDetector { // set encoding on reader if (encodingPseudoAttr != null) { isEncodingSetInProlog = true; - encoding = encodingPseudoAttr; + encoding = encodingPseudoAttr; } } // Adapted from: // org.apache.xerces.impl.XMLScanner.scanXMLDeclOrTextDecl - /** + + /* * Scans an XML or text declaration. - * <p> + * <p/> * <pre> * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") @@ -1294,19 +1303,20 @@ public class XMLEncodingDetector { * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' * </pre> * - * @param scanningTextDecl True if a text declaration is to - * be scanned instead of an XML - * declaration. + * @param scanningTextDecl True if a text declaration is to + * be scanned instead of an XML + * declaration. * @param pseudoAttributeValues An array of size 3 to return the version, - * encoding and standalone pseudo attribute values - * (in that order). - * - * <strong>Note:</strong> This method uses fString, anything in it - * at the time of calling is lost. + * encoding and standalone pseudo attribute values + * (in that order). + * <p/> + * <p> + * <strong>Note:</strong> This method uses fString, anything in it + * at the time of calling is lost.</p> */ private void scanXMLDeclOrTextDecl(boolean scanningTextDecl, - String[] pseudoAttributeValues) - throws IOException, JasperException { + String[] pseudoAttributeValues) + throws IOException, JasperException { // pseudo-attribute values String version = null; @@ -1330,18 +1340,18 @@ public class XMLEncodingDetector { if (name == fVersionSymbol) { if (!sawSpace) { reportFatalError(scanningTextDecl - ? 
"jsp.error.xml.spaceRequiredBeforeVersionInTextDecl" - : "jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl", - null); + ? "jsp.error.xml.spaceRequiredBeforeVersionInTextDecl" + : "jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl", + null); } version = fString.toString(); state = STATE_ENCODING; if (!version.equals("1.0")) { // REVISIT: XML REC says we should throw an error - // in such cases. + // in such cases. // some may object the throwing of fatalError. err.jspError("jsp.error.xml.versionNotSupported", - version); + version); } } else if (name == fEncodingSymbol) { if (!scanningTextDecl) { @@ -1349,17 +1359,16 @@ public class XMLEncodingDetector { } if (!sawSpace) { reportFatalError(scanningTextDecl - ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl" - : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl", - null); + ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl" + : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl", + null); } encoding = fString.toString(); state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE; } else { if (scanningTextDecl) { err.jspError("jsp.error.xml.encodingDeclRequired"); - } - else { + } else { err.jspError("jsp.error.xml.versionInfoRequired"); } } @@ -1369,9 +1378,9 @@ public class XMLEncodingDetector { if (name == fEncodingSymbol) { if (!sawSpace) { reportFatalError(scanningTextDecl - ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl" - : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl", - null); + ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl" + : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl", + null); } encoding = fString.toString(); state = scanningTextDecl ? 
STATE_DONE : STATE_STANDALONE; @@ -1402,7 +1411,7 @@ public class XMLEncodingDetector { err.jspError("jsp.error.xml.sdDeclInvalid"); } } else { - err.jspError("jsp.error.xml.encodingDeclRequired"); + err.jspError("jsp.error.xml.encodingDeclRequired"); } break; } @@ -1416,9 +1425,9 @@ public class XMLEncodingDetector { if (scanningTextDecl && state != STATE_DONE) { err.jspError("jsp.error.xml.morePseudoAttributes"); } - + // If there is no data in the xml or text decl then we fail to report - // error for version or encoding info above. + // error for version or encoding info above. if (scanningTextDecl) { if (!dataFoundForTarget && encoding == null) { err.jspError("jsp.error.xml.encodingDeclRequired"); @@ -1437,7 +1446,7 @@ public class XMLEncodingDetector { err.jspError("jsp.error.xml.xmlDeclUnterminated"); } - + // fill in return array pseudoAttributeValues[0] = version; pseudoAttributeValues[1] = encoding; @@ -1446,24 +1455,28 @@ public class XMLEncodingDetector { // Adapted from: // org.apache.xerces.impl.XMLScanner.scanPseudoAttribute + /** * Scans a pseudo attribute. * * @param scanningTextDecl True if scanning this pseudo-attribute for a - * TextDecl; false if scanning XMLDecl. This + * TextDecl; false if scanning XMLDecl. This * flag is needed to report the correct type of * error. - * @param value The string to fill in with the attribute + * @param value The string to fill in with the attribute * value. - * * @return The name of the attribute * + * <p> * <strong>Note:</strong> This method uses fStringBuffer2, anything in it - * at the time of calling is lost. 
+ * at the time of calling is lost.</p> + * + * @throws IOException in case of IO errors + * @throws JasperException in case of Jasper errors */ - public String scanPseudoAttribute(boolean scanningTextDecl, - XMLString value) - throws IOException, JasperException { + public String scanPseudoAttribute(boolean scanningTextDecl, + XMLString value) + throws IOException, JasperException { String name = scanName(); if (name == null) { @@ -1472,17 +1485,17 @@ public class XMLEncodingDetector { skipSpaces(); if (!skipChar('=')) { reportFatalError(scanningTextDecl ? - "jsp.error.xml.eqRequiredInTextDecl" - : "jsp.error.xml.eqRequiredInXMLDecl", - name); + "jsp.error.xml.eqRequiredInTextDecl" + : "jsp.error.xml.eqRequiredInXMLDecl", + name); } skipSpaces(); int quote = peekChar(); if (quote != '\'' && quote != '"') { reportFatalError(scanningTextDecl ? - "jsp.error.xml.quoteRequiredInTextDecl" - : "jsp.error.xml.quoteRequiredInXMLDecl" , - name); + "jsp.error.xml.quoteRequiredInTextDecl" + : "jsp.error.xml.quoteRequiredInXMLDecl", + name); } scanChar(); int c = scanLiteral(quote, value); @@ -1492,15 +1505,13 @@ public class XMLEncodingDetector { fStringBuffer2.append(value); if (c != -1) { if (c == '&' || c == '%' || c == '<' || c == ']') { - fStringBuffer2.append((char)scanChar()); - } - else if (XMLChar.isHighSurrogate(c)) { + fStringBuffer2.append((char) scanChar()); + } else if (XMLChar.isHighSurrogate(c)) { scanSurrogates(fStringBuffer2); - } - else if (XMLChar.isInvalid(c)) { + } else if (XMLChar.isInvalid(c)) { String key = scanningTextDecl - ? "jsp.error.xml.invalidCharInTextDecl" - : "jsp.error.xml.invalidCharInXMLDecl"; + ? "jsp.error.xml.invalidCharInTextDecl" + : "jsp.error.xml.invalidCharInXMLDecl"; reportFatalError(key, Integer.toString(c, 16)); scanChar(); } @@ -1512,31 +1523,36 @@ public class XMLEncodingDetector { } if (!skipChar(quote)) { reportFatalError(scanningTextDecl ? 
- "jsp.error.xml.closeQuoteMissingInTextDecl" - : "jsp.error.xml.closeQuoteMissingInXMLDecl", - name); + "jsp.error.xml.closeQuoteMissingInTextDecl" + : "jsp.error.xml.closeQuoteMissingInXMLDecl", + name); } // return return name; } - + // Adapted from: // org.apache.xerces.impl.XMLScanner.scanPIData - /** + + /* + * <p> * Scans a processing data. This is needed to handle the situation - * where a document starts with a processing instruction whose + * where a document starts with a processing instruction whose * target name <em>starts with</em> "xml". (e.g. xmlfoo) - * + * <p> + * <p/> + * <p> * <strong>Note:</strong> This method uses fStringBuffer, anything in it * at the time of calling is lost. + * </p> * * @param target The PI target - * @param data The string to fill in with the data + * @param data The string to fill in with the data */ - private void scanPIData(String target, XMLString data) - throws IOException, JasperException { + private void scanPIData(String target, XMLString data) + throws IOException, JasperException { // check target if (target.length() == 3) { @@ -1554,8 +1570,7 @@ public class XMLEncodingDetector { // we found the end, there is no data data.clear(); return; - } - else { + } else { // if there is data there should be some space err.jspError("jsp.error.xml.spaceRequiredInPI"); } @@ -1571,7 +1586,7 @@ public class XMLEncodingDetector { scanSurrogates(fStringBuffer); } else if (XMLChar.isInvalid(c)) { err.jspError("jsp.error.xml.invalidCharInPI", - Integer.toHexString(c)); + Integer.toHexString(c)); scanChar(); } } @@ -1583,40 +1598,42 @@ public class XMLEncodingDetector { // Adapted from: // org.apache.xerces.impl.XMLScanner.scanSurrogates - /** + + /* * Scans surrogates and append them to the specified buffer. * <p> * <strong>Note:</strong> This assumes the current char has already been * identified as a high surrogate. + * </p> * * @param buf The StringBuffer to append the read surrogates to. * @returns True if it succeeded. 
*/ private boolean scanSurrogates(XMLStringBuffer buf) - throws IOException, JasperException { + throws IOException, JasperException { int high = scanChar(); int low = peekChar(); if (!XMLChar.isLowSurrogate(low)) { err.jspError("jsp.error.xml.invalidCharInContent", - Integer.toString(high, 16)); + Integer.toString(high, 16)); return false; } scanChar(); // convert surrogates to supplemental character - int c = XMLChar.supplemental((char)high, (char)low); + int c = XMLChar.supplemental((char) high, (char) low); // supplemental character must be a valid XML character if (!XMLChar.isValid(c)) { err.jspError("jsp.error.xml.invalidCharInContent", - Integer.toString(c, 16)); + Integer.toString(c, 16)); return false; } // fill in the buffer - buf.append((char)high); - buf.append((char)low); + buf.append((char) high); + buf.append((char) low); return true; @@ -1624,11 +1641,15 @@ public class XMLEncodingDetector { // Adapted from: // org.apache.xerces.impl.XMLScanner.reportFatalError - /** + + /* * Convenience function used in all XML scanners. + * + * @param msgId message ID + * @param arg argument + * @throws JasperException in case of Jasper errors */ - private void reportFatalError(String msgId, String arg) - throws JasperException { + private void reportFatalError(String msgId, String arg) throws JasperException { err.jspError(msgId, arg); }
http://git-wip-us.apache.org/repos/asf/struts/blob/775c82a7/plugins/embeddedjsp/src/main/java/org/apache/struts2/jasper/xmlparser/XMLString.java ---------------------------------------------------------------------- diff --git a/plugins/embeddedjsp/src/main/java/org/apache/struts2/jasper/xmlparser/XMLString.java b/plugins/embeddedjsp/src/main/java/org/apache/struts2/jasper/xmlparser/XMLString.java index 0fab239..46195c9 100644 --- a/plugins/embeddedjsp/src/main/java/org/apache/struts2/jasper/xmlparser/XMLString.java +++ b/plugins/embeddedjsp/src/main/java/org/apache/struts2/jasper/xmlparser/XMLString.java @@ -26,15 +26,20 @@ package org.apache.struts2.jasper.xmlparser; /** + * <p> * This class is used as a structure to pass text contained in the underlying * character buffer of the scanner. The offset and length fields allow the * buffer to be re-used without creating new character arrays. + * </p> + * * <p> * <strong>Note:</strong> Methods that are passed an XMLString structure * should consider the contents read-only and not make any modifications * to the contents of the buffer. The method receiving this structure * should also not modify the offset and length if this structure (or * the values of this structure) are passed to another method. + * </p> + * * <p> * <strong>Note:</strong> Methods that are passed an XMLString structure * are required to copy the information out of the buffer if it is to be @@ -43,6 +48,7 @@ package org.apache.struts2.jasper.xmlparser; * be assured once the method that is passed this structure returns. * Therefore, methods passed this structure should not save any reference * to the structure or the character array contained in the structure. + * </p> * * @author Eric Ye, IBM * @author Andy Clark, IBM @@ -121,8 +127,9 @@ public class XMLString { * <p> * <strong>Note:</strong> This does not copy the character array; * only the reference to the array is copied. 
- * - * @param s + * </p> + * + * @param s xml string */ public void setValues(XMLString s) { setValues(s.ch, s.offset, s.length); @@ -136,7 +143,7 @@ public class XMLString { } // clear() /** - * Returns true if the contents of this XMLString structure and + * @return true if the contents of this XMLString structure and * the specified array are equal. * * @param ch The character array. @@ -160,7 +167,7 @@ public class XMLString { } // equals(char[],int,int):boolean /** - * Returns true if the contents of this XMLString structure and + * @return true if the contents of this XMLString structure and * the specified string are equal. * * @param s The string to compare. @@ -189,7 +196,7 @@ public class XMLString { // Object methods // - /** Returns a string representation of this object. */ + /** @return a string representation of this object. */ public String toString() { return length > 0 ? new String(ch, offset, length) : ""; } // toString():String http://git-wip-us.apache.org/repos/asf/struts/blob/775c82a7/plugins/embeddedjsp/src/main/java/org/apache/struts2/jasper/xmlparser/XMLStringBuffer.java ---------------------------------------------------------------------- diff --git a/plugins/embeddedjsp/src/main/java/org/apache/struts2/jasper/xmlparser/XMLStringBuffer.java b/plugins/embeddedjsp/src/main/java/org/apache/struts2/jasper/xmlparser/XMLStringBuffer.java index d16c967..1103a97 100644 --- a/plugins/embeddedjsp/src/main/java/org/apache/struts2/jasper/xmlparser/XMLStringBuffer.java +++ b/plugins/embeddedjsp/src/main/java/org/apache/struts2/jasper/xmlparser/XMLStringBuffer.java @@ -26,34 +26,41 @@ package org.apache.struts2.jasper.xmlparser; /** + * <p> * XMLString is a structure used to pass character arrays. However, * XMLStringBuffer is a buffer in which characters can be appended * and extends XMLString so that it can be passed to methods * expecting an XMLString object. 
This is a safe operation because * it is assumed that any callee will <strong>not</strong> modify * the contents of the XMLString structure. - * <p> + * </p> + * + * <p> * The contents of the string are managed by the string buffer. As * characters are appended, the string buffer will grow as needed. + * </p> + * * <p> - * <strong>Note:</strong> Never set the <code>ch</code>, + * <strong>Note:</strong> Never set the <code>ch</code>, * <code>offset</code>, and <code>length</code> fields directly. * These fields are managed by the string buffer. In order to reset * the buffer, call <code>clear()</code>. - * + * </p> + * * @author Andy Clark, IBM * @author Eric Ye, IBM - * * @version $Id: XMLStringBuffer.java 467222 2006-10-24 03:17:11Z markt $ */ public class XMLStringBuffer - extends XMLString { + extends XMLString { // // Constants // - /** Default buffer size (32). */ + /** + * Default buffer size (32). + */ public static final int DEFAULT_SIZE = 32; // @@ -61,40 +68,56 @@ public class XMLStringBuffer // /** - * + * */ public XMLStringBuffer() { this(DEFAULT_SIZE); } // <init>() /** - * - * - * @param size + * @param size size */ public XMLStringBuffer(int size) { ch = new char[size]; } // <init>(int) - /** Constructs a string buffer from a char. */ + /** + * Constructs a string buffer from a char. + * + * @param c character + */ public XMLStringBuffer(char c) { this(1); append(c); } // <init>(char) - /** Constructs a string buffer from a String. */ + /** + * Constructs a string buffer from a String. + * + * @param s string + */ public XMLStringBuffer(String s) { this(s.length()); append(s); } // <init>(String) - /** Constructs a string buffer from the specified character array. */ + /** + * Constructs a string buffer from the specified character array. 
+ * + * @param ch char array + * @param offset offset + * @param length length + */ public XMLStringBuffer(char[] ch, int offset, int length) { this(length); append(ch, offset, length); } // <init>(char[],int,int) - /** Constructs a string buffer from the specified XMLString. */ + /** + * Constructs a string buffer from the specified XMLString. + * + * @param s xml string + */ public XMLStringBuffer(XMLString s) { this(s.length); append(s); @@ -104,7 +127,9 @@ public class XMLStringBuffer // Public methods // - /** Clears the string buffer. */ + /** + * Clears the string buffer. + */ public void clear() { offset = 0; length = 0; @@ -112,17 +137,17 @@ public class XMLStringBuffer /** * append - * - * @param c + * + * @param c char */ public void append(char c) { if (this.length + 1 > this.ch.length) { - int newLength = this.ch.length*2; - if (newLength < this.ch.length + DEFAULT_SIZE) - newLength = this.ch.length + DEFAULT_SIZE; - char[] newch = new char[newLength]; - System.arraycopy(this.ch, 0, newch, 0, this.length); - this.ch = newch; + int newLength = this.ch.length * 2; + if (newLength < this.ch.length + DEFAULT_SIZE) + newLength = this.ch.length + DEFAULT_SIZE; + char[] newch = new char[newLength]; + System.arraycopy(this.ch, 0, newch, 0, this.length); + this.ch = newch; } this.ch[this.length] = c; this.length++; @@ -130,16 +155,16 @@ public class XMLStringBuffer /** * append - * - * @param s + * + * @param s string */ public void append(String s) { int length = s.length(); if (this.length + length > this.ch.length) { - int newLength = this.ch.length*2; + int newLength = this.ch.length * 2; if (newLength < this.length + length + DEFAULT_SIZE) newLength = this.ch.length + length + DEFAULT_SIZE; - char[] newch = new char[newLength]; + char[] newch = new char[newLength]; System.arraycopy(this.ch, 0, newch, 0, this.length); this.ch = newch; } @@ -149,10 +174,10 @@ public class XMLStringBuffer /** * append - * - * @param ch - * @param offset - * @param length + * 
+ * @param ch char array + * @param offset offset + * @param length length */ public void append(char[] ch, int offset, int length) { if (this.length + length > this.ch.length) { @@ -166,8 +191,8 @@ public class XMLStringBuffer /** * append - * - * @param s + * + * @param s xml string */ public void append(XMLString s) { append(s.ch, s.offset, s.length);