WICKET-5416 BOM in UTF markup file breaks encoding detection Copy BOMInputStream, ByteOrderMark and ProxyInputStream from commons-io 2.4 (http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/)
Project: http://git-wip-us.apache.org/repos/asf/wicket/repo Commit: http://git-wip-us.apache.org/repos/asf/wicket/commit/4bb19a48 Tree: http://git-wip-us.apache.org/repos/asf/wicket/tree/4bb19a48 Diff: http://git-wip-us.apache.org/repos/asf/wicket/diff/4bb19a48 Branch: refs/heads/wicket-cdi-1.1-rewrite Commit: 4bb19a48ff5df89d03da53e87efba379ff3c54d3 Parents: 724e9f7 Author: Martin Tzvetanov Grigorov <[email protected]> Authored: Fri Nov 15 16:08:03 2013 +0200 Committer: Martin Tzvetanov Grigorov <[email protected]> Committed: Fri Nov 15 16:08:03 2013 +0200 ---------------------------------------------------------------------- .../apache/wicket/util/io/BOMInputStream.java | 404 +++++++++++++++++++ .../apache/wicket/util/io/ByteOrderMark.java | 191 +++++++++ .../apache/wicket/util/io/ProxyInputStream.java | 236 +++++++++++ .../org/apache/wicket/util/io/XmlReader.java | 4 +- .../apache/wicket/util/io/XmlReaderTest.java | 19 +- .../java/org/apache/wicket/util/io/test_8.html | 2 + 6 files changed, 853 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/wicket/blob/4bb19a48/wicket-util/src/main/java/org/apache/wicket/util/io/BOMInputStream.java ---------------------------------------------------------------------- diff --git a/wicket-util/src/main/java/org/apache/wicket/util/io/BOMInputStream.java b/wicket-util/src/main/java/org/apache/wicket/util/io/BOMInputStream.java new file mode 100644 index 0000000..b870278 --- /dev/null +++ b/wicket-util/src/main/java/org/apache/wicket/util/io/BOMInputStream.java @@ -0,0 +1,404 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.wicket.util.io; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; + +/** + * This class is used to wrap a stream that includes an encoded {@link ByteOrderMark} as its first bytes. + * + * This class detects these bytes and, if required, can automatically skip them and return the subsequent byte as the + * first byte in the stream. 
+ * + * The {@link ByteOrderMark} implementation has the following pre-defined BOMs: + * <ul> + * <li>UTF-8 - {@link ByteOrderMark#UTF_8}</li> + * <li>UTF-16BE - {@link ByteOrderMark#UTF_16BE}</li> + * <li>UTF-16LE - {@link ByteOrderMark#UTF_16LE}</li> + * <li>UTF-32BE - {@link ByteOrderMark#UTF_32BE}</li> + * <li>UTF-32LE - {@link ByteOrderMark#UTF_32LE}</li> + * </ul> + * + * + * <h3>Example 1 - Detect and exclude a UTF-8 BOM</h3> + * + * <pre> + * BOMInputStream bomIn = new BOMInputStream(in); + * if (bomIn.hasBOM()) { + * // has a UTF-8 BOM + * } + * </pre> + * + * <h3>Example 2 - Detect a UTF-8 BOM (but don't exclude it)</h3> + * + * <pre> + * boolean include = true; + * BOMInputStream bomIn = new BOMInputStream(in, include); + * if (bomIn.hasBOM()) { + * // has a UTF-8 BOM + * } + * </pre> + * + * <h3>Example 3 - Detect Multiple BOMs</h3> + * + * <pre> + * BOMInputStream bomIn = new BOMInputStream(in, + * ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, + * ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE + * ); + * if (bomIn.hasBOM() == false) { + * // No BOM found + * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) { + * // has a UTF-16LE BOM + * } else if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) { + * // has a UTF-16BE BOM + * } else if (bomIn.hasBOM(ByteOrderMark.UTF_32LE)) { + * // has a UTF-32LE BOM + * } else if (bomIn.hasBOM(ByteOrderMark.UTF_32BE)) { + * // has a UTF-32BE BOM + * } + * </pre> + * + * @see ByteOrderMark + * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia - Byte Order Mark</a> + * @version $Id$ + * @since 2.0 + */ +public class BOMInputStream extends ProxyInputStream +{ + private final boolean include; + /** + * BOMs are sorted from longest to shortest. 
+ */ + private final List<ByteOrderMark> boms; + private ByteOrderMark byteOrderMark; + private int[] firstBytes; + private int fbLength; + private int fbIndex; + private int markFbIndex; + private boolean markedAtStart; + + /** + * Constructs a new BOM InputStream that excludes a {@link ByteOrderMark#UTF_8} BOM. + * + * @param delegate + * the InputStream to delegate to + */ + public BOMInputStream(final InputStream delegate) { + this(delegate, false, ByteOrderMark.UTF_8); + } + + /** + * Constructs a new BOM InputStream that detects a a {@link ByteOrderMark#UTF_8} and optionally includes it. + * + * @param delegate + * the InputStream to delegate to + * @param include + * true to include the UTF-8 BOM or false to exclude it + */ + public BOMInputStream(final InputStream delegate, final boolean include) { + this(delegate, include, ByteOrderMark.UTF_8); + } + + /** + * Constructs a new BOM InputStream that excludes the specified BOMs. + * + * @param delegate + * the InputStream to delegate to + * @param boms + * The BOMs to detect and exclude + */ + public BOMInputStream(final InputStream delegate, final ByteOrderMark... boms) { + this(delegate, false, boms); + } + + /** + * Compares ByteOrderMark objects in descending length order. + */ + private static final Comparator<ByteOrderMark> ByteOrderMarkLengthComparator = new Comparator<ByteOrderMark>() { + + public int compare(final ByteOrderMark bom1, final ByteOrderMark bom2) { + final int len1 = bom1.length(); + final int len2 = bom2.length(); + if (len1 > len2) { + return -1; + } + if (len2 > len1) { + return 1; + } + return 0; + } + }; + + /** + * Constructs a new BOM InputStream that detects the specified BOMs and optionally includes them. 
+ * + * @param delegate + * the InputStream to delegate to + * @param include + * true to include the specified BOMs or false to exclude them + * @param boms + * The BOMs to detect and optionally exclude + */ + public BOMInputStream(final InputStream delegate, final boolean include, final ByteOrderMark... boms) { + super(delegate); + if (boms == null || boms.length == 0) { + throw new IllegalArgumentException("No BOMs specified"); + } + this.include = include; + // Sort the BOMs to match the longest BOM first because some BOMs have the same starting two bytes. + Arrays.sort(boms, ByteOrderMarkLengthComparator); + this.boms = Arrays.asList(boms); + + } + + /** + * Indicates whether the stream contains one of the specified BOMs. + * + * @return true if the stream has one of the specified BOMs, otherwise false if it does not + * @throws IOException + * if an error reading the first bytes of the stream occurs + */ + public boolean hasBOM() throws IOException { + return getBOM() != null; + } + + /** + * Indicates whether the stream contains the specified BOM. + * + * @param bom + * The BOM to check for + * @return true if the stream has the specified BOM, otherwise false if it does not + * @throws IllegalArgumentException + * if the BOM is not one the stream is configured to detect + * @throws IOException + * if an error reading the first bytes of the stream occurs + */ + public boolean hasBOM(final ByteOrderMark bom) throws IOException { + if (!boms.contains(bom)) { + throw new IllegalArgumentException("Stream not configure to detect " + bom); + } + return byteOrderMark != null && getBOM().equals(bom); + } + + /** + * Return the BOM (Byte Order Mark). 
+ * + * @return The BOM or null if none + * @throws IOException + * if an error reading the first bytes of the stream occurs + */ + public ByteOrderMark getBOM() throws IOException { + if (firstBytes == null) { + fbLength = 0; + // BOMs are sorted from longest to shortest + final int maxBomSize = boms.get(0).length(); + firstBytes = new int[maxBomSize]; + // Read first maxBomSize bytes + for (int i = 0; i < firstBytes.length; i++) { + firstBytes[i] = in.read(); + fbLength++; + if (firstBytes[i] < 0) { + break; + } + } + // match BOM in firstBytes + byteOrderMark = find(); + if (byteOrderMark != null) { + if (!include) { + if (byteOrderMark.length() < firstBytes.length) { + fbIndex = byteOrderMark.length(); + } else { + fbLength = 0; + } + } + } + } + return byteOrderMark; + } + + /** + * Return the BOM charset Name - {@link ByteOrderMark#getCharsetName()}. + * + * @return The BOM charset Name or null if no BOM found + * @throws IOException + * if an error reading the first bytes of the stream occurs + * + */ + public String getBOMCharsetName() throws IOException { + getBOM(); + return byteOrderMark == null ? null : byteOrderMark.getCharsetName(); + } + + /** + * This method reads and either preserves or skips the first bytes in the stream. It behaves like the single-byte + * <code>read()</code> method, either returning a valid byte or -1 to indicate that the initial bytes have been + * processed already. + * + * @return the byte read (excluding BOM) or -1 if the end of stream + * @throws IOException + * if an I/O error occurs + */ + private int readFirstBytes() throws IOException { + getBOM(); + return fbIndex < fbLength ? firstBytes[fbIndex++] : -1; + } + + /** + * Find a BOM with the specified bytes. + * + * @return The matched BOM or null if none matched + */ + private ByteOrderMark find() { + for (final ByteOrderMark bom : boms) { + if (matches(bom)) { + return bom; + } + } + return null; + } + + /** + * Check if the bytes match a BOM. 
+ * + * @param bom + * The BOM + * @return true if the bytes match the bom, otherwise false + */ + private boolean matches(final ByteOrderMark bom) { + // if (bom.length() != fbLength) { + // return false; + // } + // firstBytes may be bigger than the BOM bytes + for (int i = 0; i < bom.length(); i++) { + if (bom.get(i) != firstBytes[i]) { + return false; + } + } + return true; + } + + // ---------------------------------------------------------------------------- + // Implementation of InputStream + // ---------------------------------------------------------------------------- + + /** + * Invokes the delegate's <code>read()</code> method, detecting and optionally skipping BOM. + * + * @return the byte read (excluding BOM) or -1 if the end of stream + * @throws IOException + * if an I/O error occurs + */ + @Override + public int read() throws IOException { + final int b = readFirstBytes(); + return b >= 0 ? b : in.read(); + } + + /** + * Invokes the delegate's <code>read(byte[], int, int)</code> method, detecting and optionally skipping BOM. + * + * @param buf + * the buffer to read the bytes into + * @param off + * The start offset + * @param len + * The number of bytes to read (excluding BOM) + * @return the number of bytes read or -1 if the end of stream + * @throws IOException + * if an I/O error occurs + */ + @Override + public int read(final byte[] buf, int off, int len) throws IOException { + int firstCount = 0; + int b = 0; + while (len > 0 && b >= 0) { + b = readFirstBytes(); + if (b >= 0) { + buf[off++] = (byte) (b & 0xFF); + len--; + firstCount++; + } + } + final int secondCount = in.read(buf, off, len); + return secondCount < 0 ? firstCount > 0 ? firstCount : -1 : firstCount + secondCount; + } + + /** + * Invokes the delegate's <code>read(byte[])</code> method, detecting and optionally skipping BOM. 
+ * + * @param buf + * the buffer to read the bytes into + * @return the number of bytes read (excluding BOM) or -1 if the end of stream + * @throws IOException + * if an I/O error occurs + */ + @Override + public int read(final byte[] buf) throws IOException { + return read(buf, 0, buf.length); + } + + /** + * Invokes the delegate's <code>mark(int)</code> method. + * + * @param readlimit + * read ahead limit + */ + @Override + public synchronized void mark(final int readlimit) { + markFbIndex = fbIndex; + markedAtStart = firstBytes == null; + in.mark(readlimit); + } + + /** + * Invokes the delegate's <code>reset()</code> method. + * + * @throws IOException + * if an I/O error occurs + */ + @Override + public synchronized void reset() throws IOException { + fbIndex = markFbIndex; + if (markedAtStart) { + firstBytes = null; + } + + in.reset(); + } + + /** + * Invokes the delegate's <code>skip(long)</code> method, detecting and optionallyskipping BOM. + * + * @param n + * the number of bytes to skip + * @return the number of bytes to skipped or -1 if the end of stream + * @throws IOException + * if an I/O error occurs + */ + @Override + public long skip(long n) throws IOException { + while (n > 0 && readFirstBytes() >= 0) { + n--; + } + return in.skip(n); + } +} http://git-wip-us.apache.org/repos/asf/wicket/blob/4bb19a48/wicket-util/src/main/java/org/apache/wicket/util/io/ByteOrderMark.java ---------------------------------------------------------------------- diff --git a/wicket-util/src/main/java/org/apache/wicket/util/io/ByteOrderMark.java b/wicket-util/src/main/java/org/apache/wicket/util/io/ByteOrderMark.java new file mode 100644 index 0000000..e1ee046 --- /dev/null +++ b/wicket-util/src/main/java/org/apache/wicket/util/io/ByteOrderMark.java @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.wicket.util.io; + +import java.io.Serializable; + +/** + * Byte Order Mark (BOM) representation - see {@link BOMInputStream}. + * + * @see BOMInputStream + * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia: Byte Order Mark</a> + * @see <a href="http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing">W3C: Autodetection of Character Encodings + * (Non-Normative)</a> + * @version $Id$ + * @since 2.0 + */ +public class ByteOrderMark implements Serializable { + + private static final long serialVersionUID = 1L; + + /** UTF-8 BOM */ + public static final ByteOrderMark UTF_8 = new ByteOrderMark("UTF-8", 0xEF, 0xBB, 0xBF); + + /** UTF-16BE BOM (Big-Endian) */ + public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF); + + /** UTF-16LE BOM (Little-Endian) */ + public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE); + + /** + * UTF-32BE BOM (Big-Endian) + * @since 2.2 + */ + public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF); + + /** + * UTF-32LE BOM (Little-Endian) + * @since 2.2 + */ + public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00); + + /** + * Unicode BOM character; external form 
depends on the encoding. + * @see <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a> + * @since 2.5 + */ + public static final char UTF_BOM = '\uFEFF'; + + private final String charsetName; + private final int[] bytes; + + /** + * Construct a new BOM. + * + * @param charsetName The name of the charset the BOM represents + * @param bytes The BOM's bytes + * @throws IllegalArgumentException if the charsetName is null or + * zero length + * @throws IllegalArgumentException if the bytes are null or zero + * length + */ + public ByteOrderMark(final String charsetName, final int... bytes) { + if (charsetName == null || charsetName.length() == 0) { + throw new IllegalArgumentException("No charsetName specified"); + } + if (bytes == null || bytes.length == 0) { + throw new IllegalArgumentException("No bytes specified"); + } + this.charsetName = charsetName; + this.bytes = new int[bytes.length]; + System.arraycopy(bytes, 0, this.bytes, 0, bytes.length); + } + + /** + * Return the name of the {@link java.nio.charset.Charset} the BOM represents. + * + * @return the character set name + */ + public String getCharsetName() { + return charsetName; + } + + /** + * Return the length of the BOM's bytes. + * + * @return the length of the BOM's bytes + */ + public int length() { + return bytes.length; + } + + /** + * The byte at the specified position. + * + * @param pos The position + * @return The specified byte + */ + public int get(final int pos) { + return bytes[pos]; + } + + /** + * Return a copy of the BOM's bytes. + * + * @return a copy of the BOM's bytes + */ + public byte[] getBytes() { + final byte[] copy = new byte[bytes.length]; + for (int i = 0; i < bytes.length; i++) { + copy[i] = (byte)bytes[i]; + } + return copy; + } + + /** + * Indicates if this BOM's bytes equals another. 
+ * + * @param obj The object to compare to + * @return true if the bom's bytes are equal, otherwise + * false + */ + @Override + public boolean equals(final Object obj) { + if (!(obj instanceof ByteOrderMark)) { + return false; + } + final ByteOrderMark bom = (ByteOrderMark)obj; + if (bytes.length != bom.length()) { + return false; + } + for (int i = 0; i < bytes.length; i++) { + if (bytes[i] != bom.get(i)) { + return false; + } + } + return true; + } + + /** + * Return the hashcode for this BOM. + * + * @return the hashcode for this BOM. + * @see java.lang.Object#hashCode() + */ + @Override + public int hashCode() { + int hashCode = getClass().hashCode(); + for (final int b : bytes) { + hashCode += b; + } + return hashCode; + } + + /** + * Provide a String representation of the BOM. + * + * @return the simple class name followed by the charset name and the BOM's bytes in upper-case hex, e.g. <code>ByteOrderMark[UTF-8: 0xEF,0xBB,0xBF]</code> + */ + @Override + public String toString() { + final StringBuilder builder = new StringBuilder(); + builder.append(getClass().getSimpleName()); + builder.append('['); + builder.append(charsetName); + builder.append(": "); + for (int i = 0; i < bytes.length; i++) { + if (i > 0) { + builder.append(","); + } + builder.append("0x"); + builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase()); + } + builder.append(']'); + return builder.toString(); + } + +} http://git-wip-us.apache.org/repos/asf/wicket/blob/4bb19a48/wicket-util/src/main/java/org/apache/wicket/util/io/ProxyInputStream.java ---------------------------------------------------------------------- diff --git a/wicket-util/src/main/java/org/apache/wicket/util/io/ProxyInputStream.java b/wicket-util/src/main/java/org/apache/wicket/util/io/ProxyInputStream.java new file mode 100644 index 0000000..e3d424c --- /dev/null +++ b/wicket-util/src/main/java/org/apache/wicket/util/io/ProxyInputStream.java @@ -0,0 +1,236 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.wicket.util.io; + +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * A Proxy stream which acts as expected, that is it passes the method + * calls on to the proxied stream and doesn't change which methods are + * being called. + * <p> + * It is an alternative base class to FilterInputStream + * to increase reusability, because FilterInputStream changes the + * methods being called, such as read(byte[]) to read(byte[], int, int). + * <p> + * See the protected methods for ways in which a subclass can easily decorate + * a stream with custom pre-, post- or error processing functionality. + * + * @version $Id$ + */ +public abstract class ProxyInputStream extends FilterInputStream { + + /** + * Constructs a new ProxyInputStream. + * + * @param proxy the InputStream to delegate to + */ + public ProxyInputStream(final InputStream proxy) { + super(proxy); + // the proxy is stored in a protected superclass variable named 'in' + } + + /** + * Invokes the delegate's <code>read()</code> method. 
+ * @return the byte read or -1 if the end of stream + * @throws IOException if an I/O error occurs + */ + @Override + public int read() throws IOException { + try { + beforeRead(1); + final int b = in.read(); + afterRead(b != -1 ? 1 : -1); + return b; + } catch (final IOException e) { + handleIOException(e); + return -1; + } + } + + /** + * Invokes the delegate's <code>read(byte[])</code> method. + * @param bts the buffer to read the bytes into + * @return the number of bytes read or -1 if the end of stream + * @throws IOException if an I/O error occurs + */ + @Override + public int read(final byte[] bts) throws IOException { + try { + beforeRead(bts != null ? bts.length : 0); + final int n = in.read(bts); + afterRead(n); + return n; + } catch (final IOException e) { + handleIOException(e); + return -1; + } + } + + /** + * Invokes the delegate's <code>read(byte[], int, int)</code> method. + * @param bts the buffer to read the bytes into + * @param off The start offset + * @param len The number of bytes to read + * @return the number of bytes read or -1 if the end of stream + * @throws IOException if an I/O error occurs + */ + @Override + public int read(final byte[] bts, final int off, final int len) throws IOException { + try { + beforeRead(len); + final int n = in.read(bts, off, len); + afterRead(n); + return n; + } catch (final IOException e) { + handleIOException(e); + return -1; + } + } + + /** + * Invokes the delegate's <code>skip(long)</code> method. + * @param ln the number of bytes to skip + * @return the actual number of bytes skipped + * @throws IOException if an I/O error occurs + */ + @Override + public long skip(final long ln) throws IOException { + try { + return in.skip(ln); + } catch (final IOException e) { + handleIOException(e); + return 0; + } + } + + /** + * Invokes the delegate's <code>available()</code> method. 
+ * @return the number of available bytes + * @throws IOException if an I/O error occurs + */ + @Override + public int available() throws IOException { + try { + return super.available(); + } catch (final IOException e) { + handleIOException(e); + return 0; + } + } + + /** + * Invokes the delegate's <code>close()</code> method. + * @throws IOException if an I/O error occurs + */ + @Override + public void close() throws IOException { + try { + in.close(); + } catch (final IOException e) { + handleIOException(e); + } + } + + /** + * Invokes the delegate's <code>mark(int)</code> method. + * @param readlimit read ahead limit + */ + @Override + public synchronized void mark(final int readlimit) { + in.mark(readlimit); + } + + /** + * Invokes the delegate's <code>reset()</code> method. + * @throws IOException if an I/O error occurs + */ + @Override + public synchronized void reset() throws IOException { + try { + in.reset(); + } catch (final IOException e) { + handleIOException(e); + } + } + + /** + * Invokes the delegate's <code>markSupported()</code> method. + * @return true if mark is supported, otherwise false + */ + @Override + public boolean markSupported() { + return in.markSupported(); + } + + /** + * Invoked by the read methods before the call is proxied. The number + * of bytes that the caller wanted to read (1 for the {@link #read()} + * method, buffer length for {@link #read(byte[])}, etc.) is given as + * an argument. + * <p> + * Subclasses can override this method to add common pre-processing + * functionality without having to override all the read methods. + * The default implementation does nothing. + * <p> + * Note this method is <em>not</em> called from {@link #skip(long)} or + * {@link #reset()}. You need to explicitly override those methods if + * you want to add pre-processing steps also to them. 
+ * + * @since 2.0 + * @param n number of bytes that the caller asked to be read + * @throws IOException if the pre-processing fails + */ + protected void beforeRead(final int n) throws IOException { + } + + /** + * Invoked by the read methods after the proxied call has returned + * successfully. The number of bytes returned to the caller (or -1 if + * the end of stream was reached) is given as an argument. + * <p> + * Subclasses can override this method to add common post-processing + * functionality without having to override all the read methods. + * The default implementation does nothing. + * <p> + * Note this method is <em>not</em> called from {@link #skip(long)} or + * {@link #reset()}. You need to explicitly override those methods if + * you want to add post-processing steps also to them. + * + * @since 2.0 + * @param n number of bytes read, or -1 if the end of stream was reached + * @throws IOException if the post-processing fails + */ + protected void afterRead(final int n) throws IOException { + } + + /** + * Handle any IOExceptions thrown. + * <p> + * This method provides a point to implement custom exception + * handling. The default behaviour is to re-throw the exception. 
+ * @param e The IOException thrown + * @throws IOException if an I/O error occurs + * @since 2.0 + */ + protected void handleIOException(final IOException e) throws IOException { + throw e; + } + +} http://git-wip-us.apache.org/repos/asf/wicket/blob/4bb19a48/wicket-util/src/main/java/org/apache/wicket/util/io/XmlReader.java ---------------------------------------------------------------------- diff --git a/wicket-util/src/main/java/org/apache/wicket/util/io/XmlReader.java b/wicket-util/src/main/java/org/apache/wicket/util/io/XmlReader.java index ce4c041..1f3a408 100644 --- a/wicket-util/src/main/java/org/apache/wicket/util/io/XmlReader.java +++ b/wicket-util/src/main/java/org/apache/wicket/util/io/XmlReader.java @@ -70,11 +70,11 @@ public final class XmlReader extends Reader if (!inputStream.markSupported()) { - this.inputStream = new BufferedInputStream(inputStream); + this.inputStream = new BufferedInputStream(new BOMInputStream(inputStream)); } else { - this.inputStream = inputStream; + this.inputStream = new BOMInputStream(inputStream); } encoding = defaultEncoding; http://git-wip-us.apache.org/repos/asf/wicket/blob/4bb19a48/wicket-util/src/test/java/org/apache/wicket/util/io/XmlReaderTest.java ---------------------------------------------------------------------- diff --git a/wicket-util/src/test/java/org/apache/wicket/util/io/XmlReaderTest.java b/wicket-util/src/test/java/org/apache/wicket/util/io/XmlReaderTest.java index 8c01fd3..c3cb0c7 100644 --- a/wicket-util/src/test/java/org/apache/wicket/util/io/XmlReaderTest.java +++ b/wicket-util/src/test/java/org/apache/wicket/util/io/XmlReaderTest.java @@ -123,7 +123,7 @@ public class XmlReaderTest extends Assert } /** - * + * * @throws Exception */ @Test @@ -137,4 +137,21 @@ public class XmlReaderTest extends Assert assertEquals("<html>", bufReader.readLine()); assertNull(bufReader.readLine()); } + + /** + * + * @throws Exception + */ + @Test + public void ignoreBOM() throws Exception + { + // test_8.html starts 
with <U+FEFF> character + XmlReader reader = new XmlReader(this.getClass().getResourceAsStream("test_8.html"), null); + assertEquals("UTF-8", reader.getEncoding()); + + BufferedReader bufReader = new BufferedReader(reader); + assertEquals("", bufReader.readLine().trim()); + assertEquals("<html>", bufReader.readLine()); + assertNull(bufReader.readLine()); + } } http://git-wip-us.apache.org/repos/asf/wicket/blob/4bb19a48/wicket-util/src/test/java/org/apache/wicket/util/io/test_8.html ---------------------------------------------------------------------- diff --git a/wicket-util/src/test/java/org/apache/wicket/util/io/test_8.html b/wicket-util/src/test/java/org/apache/wicket/util/io/test_8.html new file mode 100644 index 0000000..22e1a82 --- /dev/null +++ b/wicket-util/src/test/java/org/apache/wicket/util/io/test_8.html @@ -0,0 +1,2 @@ +<?xml version="1.0" encoding=UTF-8?> +<html>
