Author: markt Date: Tue Mar 5 13:41:19 2013 New Revision: 1452791 URL: http://svn.apache.org/r1452791 Log: Refactor encoding/decoding to use NIO
Modified: tomcat/tc7.0.x/trunk/ (props changed) tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/OutputBuffer.java tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/Response.java tomcat/tc7.0.x/trunk/java/org/apache/naming/resources/DirContextURLConnection.java tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/ByteChunk.java tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/C2BConverter.java tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/CharChunk.java tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/UEncoder.java tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml Propchange: tomcat/tc7.0.x/trunk/ ------------------------------------------------------------------------------ Merged /tomcat/trunk:r1437743,1437891,1447818,1451408 Modified: tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java?rev=1452791&r1=1452790&r2=1452791&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java Tue Mar 5 13:41:19 2013 @@ -992,7 +992,7 @@ public class CoyoteAdapter implements Ad } if (conv != null) { try { - conv.convert(bc, cc, cc.getBuffer().length - cc.getEnd()); + conv.convert(bc, cc); uri.setChars(cc.getBuffer(), cc.getStart(), cc.getLength()); return; Modified: tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java?rev=1452791&r1=1452790&r2=1452791&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java Tue Mar 5 13:41:19 2013 @@ -357,14 +357,19 @@ public class InputBuffer extends Reader if (markPos == -1) { cb.setOffset(0); cb.setEnd(0); + } else { + // Make sure there's enough space in the worst case + cb.makeSpace(bb.getLength()); + if ((cb.getBuffer().length - cb.getEnd()) == 0) { + // We went over the limit + cb.setOffset(0); + cb.setEnd(0); + markPos = -1; + } } - int limit = bb.getLength()+cb.getStart(); - if ( cb.getLimit() < limit ) { - cb.setLimit(limit); - } + state = CHAR_STATE; - conv.convert(bb, cb, bb.getLength()); - bb.setOffset(bb.getEnd()); + conv.convert(bb, cb); return cb.getLength(); Modified: tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/OutputBuffer.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/OutputBuffer.java?rev=1452791&r1=1452790&r2=1452791&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/OutputBuffer.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/OutputBuffer.java Tue Mar 5 13:41:19 2013 @@ -29,6 +29,7 @@ import org.apache.coyote.ActionCode; import org.apache.coyote.Response; import org.apache.tomcat.util.buf.ByteChunk; import org.apache.tomcat.util.buf.C2BConverter; +import org.apache.tomcat.util.buf.CharChunk; /** @@ -40,7 +41,7 @@ import org.apache.tomcat.util.buf.C2BCon * @author Remy Maucherat */ public class OutputBuffer extends Writer - implements ByteChunk.ByteOutputChannel { + implements ByteChunk.ByteOutputChannel, CharChunk.CharOutputChannel { // -------------------------------------------------------------- Constants @@ -61,6 +62,12 @@ public class OutputBuffer extends Writer /** + * The chunk buffer. + */ + private final CharChunk cb; + + + /** * State of the output buffer. */ private boolean initial = true; @@ -97,6 +104,12 @@ public class OutputBuffer extends Writer /** + * Char chunk used to output chars. + */ + private CharChunk outputCharChunk = new CharChunk(); + + + /** * Encoding to use. */ private String enc; @@ -156,6 +169,10 @@ public class OutputBuffer extends Writer bb = new ByteChunk(size); bb.setLimit(size); bb.setByteOutputChannel(this); + cb = new CharChunk(size); + cb.setLimit(size); + cb.setOptimizedWrite(false); + cb.setCharOutputChannel(this); } @@ -225,16 +242,18 @@ public class OutputBuffer extends Writer initial = true; bytesWritten = 0; charsWritten = 0; - + bb.recycle(); + cb.recycle(); + outputCharChunk.setChars(null, 0, 0); closed = false; - doFlush = false; suspended = false; - + doFlush = false; + if (conv!= null) { conv.recycle(); } - + gotEnc = false; enc = null; @@ -266,9 +285,10 @@ public class OutputBuffer extends Writer return; } - // Flush the convertor if one is in use - if (gotEnc && conv != null) { - conv.flushBuffer(); + // If there are chars, flush all of them to the byte buffer now as bytes are used to + // calculate the content-length (if everything fits into the byte buffer, of course). + if (cb.getLength() > 0) { + cb.flushBuffer(); } if ((!coyoteResponse.isCommitted()) @@ -319,17 +339,15 @@ public class OutputBuffer extends Writer return; } - // Flush the convertor if one is in use - if (gotEnc && conv != null) { - conv.flushBuffer(); - } - try { doFlush = true; if (initial) { coyoteResponse.sendHeaders(); initial = false; } + if (cb.getLength() > 0) { + cb.flushBuffer(); + } if (bb.getLength() > 0) { bb.flushBuffer(); } @@ -438,6 +456,33 @@ public class OutputBuffer extends Writer // ------------------------------------------------- Chars Handling Methods + /** + * Convert the chars to bytes, then send the data to the client. + * + * @param buf Char buffer to be written to the response + * @param off Offset + * @param len Length + * + * @throws IOException An underlying IOException occurred + */ + @Override + public void realWriteChars(char buf[], int off, int len) + throws IOException { + + outputCharChunk.setChars(buf, off, len); + while (outputCharChunk.getLength() > 0) { + conv.convert(outputCharChunk, bb); + if (bb.getLength() == 0) { + // Break out of the loop if more chars are needed to produce any output + break; + } + if (outputCharChunk.getLength() > 0) { + bb.flushBuffer(); + } + } + + } + @Override public void write(int c) throws IOException { @@ -446,7 +491,7 @@ public class OutputBuffer extends Writer return; } - conv.convert((char) c); + cb.append((char) c); charsWritten++; } @@ -473,7 +518,7 @@ public class OutputBuffer extends Writer return; } - conv.convert(c, off, len); + cb.append(c, off, len); charsWritten += len; } @@ -494,7 +539,8 @@ public class OutputBuffer extends Writer if (s == null) { s = "null"; } - conv.convert(s, off, len); + cb.append(s, off, len); + charsWritten += len; } @@ -509,7 +555,8 @@ public class OutputBuffer extends Writer if (s == null) { s = "null"; } - conv.convert(s); + cb.append(s); + charsWritten += s.length(); } @@ -541,7 +588,6 @@ public class OutputBuffer extends Writer } conv = encoders.get(enc); if (conv == null) { - if (Globals.IS_SECURITY_ENABLED){ try{ conv = AccessController.doPrivileged( @@ -549,7 +595,7 @@ public class OutputBuffer extends Writer @Override public C2BConverter run() throws IOException{ - return new C2BConverter(bb, enc); + return new C2BConverter(enc); } } @@ -561,7 +607,7 @@ public class OutputBuffer extends Writer } } } else { - conv = new C2BConverter(bb, enc); + conv = new C2BConverter(enc); } encoders.put(enc, conv); @@ -598,11 +644,8 @@ public class OutputBuffer extends Writer } public void reset(boolean resetWriterStreamFlags) { - // If a Writer was being used, there may be bytes in the converter - if (gotEnc && conv != null) { - conv.recycle(); - } bb.recycle(); + cb.recycle(); bytesWritten = 0; charsWritten = 0; if (resetWriterStreamFlags) { Modified: tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/Response.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/Response.java?rev=1452791&r1=1452790&r2=1452791&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/Response.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/Response.java Tue Mar 5 13:41:19 2013 @@ -1688,17 +1688,16 @@ public class Response if (!leadingSlash) { String relativePath = request.getDecodedRequestURI(); int pos = relativePath.lastIndexOf('/'); - relativePath = relativePath.substring(0, pos); - - String encodedURI = null; + CharChunk encodedURI = null; final String frelativePath = relativePath; + final int fend = pos; if (SecurityUtil.isPackageProtectionEnabled() ){ try{ encodedURI = AccessController.doPrivileged( - new PrivilegedExceptionAction<String>(){ + new PrivilegedExceptionAction<CharChunk>(){ @Override - public String run() throws IOException{ - return urlEncoder.encodeURL(frelativePath); + public CharChunk run() throws IOException{ + return urlEncoder.encodeURL(frelativePath, 0, fend); } }); } catch (PrivilegedActionException pae){ @@ -1708,9 +1707,10 @@ public class Response throw iae; } } else { - encodedURI = urlEncoder.encodeURL(relativePath); + encodedURI = urlEncoder.encodeURL(relativePath, 0, pos); } - redirectURLCC.append(encodedURI, 0, encodedURI.length()); + redirectURLCC.append(encodedURI); + encodedURI.recycle(); redirectURLCC.append('/'); } redirectURLCC.append(location, 0, location.length()); Modified: tomcat/tc7.0.x/trunk/java/org/apache/naming/resources/DirContextURLConnection.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/naming/resources/DirContextURLConnection.java?rev=1452791&r1=1452790&r2=1452791&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/naming/resources/DirContextURLConnection.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/naming/resources/DirContextURLConnection.java Tue Mar 5 13:41:19 2013 @@ -439,7 +439,9 @@ public class DirContextURLConnection ext collection.list("/"); while (enumeration.hasMoreElements()) { NameClassPair ncp = enumeration.nextElement(); - result.addElement(URL_ENCODER.encodeURL(ncp.getName())); + String s = ncp.getName(); + result.addElement( + URL_ENCODER.encodeURL(s, 0, s.length()).toString()); } } catch (NamingException e) { // Unexpected exception Modified: tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java?rev=1452791&r1=1452790&r2=1452791&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java Tue Mar 5 13:41:19 2013 @@ -17,34 +17,23 @@ package org.apache.tomcat.util.buf; import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; import java.util.HashMap; import java.util.Locale; import java.util.Map; import org.apache.tomcat.util.res.StringManager; -/** Efficient conversion of bytes to character . - * - * This uses the standard JDK mechanism - a reader - but provides mechanisms - * to recycle all the objects that are used. It is compatible with JDK1.1 - * and up, - * ( nio is better, but it's not available even in 1.2 or 1.3 ) - * - * Not used in the current code, the performance gain is not very big - * in the current case ( since String is created anyway ), but it will - * be used in a later version or after the remaining optimizations. +/** + * NIO based character decoder. */ public class B2CConverter { - - private static final org.apache.juli.logging.Log log= - org.apache.juli.logging.LogFactory.getLog( B2CConverter.class ); - private static final StringManager sm = StringManager.getManager(Constants.Package); @@ -54,6 +43,9 @@ public class B2CConverter { public static final Charset ISO_8859_1; public static final Charset UTF_8; + // Protected so unit tests can use it + protected static final int LEFTOVER_SIZE = 9; + static { for (Charset charset: Charset.availableCharsets().values()) { encodingToCharsetCache.put( @@ -77,185 +69,120 @@ public class B2CConverter { } public static Charset getCharset(String enc) - throws UnsupportedEncodingException{ + throws UnsupportedEncodingException { // Encoding names should all be ASCII String lowerCaseEnc = enc.toLowerCase(Locale.US); + return getCharsetLower(lowerCaseEnc); + } + + /** + * Only to be used when it is known that the encoding name is in lower case. + */ + public static Charset getCharsetLower(String lowerCaseEnc) + throws UnsupportedEncodingException { + Charset charset = encodingToCharsetCache.get(lowerCaseEnc); if (charset == null) { // Pre-population of the cache means this must be invalid throw new UnsupportedEncodingException( - sm.getString("b2cConverter.unknownEncoding", enc)); + sm.getString("b2cConverter.unknownEncoding", lowerCaseEnc)); } return charset; } - private IntermediateInputStream iis; - private ReadConvertor conv; - private CharsetDecoder decoder; - private String encoding; + private final CharsetDecoder decoder; + private ByteBuffer bb = null; + private CharBuffer cb = null; /** - * @deprecated Unused. Will be removed in Tomcat 8.0.x onwards. + * Leftover buffer used for incomplete characters. */ - @Deprecated - protected B2CConverter() { - } + private final ByteBuffer leftovers; - /** Create a converter, with bytes going to a byte buffer - */ - public B2CConverter(String encoding) - throws IOException - { - this.encoding=encoding; - reset(); + public B2CConverter(String encoding) throws IOException { + byte[] left = new byte[LEFTOVER_SIZE]; + leftovers = ByteBuffer.wrap(left); + decoder = getCharset(encoding).newDecoder(); } - - /** Reset the internal state, empty the buffers. - * The encoding remain in effect, the internal buffers remain allocated. + /** + * Reset the decoder state. */ - public void recycle() { - conv.recycle(); + public void recycle() { decoder.reset(); + leftovers.position(0); } - static final int BUFFER_SIZE=8192; - char result[]=new char[BUFFER_SIZE]; + public boolean isUndeflow() { + return (leftovers.position() > 0); + } /** - * Convert a buffer of bytes into a chars. - * - * @param bb Input byte buffer - * @param cb Output char buffer - * @param limit Number of bytes to convert - * @throws IOException - */ - public void convert( ByteChunk bb, CharChunk cb, int limit) - throws IOException - { - iis.setByteChunk( bb ); - try { - // read from the reader - int bbLengthBeforeRead = 0; - while( limit > 0 ) { - int size = limit < BUFFER_SIZE ? limit : BUFFER_SIZE; - bbLengthBeforeRead = bb.getLength(); - int cnt=conv.read( result, 0, size ); - if( cnt <= 0 ) { - // End of stream ! - we may be in a bad state - if(log.isDebugEnabled()) { - log.debug("B2CConverter: EOF"); - } - return; - } - if(log.isDebugEnabled()) { - log.debug("B2CConverter: Converted: " + - new String(result, 0, cnt)); - } - cb.append( result, 0, cnt ); - limit = limit - (bbLengthBeforeRead - bb.getLength()); - } - } catch( IOException ex) { - if(log.isDebugEnabled()) { - log.debug("B2CConverter: Reseting the converter " + ex.toString()); + * Convert the given bytes to characters. + * + * @param bc byte input + * @param cc char output + */ + public void convert(ByteChunk bc, CharChunk cc) + throws IOException { + if ((bb == null) || (bb.array() != bc.getBuffer())) { + // Create a new byte buffer if anything changed + bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength()); + } else { + // Initialize the byte buffer + bb.limit(bc.getEnd()); + bb.position(bc.getStart()); + } + if ((cb == null) || (cb.array() != cc.getBuffer())) { + // Create a new char buffer if anything changed + cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(), + cc.getBuffer().length - cc.getEnd()); + } else { + // Initialize the char buffer + cb.limit(cc.getBuffer().length); + cb.position(cc.getEnd()); + } + CoderResult result = null; + // Parse leftover if any are present + if (leftovers.position() > 0) { + int pos = cb.position(); + // Loop until one char is decoded or there is a decoder error + do { + leftovers.put(bc.substractB()); + leftovers.flip(); + result = decoder.decode(leftovers, cb, false); + leftovers.position(leftovers.limit()); + leftovers.limit(leftovers.array().length); + } while (result.isUnderflow() && (cb.position() == pos)); + if (result.isError() || result.isMalformed()) { + result.throwException(); } - reset(); - throw ex; + bb.position(bc.getStart()); + leftovers.position(0); } - } - - - public void reset() throws IOException { - // Re-create the reader and iis - iis = new IntermediateInputStream(); - decoder = getCharset(encoding).newDecoder(); - conv = new ReadConvertor(iis, decoder); - } - -} - -// -------------------- Private implementation -------------------- - - - -/** - * - */ -final class ReadConvertor extends InputStreamReader { - - /** Create a converter. - */ - public ReadConvertor(IntermediateInputStream in, CharsetDecoder decoder) { - super(in, decoder); - } - - /** Overridden - will do nothing but reset internal state. - */ - @Override - public final void close() throws IOException { - // NOTHING - // Calling super.close() would reset out and cb. - } - - @Override - public final int read(char cbuf[], int off, int len) - throws IOException - { - // will do the conversion and call write on the output stream - return super.read( cbuf, off, len ); - } - - /** Reset the buffer - */ - public final void recycle() { - try { - // Must clear super's buffer. - while (ready()) { - // InputStreamReader#skip(long) will allocate buffer to skip. - read(); + // Do the decoding and get the results into the byte chunk and the char + // chunk + result = decoder.decode(bb, cb, false); + if (result.isError() || result.isMalformed()) { + result.throwException(); + } else if (result.isOverflow()) { + // Propagate current positions to the byte chunk and char chunk, if + // this continues the char buffer will get resized + bc.setOffset(bb.position()); + cc.setEnd(cb.position()); + } else if (result.isUnderflow()) { + // Propagate current positions to the byte chunk and char chunk + bc.setOffset(bb.position()); + cc.setEnd(cb.position()); + // Put leftovers in the leftovers byte buffer + if (bc.getLength() > 0) { + leftovers.limit(leftovers.array().length); + leftovers.position(bc.getLength()); + bc.substract(leftovers.array(), 0, bc.getLength()); } - } catch(IOException ioe){ } } } - - -/** Special output stream where close() is overridden, so super.close() - is never called. - - This allows recycling. It can also be disabled, so callbacks will - not be called if recycling the converter and if data was not flushed. -*/ -final class IntermediateInputStream extends InputStream { - ByteChunk bc = null; - - public IntermediateInputStream() { - } - - @Override - public final void close() throws IOException { - // shouldn't be called - we filter it out in writer - throw new IOException("close() called - shouldn't happen "); - } - - @Override - public final int read(byte cbuf[], int off, int len) throws IOException { - return bc.substract(cbuf, off, len); - } - - @Override - public final int read() throws IOException { - return bc.substract(); - } - - // -------------------- Internal methods -------------------- - - - void setByteChunk( ByteChunk mb ) { - bc = mb; - } - -} Modified: tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/ByteChunk.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/ByteChunk.java?rev=1452791&r1=1452790&r2=1452791&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/ByteChunk.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/ByteChunk.java Tue Mar 5 13:41:19 2013 @@ -397,6 +397,7 @@ public final class ByteChunk implements } + /** * @deprecated Unused. Will be removed in Tomcat 8.0.x onwards. */ @@ -421,6 +422,23 @@ public final class ByteChunk implements } + + public byte substractB() + throws IOException { + + if ((end - start) == 0) { + if (in == null) + return -1; + int n = in.realReadBytes( buff, 0, buff.length ); + if (n < 0) + return -1; + } + + return (buff[start++]); + + } + + public int substract( byte src[], int off, int len ) throws IOException { Modified: tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/C2BConverter.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/C2BConverter.java?rev=1452791&r1=1452790&r2=1452791&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/C2BConverter.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/C2BConverter.java Tue Mar 5 13:41:19 2013 @@ -16,295 +16,112 @@ */ package org.apache.tomcat.util.buf; -import java.io.BufferedWriter; import java.io.IOException; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.nio.charset.Charset; - -import org.apache.juli.logging.Log; -import org.apache.juli.logging.LogFactory; -import org.apache.tomcat.util.res.StringManager; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; /** - * Efficient conversion of character to bytes. - * - * This uses the standard JDK mechanism - a writer - but provides mechanisms to - * recycle all the objects that are used. Input is buffered to improve - * performance. + * NIO based character encoder. */ public final class C2BConverter { - private static final Log log = LogFactory.getLog(C2BConverter.class); - private static final StringManager sm = - StringManager.getManager(Constants.Package); - - private final String encoding; - private BufferedWriter writer; - private WriteConvertor conv; - private IntermediateOutputStream ios; - private ByteChunk bb; + protected CharsetEncoder encoder = null; + protected ByteBuffer bb = null; + protected CharBuffer cb = null; /** - * Create a converter, with bytes going to a byte buffer. + * Leftover buffer used for multi-characters characters. */ - public C2BConverter(ByteChunk output, String encoding) throws IOException { - this.bb = output; - this.encoding = encoding; - init(); - } + protected CharBuffer leftovers = null; - /** - * Create a converter - * @deprecated Unused. Will be removed in Tomcat 8.0.x onwards. - */ - @Deprecated public C2BConverter(String encoding) throws IOException { - this(new ByteChunk(1024), encoding); - } - - /** - * @deprecated Unused. Will be removed in Tomcat 8.0.x onwards. - */ - @Deprecated - public ByteChunk getByteChunk() { - return bb; + encoder = B2CConverter.getCharset(encoding).newEncoder(); + // FIXME: See if unmappable/malformed behavior configuration is needed + // in practice + encoder.onUnmappableCharacter(CodingErrorAction.REPLACE) + .onMalformedInput(CodingErrorAction.REPLACE); + char[] left = new char[4]; + leftovers = CharBuffer.wrap(left); } /** - * @deprecated Unused. Will be removed in Tomcat 8.0.x onwards. + * Reset the encoder state. */ - @Deprecated - public String getEncoding() { - return encoding; + public void recycle() { + encoder.reset(); + leftovers.position(0); } - /** - * @deprecated Unused. Will be removed in Tomcat 8.0.x onwards. - */ - @Deprecated - public void setByteChunk(ByteChunk bb) { - this.bb=bb; - ios.setByteChunk( bb ); - } - - /** - * Reset the internal state, empty the buffers. - * The encoding remain in effect, the internal buffers remain allocated. - */ - public final void recycle() { - // Disable any output - ios.disable(); - // Flush out the BufferedWriter and WriteConvertor - try { - writer.flush(); - } catch (IOException e) { - log.warn(sm.getString("c2bConverter.recycleFailed"), e); - try { - init(); - } catch (IOException ignore) { - // Should never happen since this means encoding is invalid and - // in that case, the constructor will have failed. - } - } - // Re-enable ready for re-use - ios.enable(); - bb.recycle(); - } - - private void init() throws IOException { - ios = new IntermediateOutputStream(bb); - conv = new WriteConvertor(ios, B2CConverter.getCharset(encoding)); - writer = new BufferedWriter(conv); - } - - /** - * Generate the bytes using the specified encoding. - */ - public final void convert(char c[], int off, int len) throws IOException { - writer.write(c, off, len); + public boolean isUndeflow() { + return (leftovers.position() > 0); } /** - * Generate the bytes using the specified encoding. + * Convert the given characters to bytes. + * + * @param cc char input + * @param bc byte output */ - public final void convert(String s, int off, int len) throws IOException { - writer.write(s, off, len); - } - - /** - * Generate the bytes using the specified encoding. - */ - public final void convert(String s) throws IOException { - writer.write(s); - } - - /** - * Generate the bytes using the specified encoding. - */ - public final void convert(char c) throws IOException { - writer.write(c); - } - - /** - * Convert a message bytes chars to bytes - * @deprecated Unused. Will be removed in Tomcat 8.0.x onwards. - */ - @Deprecated - public final void convert(MessageBytes mb) throws IOException { - int type=mb.getType(); - if( type==MessageBytes.T_BYTES ) { - return; + public void convert(CharChunk cc, ByteChunk bc) + throws IOException { + if ((bb == null) || (bb.array() != bc.getBuffer())) { + // Create a new byte buffer if anything changed + bb = ByteBuffer.wrap(bc.getBuffer(), bc.getEnd(), + bc.getBuffer().length - bc.getEnd()); + } else { + // Initialize the byte buffer + bb.limit(bc.getBuffer().length); + bb.position(bc.getEnd()); } - ByteChunk orig=bb; - setByteChunk( mb.getByteChunk()); - bb.recycle(); - bb.allocate( 32, -1 ); - - if( type==MessageBytes.T_STR ) { - convert( mb.getString() ); - // System.out.println("XXX Converting " + mb.getString() ); - } else if( type==MessageBytes.T_CHARS ) { - CharChunk charC=mb.getCharChunk(); - convert( charC.getBuffer(), - charC.getOffset(), charC.getLength()); - //System.out.println("XXX Converting " + mb.getCharChunk() ); + if ((cb == null) || (cb.array() != cc.getBuffer())) { + // Create a new char buffer if anything changed + cb = CharBuffer.wrap(cc.getBuffer(), cc.getStart(), + cc.getLength()); } else { - if (log.isDebugEnabled()) { - log.debug("XXX unknowon type " + type ); + // Initialize the char buffer + cb.limit(cc.getEnd()); + cb.position(cc.getStart()); + } + CoderResult result = null; + // Parse leftover if any are present + if (leftovers.position() > 0) { + int pos = bb.position(); + // Loop until one char is encoded or there is a encoder error + do { + leftovers.put((char) cc.substract()); + leftovers.flip(); + result = encoder.encode(leftovers, bb, false); + leftovers.position(leftovers.limit()); + leftovers.limit(leftovers.array().length); + } while (result.isUnderflow() && (bb.position() == pos)); + if (result.isError() || result.isMalformed()) { + result.throwException(); } + cb.position(cc.getStart()); + leftovers.position(0); } - flushBuffer(); - //System.out.println("C2B: XXX " + bb.getBuffer() + bb.getLength()); - setByteChunk(orig); - } - - /** - * Flush any internal buffers into the ByteOutput or the internal byte[]. - */ - public final void flushBuffer() throws IOException { - writer.flush(); - } - -} - -// -------------------- Private implementation -------------------- -/** - * Special writer class, where close() is overridden. The default implementation - * would set byteOutputter to null, and the writer can't be recycled. - * - * Note that the flush method will empty the internal buffers _and_ call - * flush on the output stream - that's why we use an intermediary output stream - * that overrides flush(). The idea is to have full control: flushing the - * char->byte converter should be independent of flushing the OutputStream. - * - * When a WriteConverter is created, it'll allocate one or 2 byte buffers, - * with a 8k size that can't be changed ( at least in JDK1.1 -> 1.4 ). It would - * also allocate a ByteOutputter or equivalent - again some internal buffers. - * - * It is essential to keep this object around and reuse it. You can use either - * pools or per thread data - but given that in most cases a converter will be - * needed for every thread and most of the time only 1 ( or 2 ) encodings will - * be used, it is far better to keep it per thread and eliminate the pool - * overhead too. - */ - final class WriteConvertor extends OutputStreamWriter { - - /** - * Create a converter. - */ - public WriteConvertor(IntermediateOutputStream out, Charset charset) { - super(out, charset); - } - - /** - * This is a NOOP. - */ - @Override - public final void close() throws IOException { - // NOTHING - // Calling super.close() would reset out and cb. - } - - /** - * Flush the characters only. - */ - @Override - public final void flush() throws IOException { - // Will flushBuffer and out() - // flushBuffer put any remaining chars in the byte[] - super.flush(); - } - - @Override - public final void write(char cbuf[], int off, int len) throws IOException { - // Will do the conversion and call write on the output stream - super.write( cbuf, off, len ); - } -} - - -/** - * Special output stream where close() is overridden, so super.close() - * is never called. - * - * This allows recycling. It can also be disabled, so callbacks will - * not be called if recycling the converter and if data was not flushed. - */ -final class IntermediateOutputStream extends OutputStream { - private ByteChunk tbuff; - private boolean enabled = true; - - public IntermediateOutputStream(ByteChunk tbuff) { - this.tbuff=tbuff; - } - - @Override - public final void close() throws IOException { - // shouldn't be called - we filter it out in writer - throw new IOException("close() called - shouldn't happen "); - } - - @Override - public final void flush() throws IOException { - // nothing - write will go directly to the buffer, - // we don't keep any state - } - - @Override - public final void write(byte cbuf[], int off, int len) throws IOException { - // will do the conversion and call write on the output stream - if( enabled ) { - tbuff.append( cbuf, off, len ); + // Do the decoding and get the results into the byte chunk and the char + // chunk + result = encoder.encode(cb, bb, false); + if (result.isError() || result.isMalformed()) { + result.throwException(); + } else if (result.isOverflow()) { + // Propagate current positions to the byte chunk and char chunk + bc.setEnd(bb.position()); + cc.setOffset(cb.position()); + } else if (result.isUnderflow()) { + // Propagate current positions to the byte chunk and char chunk + bc.setEnd(bb.position()); + cc.setOffset(cb.position()); + // Put leftovers in the leftovers char buffer + if (cc.getLength() > 0) { + leftovers.limit(leftovers.array().length); + leftovers.position(cc.getLength()); + cc.substract(leftovers.array(), 0, cc.getLength()); + } } } - - @Override - public final void write(int i) throws IOException { - throw new IOException("write( int ) called - shouldn't happen "); - } - - // -------------------- Internal methods -------------------- - /** - * @deprecated Unused. Will be removed in Tomcat 8.0.x onwards. - */ - @Deprecated - void setByteChunk(ByteChunk bb) { - tbuff = bb; - } - - /** - * Temporary disable - this is used to recycle the converter without - * generating an output if the buffers were not flushed. - */ - final void disable() { - enabled = false; - } - - /** - * Re-enable - used to recycle the converter. - */ - final void enable() { - enabled = true; - } } Modified: tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/CharChunk.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/CharChunk.java?rev=1452791&r1=1452790&r2=1452791&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/CharChunk.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/CharChunk.java Tue Mar 5 13:41:19 2013 @@ -468,7 +468,7 @@ public final class CharChunk implements /** Make space for len chars. If len is small, allocate * a reserve space too. Never grow bigger than limit. */ - private void makeSpace(int count) + public void makeSpace(int count) { char[] tmp = null; Modified: tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/UEncoder.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/UEncoder.java?rev=1452791&r1=1452790&r2=1452791&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/UEncoder.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/UEncoder.java Tue Mar 5 13:41:19 2013 @@ -16,9 +16,7 @@ */ package org.apache.tomcat.util.buf; -import java.io.CharArrayWriter; import java.io.IOException; -import java.io.Writer; import java.util.BitSet; /** Efficient implementation for encoders. @@ -33,14 +31,13 @@ import java.util.BitSet; */ public final class UEncoder { - private static final org.apache.juli.logging.Log log= - org.apache.juli.logging.LogFactory.getLog(UEncoder.class ); - // Not static - the set may differ ( it's better than adding // an extra check for "/", "+", etc private BitSet safeChars=null; private C2BConverter c2b=null; private ByteChunk bb=null; + private CharChunk cb=null; + private CharChunk output=null; private String encoding="UTF8"; @@ -62,92 +59,65 @@ public final class UEncoder { /** URL Encode string, using a specified encoding. - * - * @param buf The writer - * @param s string to be encoded - * @throws IOException If an I/O error occurs - */ - public void urlEncode( Writer buf, String s ) - throws IOException { - if( c2b==null ) { - bb=new ByteChunk(16); // small enough. - c2b=new C2BConverter( bb, encoding ); - } - - for (int i = 0; i < s.length(); i++) { - int c = s.charAt(i); - if( safeChars.get( c ) ) { - if(log.isDebugEnabled()) { - log.debug("Encoder: Safe: " + (char)c); - } - buf.write((char)c); - } else { - if(log.isDebugEnabled()) { - log.debug("Encoder: Unsafe: " + (char)c); - } - c2b.convert( (char)c ); - - // "surrogate" - UTF is _not_ 16 bit, but 21 !!!! - // ( while UCS is 31 ). Amazing... - if (c >= 0xD800 && c <= 0xDBFF) { - if ( (i+1) < s.length()) { - int d = s.charAt(i+1); - if (d >= 0xDC00 && d <= 0xDFFF) { - if(log.isDebugEnabled()) { - log.debug("Encoder: Unsafe: " + c); - } - c2b.convert( (char)d); - i++; - } - } - } - - c2b.flushBuffer(); - - urlEncode( buf, bb.getBuffer(), bb.getOffset(), - bb.getLength() ); - bb.recycle(); - } - } - } - - /** - */ - public void urlEncode( Writer buf, byte bytes[], int off, int len) - throws IOException { - for( int j=off; j< len; j++ ) { - buf.write( '%' ); - char ch = Character.forDigit((bytes[j] >> 4) & 0xF, 16); - if(log.isDebugEnabled()) { - log.debug("Encoder: Encode: " + ch); - } - buf.write(ch); - ch = Character.forDigit(bytes[j] & 0xF, 16); - if(log.isDebugEnabled()) { - log.debug("Encoder: Encode: " + ch); - } - buf.write(ch); - } - } - - /** - * Utility function to re-encode the URL. - * Still has problems with charset, since UEncoder mostly - * ignores it. - */ - public String encodeURL(String uri) { - String outUri=null; - try { - // XXX optimize - recycle, etc - CharArrayWriter out = new CharArrayWriter(); - urlEncode(out, uri); - outUri=out.toString(); - } catch (IOException iex) { - } - return outUri; - } - - + * + * @param buf The writer + * @param s string to be encoded + * @throws IOException If an I/O error occurs + */ + public CharChunk encodeURL(String s, int start, int end) + throws IOException { + if (c2b == null) { + bb = new ByteChunk(8); // small enough. + cb = new CharChunk(2); // small enough. + output = new CharChunk(64); // small enough. + c2b = new C2BConverter(encoding); + } else { + bb.recycle(); + cb.recycle(); + } + + for (int i = start; i < end; i++) { + char c = s.charAt(i); + if (safeChars.get(c)) { + output.append(c); + } else { + cb.append(c); + c2b.convert(cb, bb); + + // "surrogate" - UTF is _not_ 16 bit, but 21 !!!! + // ( while UCS is 31 ). Amazing... + if (c >= 0xD800 && c <= 0xDBFF) { + if ((i+1) < end) { + char d = s.charAt(i+1); + if (d >= 0xDC00 && d <= 0xDFFF) { + cb.append(d); + c2b.convert(cb, bb); + i++; + } + } + } + + urlEncode(output, bb); + cb.recycle(); + bb.recycle(); + } + } + + return output; + } + + protected void urlEncode(CharChunk out, ByteChunk bb) + throws IOException { + byte[] bytes = bb.getBuffer(); + for (int j = bb.getStart(); j < bb.getEnd(); j++) { + out.append('%'); + char ch = Character.forDigit((bytes[j] >> 4) & 0xF, 16); + out.append(ch); + ch = Character.forDigit(bytes[j] & 0xF, 16); + out.append(ch); + } + } + // -------------------- Internal implementation -------------------- private void initSafeChars() { Modified: tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java?rev=1452791&r1=1452790&r2=1452791&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java (original) +++ tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java Tue Mar 5 13:41:19 2013 @@ -372,9 +372,10 @@ public class TestWebSocket extends Tomca private void sendMessage(String message, boolean finalFragment) throws IOException { ByteChunk bc = new ByteChunk(8192); - C2BConverter c2b = new C2BConverter(bc, "UTF-8"); - c2b.convert(message); - c2b.flushBuffer(); + CharChunk cc = new CharChunk(8192); + C2BConverter c2b = new C2BConverter("UTF-8"); + cc.append(message); + c2b.convert(cc, bc); int len = bc.getLength(); assertTrue(len < 126); @@ -427,7 +428,7 @@ public class TestWebSocket extends Tomca bc.setEnd(len); B2CConverter b2c = new B2CConverter("UTF-8"); - b2c.convert(bc, cc, len); + b2c.convert(bc, cc); return cc.toString(); } Modified: tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java?rev=1452791&r1=1452790&r2=1452791&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java (original) +++ tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java Tue Mar 5 13:41:19 2013 @@ -16,6 +16,8 @@ */ package org.apache.tomcat.util.buf; +import java.nio.charset.Charset; + import org.junit.Assert; import org.junit.Test; @@ -48,8 +50,7 @@ public class TestB2CConverter { for (int i = 0; i < msgCount; i++) { bc.append(UTF16_MESSAGE, 0, UTF16_MESSAGE.length); - // Note: The limit is the number of characters to read - conv.convert(bc, cc, 3); + conv.convert(bc, cc); Assert.assertEquals("ABC", cc.toString()); bc.recycle(); cc.recycle(); @@ -58,4 +59,28 @@ public class TestB2CConverter { System.out.println(cc); } + + @Test + public void testLeftoverSize() { + float maxLeftover = 0; + for (Charset charset : Charset.availableCharsets().values()) { + float leftover; + if (charset.name().toLowerCase().startsWith("x-")) { + // Non-standard charset that browsers won't be using + // Likely something used internally by the JRE + continue; + } + try { + leftover = charset.newEncoder().maxBytesPerChar(); + } catch (UnsupportedOperationException uoe) { + // Skip it + continue; + } + if (leftover > maxLeftover) { + maxLeftover = leftover; + } + } + Assert.assertTrue("Limit needs to be at least " + maxLeftover, + maxLeftover <= B2CConverter.LEFTOVER_SIZE); + } } Modified: tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml?rev=1452791&r1=1452790&r2=1452791&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml (original) +++ tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml Tue Mar 5 13:41:19 2013 @@ -98,6 +98,13 @@ </fix> </changelog> </subsection> + <subsection name="Coyote"> + <changelog> + <scode> + Refactor char encoding/decoding using NIO APIs. (remm) + </scode> + </changelog> + </subsection> <subsection name="Web applications"> <changelog> <fix> --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org