Author: markt
Date: Tue Mar 5 13:41:19 2013
New Revision: 1452791
URL: http://svn.apache.org/r1452791
Log:
Refactor encoding/decoding to use NIO
Modified:
tomcat/tc7.0.x/trunk/ (props changed)
tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java
tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java
tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/OutputBuffer.java
tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/Response.java
tomcat/tc7.0.x/trunk/java/org/apache/naming/resources/DirContextURLConnection.java
tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java
tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/ByteChunk.java
tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/C2BConverter.java
tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/CharChunk.java
tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/UEncoder.java
tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java
tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java
tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml
Propchange: tomcat/tc7.0.x/trunk/
------------------------------------------------------------------------------
Merged /tomcat/trunk:r1437743,1437891,1447818,1451408
Modified:
tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java?rev=1452791&r1=1452790&r2=1452791&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java (original)
+++ tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/CoyoteAdapter.java Tue Mar 5 13:41:19 2013
@@ -992,7 +992,7 @@ public class CoyoteAdapter implements Ad
}
if (conv != null) {
try {
- conv.convert(bc, cc, cc.getBuffer().length - cc.getEnd());
+ conv.convert(bc, cc);
uri.setChars(cc.getBuffer(), cc.getStart(),
cc.getLength());
return;
Modified:
tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java?rev=1452791&r1=1452790&r2=1452791&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java (original)
+++ tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/InputBuffer.java Tue Mar 5 13:41:19 2013
@@ -357,14 +357,19 @@ public class InputBuffer extends Reader
if (markPos == -1) {
cb.setOffset(0);
cb.setEnd(0);
+ } else {
+ // Make sure there's enough space in the worst case
+ cb.makeSpace(bb.getLength());
+ if ((cb.getBuffer().length - cb.getEnd()) == 0) {
+ // We went over the limit
+ cb.setOffset(0);
+ cb.setEnd(0);
+ markPos = -1;
+ }
}
- int limit = bb.getLength()+cb.getStart();
- if ( cb.getLimit() < limit ) {
- cb.setLimit(limit);
- }
+
state = CHAR_STATE;
- conv.convert(bb, cb, bb.getLength());
- bb.setOffset(bb.getEnd());
+ conv.convert(bb, cb);
return cb.getLength();
Modified:
tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/OutputBuffer.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/OutputBuffer.java?rev=1452791&r1=1452790&r2=1452791&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/OutputBuffer.java (original)
+++ tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/OutputBuffer.java Tue Mar 5 13:41:19 2013
@@ -29,6 +29,7 @@ import org.apache.coyote.ActionCode;
import org.apache.coyote.Response;
import org.apache.tomcat.util.buf.ByteChunk;
import org.apache.tomcat.util.buf.C2BConverter;
+import org.apache.tomcat.util.buf.CharChunk;
/**
@@ -40,7 +41,7 @@ import org.apache.tomcat.util.buf.C2BCon
* @author Remy Maucherat
*/
public class OutputBuffer extends Writer
- implements ByteChunk.ByteOutputChannel {
+ implements ByteChunk.ByteOutputChannel, CharChunk.CharOutputChannel {
// -------------------------------------------------------------- Constants
@@ -61,6 +62,12 @@ public class OutputBuffer extends Writer
/**
+ * The chunk buffer.
+ */
+ private final CharChunk cb;
+
+
+ /**
* State of the output buffer.
*/
private boolean initial = true;
@@ -97,6 +104,12 @@ public class OutputBuffer extends Writer
/**
+ * Char chunk used to output chars.
+ */
+ private CharChunk outputCharChunk = new CharChunk();
+
+
+ /**
* Encoding to use.
*/
private String enc;
@@ -156,6 +169,10 @@ public class OutputBuffer extends Writer
bb = new ByteChunk(size);
bb.setLimit(size);
bb.setByteOutputChannel(this);
+ cb = new CharChunk(size);
+ cb.setLimit(size);
+ cb.setOptimizedWrite(false);
+ cb.setCharOutputChannel(this);
}
@@ -225,16 +242,18 @@ public class OutputBuffer extends Writer
initial = true;
bytesWritten = 0;
charsWritten = 0;
-
+
bb.recycle();
+ cb.recycle();
+ outputCharChunk.setChars(null, 0, 0);
closed = false;
- doFlush = false;
suspended = false;
-
+ doFlush = false;
+
if (conv!= null) {
conv.recycle();
}
-
+
gotEnc = false;
enc = null;
@@ -266,9 +285,10 @@ public class OutputBuffer extends Writer
return;
}
- // Flush the convertor if one is in use
- if (gotEnc && conv != null) {
- conv.flushBuffer();
+ // If there are chars, flush all of them to the byte buffer now as bytes are used to
+ // calculate the content-length (if everything fits into the byte buffer, of course).
+ if (cb.getLength() > 0) {
+ cb.flushBuffer();
}
if ((!coyoteResponse.isCommitted())
@@ -319,17 +339,15 @@ public class OutputBuffer extends Writer
return;
}
- // Flush the convertor if one is in use
- if (gotEnc && conv != null) {
- conv.flushBuffer();
- }
-
try {
doFlush = true;
if (initial) {
coyoteResponse.sendHeaders();
initial = false;
}
+ if (cb.getLength() > 0) {
+ cb.flushBuffer();
+ }
if (bb.getLength() > 0) {
bb.flushBuffer();
}
@@ -438,6 +456,33 @@ public class OutputBuffer extends Writer
// ------------------------------------------------- Chars Handling Methods
+ /**
+ * Convert the chars to bytes, then send the data to the client.
+ *
+ * @param buf Char buffer to be written to the response
+ * @param off Offset
+ * @param len Length
+ *
+ * @throws IOException An underlying IOException occurred
+ */
+ @Override
+ public void realWriteChars(char buf[], int off, int len)
+ throws IOException {
+
+ outputCharChunk.setChars(buf, off, len);
+ while (outputCharChunk.getLength() > 0) {
+ conv.convert(outputCharChunk, bb);
+ if (bb.getLength() == 0) {
+ // Break out of the loop if more chars are needed to produce any output
+ break;
+ }
+ if (outputCharChunk.getLength() > 0) {
+ bb.flushBuffer();
+ }
+ }
+
+ }
+
@Override
public void write(int c)
throws IOException {
@@ -446,7 +491,7 @@ public class OutputBuffer extends Writer
return;
}
- conv.convert((char) c);
+ cb.append((char) c);
charsWritten++;
}
@@ -473,7 +518,7 @@ public class OutputBuffer extends Writer
return;
}
- conv.convert(c, off, len);
+ cb.append(c, off, len);
charsWritten += len;
}
@@ -494,7 +539,8 @@ public class OutputBuffer extends Writer
if (s == null) {
s = "null";
}
- conv.convert(s, off, len);
+ cb.append(s, off, len);
+ charsWritten += len;
}
@@ -509,7 +555,8 @@ public class OutputBuffer extends Writer
if (s == null) {
s = "null";
}
- conv.convert(s);
+ cb.append(s);
+ charsWritten += s.length();
}
@@ -541,7 +588,6 @@ public class OutputBuffer extends Writer
}
conv = encoders.get(enc);
if (conv == null) {
-
if (Globals.IS_SECURITY_ENABLED){
try{
conv = AccessController.doPrivileged(
@@ -549,7 +595,7 @@ public class OutputBuffer extends Writer
@Override
public C2BConverter run() throws IOException{
- return new C2BConverter(bb, enc);
+ return new C2BConverter(enc);
}
}
@@ -561,7 +607,7 @@ public class OutputBuffer extends Writer
}
}
} else {
- conv = new C2BConverter(bb, enc);
+ conv = new C2BConverter(enc);
}
encoders.put(enc, conv);
@@ -598,11 +644,8 @@ public class OutputBuffer extends Writer
}
public void reset(boolean resetWriterStreamFlags) {
- // If a Writer was being used, there may be bytes in the converter
- if (gotEnc && conv != null) {
- conv.recycle();
- }
bb.recycle();
+ cb.recycle();
bytesWritten = 0;
charsWritten = 0;
if (resetWriterStreamFlags) {
Modified: tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/Response.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/Response.java?rev=1452791&r1=1452790&r2=1452791&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/Response.java (original)
+++ tomcat/tc7.0.x/trunk/java/org/apache/catalina/connector/Response.java Tue Mar 5 13:41:19 2013
@@ -1688,17 +1688,16 @@ public class Response
if (!leadingSlash) {
String relativePath = request.getDecodedRequestURI();
int pos = relativePath.lastIndexOf('/');
- relativePath = relativePath.substring(0, pos);
-
- String encodedURI = null;
+ CharChunk encodedURI = null;
final String frelativePath = relativePath;
+ final int fend = pos;
if (SecurityUtil.isPackageProtectionEnabled() ){
try{
encodedURI = AccessController.doPrivileged(
- new PrivilegedExceptionAction<String>(){
+ new PrivilegedExceptionAction<CharChunk>(){
@Override
- public String run() throws IOException{
- return urlEncoder.encodeURL(frelativePath);
+ public CharChunk run() throws IOException{
+ return urlEncoder.encodeURL(frelativePath, 0, fend);
}
});
} catch (PrivilegedActionException pae){
@@ -1708,9 +1707,10 @@ public class Response
throw iae;
}
} else {
- encodedURI = urlEncoder.encodeURL(relativePath);
+ encodedURI = urlEncoder.encodeURL(relativePath, 0, pos);
}
- redirectURLCC.append(encodedURI, 0, encodedURI.length());
+ redirectURLCC.append(encodedURI);
+ encodedURI.recycle();
redirectURLCC.append('/');
}
redirectURLCC.append(location, 0, location.length());
Modified:
tomcat/tc7.0.x/trunk/java/org/apache/naming/resources/DirContextURLConnection.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/naming/resources/DirContextURLConnection.java?rev=1452791&r1=1452790&r2=1452791&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/java/org/apache/naming/resources/DirContextURLConnection.java (original)
+++ tomcat/tc7.0.x/trunk/java/org/apache/naming/resources/DirContextURLConnection.java Tue Mar 5 13:41:19 2013
@@ -439,7 +439,9 @@ public class DirContextURLConnection ext
collection.list("/");
while (enumeration.hasMoreElements()) {
NameClassPair ncp = enumeration.nextElement();
- result.addElement(URL_ENCODER.encodeURL(ncp.getName()));
+ String s = ncp.getName();
+ result.addElement(
+ URL_ENCODER.encodeURL(s, 0, s.length()).toString());
}
} catch (NamingException e) {
// Unexpected exception
Modified: tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java?rev=1452791&r1=1452790&r2=1452791&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java (original)
+++ tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/B2CConverter.java Tue Mar 5 13:41:19 2013
@@ -17,34 +17,23 @@
package org.apache.tomcat.util.buf;
import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import org.apache.tomcat.util.res.StringManager;
-/** Efficient conversion of bytes to character .
- *
- * This uses the standard JDK mechanism - a reader - but provides mechanisms
- * to recycle all the objects that are used. It is compatible with JDK1.1
- * and up,
- * ( nio is better, but it's not available even in 1.2 or 1.3 )
- *
- * Not used in the current code, the performance gain is not very big
- * in the current case ( since String is created anyway ), but it will
- * be used in a later version or after the remaining optimizations.
+/**
+ * NIO based character decoder.
*/
public class B2CConverter {
-
- private static final org.apache.juli.logging.Log log=
- org.apache.juli.logging.LogFactory.getLog( B2CConverter.class );
-
private static final StringManager sm =
StringManager.getManager(Constants.Package);
@@ -54,6 +43,9 @@ public class B2CConverter {
public static final Charset ISO_8859_1;
public static final Charset UTF_8;
+ // Protected so unit tests can use it
+ protected static final int LEFTOVER_SIZE = 9;
+
static {
for (Charset charset: Charset.availableCharsets().values()) {
encodingToCharsetCache.put(
@@ -77,185 +69,120 @@ public class B2CConverter {
}
public static Charset getCharset(String enc)
- throws UnsupportedEncodingException{
+ throws UnsupportedEncodingException {
// Encoding names should all be ASCII
String lowerCaseEnc = enc.toLowerCase(Locale.US);
+ return getCharsetLower(lowerCaseEnc);
+ }
+
+ /**
+ * Only to be used when it is known that the encoding name is in lower case.
+ */
+ public static Charset getCharsetLower(String lowerCaseEnc)
+ throws UnsupportedEncodingException {
+
Charset charset = encodingToCharsetCache.get(lowerCaseEnc);
if (charset == null) {
// Pre-population of the cache means this must be invalid
throw new UnsupportedEncodingException(
- sm.getString("b2cConverter.unknownEncoding", enc));
+ sm.getString("b2cConverter.unknownEncoding", lowerCaseEnc));
}
return charset;
}
- private IntermediateInputStream iis;
- private ReadConvertor conv;
- private CharsetDecoder decoder;
- private String encoding;
+ private final CharsetDecoder decoder;
+ private ByteBuffer bb = null;
+ private CharBuffer cb = null;
/**
- * @deprecated Unused. Will be removed in Tomcat 8.0.x onwards.
+ * Leftover buffer used for incomplete characters.
*/
- @Deprecated
- protected B2CConverter() {
- }
+ private final ByteBuffer leftovers;
- /** Create a converter, with bytes going to a byte buffer
- */
- public B2CConverter(String encoding)
- throws IOException
- {
- this.encoding=encoding;
- reset();
+ public B2CConverter(String encoding) throws IOException {
+ byte[] left = new byte[LEFTOVER_SIZE];
+ leftovers = ByteBuffer.wrap(left);
+ decoder = getCharset(encoding).newDecoder();
}
-
- /** Reset the internal state, empty the buffers.
- * The encoding remain in effect, the internal buffers remain allocated.
+ /**
+ * Reset the decoder state.
*/
- public void recycle() {
- conv.recycle();
+ public void recycle() {
decoder.reset();
+ leftovers.position(0);
}
- static final int BUFFER_SIZE=8192;
- char result[]=new char[BUFFER_SIZE];
+ public boolean isUndeflow() {
+ return (leftovers.position() > 0);
+ }
/**
- * Convert a buffer of bytes into a chars.
- *
- * @param bb Input byte buffer
- * @param cb Output char buffer
- * @param limit Number of bytes to convert
- * @throws IOException
- */
- public void convert( ByteChunk bb, CharChunk cb, int limit)
- throws IOException
- {
- iis.setByteChunk( bb );
- try {
- // read from the reader
- int bbLengthBeforeRead = 0;
- while( limit > 0 ) {
- int size = limit < BUFFER_SIZE ? limit : BUFFER_SIZE;
- bbLengthBeforeRead = bb.getLength();
- int cnt=conv.read( result, 0, size );
- if( cnt <= 0 ) {
- // End of stream ! - we may be in a bad state
- if(log.isDebugEnabled()) {
- log.debug("B2CConverter: EOF");
- }
- return;
- }
- if(log.isDebugEnabled()) {
- log.debug("B2CConverter: Converted: " +
- new String(result, 0, cnt));
- }
- cb.append( result, 0, cnt );
- limit = limit - (bbLengthBeforeRead - bb.getLength());
- }
- } catch( IOException ex) {
- if(log.isDebugEnabled()) {
- log.debug("B2CConverter: Reseting the converter " + ex.toString());
+ * Convert the given bytes to characters.
+ *
+ * @param bc byte input
+ * @param cc char output
+ */
+ public void convert(ByteChunk bc, CharChunk cc)
+ throws IOException {
+ if ((bb == null) || (bb.array() != bc.getBuffer())) {
+ // Create a new byte buffer if anything changed
+ bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength());
+ } else {
+ // Initialize the byte buffer
+ bb.limit(bc.getEnd());
+ bb.position(bc.getStart());
+ }
+ if ((cb == null) || (cb.array() != cc.getBuffer())) {
+ // Create a new char buffer if anything changed
+ cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(),
+ cc.getBuffer().length - cc.getEnd());
+ } else {
+ // Initialize the char buffer
+ cb.limit(cc.getBuffer().length);
+ cb.position(cc.getEnd());
+ }
+ CoderResult result = null;
+ // Parse leftover if any are present
+ if (leftovers.position() > 0) {
+ int pos = cb.position();
+ // Loop until one char is decoded or there is a decoder error
+ do {
+ leftovers.put(bc.substractB());
+ leftovers.flip();
+ result = decoder.decode(leftovers, cb, false);
+ leftovers.position(leftovers.limit());
+ leftovers.limit(leftovers.array().length);
+ } while (result.isUnderflow() && (cb.position() == pos));
+ if (result.isError() || result.isMalformed()) {
+ result.throwException();
}
- reset();
- throw ex;
+ bb.position(bc.getStart());
+ leftovers.position(0);
}
- }
-
-
- public void reset() throws IOException {
- // Re-create the reader and iis
- iis = new IntermediateInputStream();
- decoder = getCharset(encoding).newDecoder();
- conv = new ReadConvertor(iis, decoder);
- }
-
-}
-
-// -------------------- Private implementation --------------------
-
-
-
-/**
- *
- */
-final class ReadConvertor extends InputStreamReader {
-
- /** Create a converter.
- */
- public ReadConvertor(IntermediateInputStream in, CharsetDecoder decoder) {
- super(in, decoder);
- }
-
- /** Overridden - will do nothing but reset internal state.
- */
- @Override
- public final void close() throws IOException {
- // NOTHING
- // Calling super.close() would reset out and cb.
- }
-
- @Override
- public final int read(char cbuf[], int off, int len)
- throws IOException
- {
- // will do the conversion and call write on the output stream
- return super.read( cbuf, off, len );
- }
-
- /** Reset the buffer
- */
- public final void recycle() {
- try {
- // Must clear super's buffer.
- while (ready()) {
- // InputStreamReader#skip(long) will allocate buffer to skip.
- read();
+ // Do the decoding and get the results into the byte chunk and the char
+ // chunk
+ result = decoder.decode(bb, cb, false);
+ if (result.isError() || result.isMalformed()) {
+ result.throwException();
+ } else if (result.isOverflow()) {
+ // Propagate current positions to the byte chunk and char chunk, if
+ // this continues the char buffer will get resized
+ bc.setOffset(bb.position());
+ cc.setEnd(cb.position());
+ } else if (result.isUnderflow()) {
+ // Propagate current positions to the byte chunk and char chunk
+ bc.setOffset(bb.position());
+ cc.setEnd(cb.position());
+ // Put leftovers in the leftovers byte buffer
+ if (bc.getLength() > 0) {
+ leftovers.limit(leftovers.array().length);
+ leftovers.position(bc.getLength());
+ bc.substract(leftovers.array(), 0, bc.getLength());
}
- } catch(IOException ioe){
}
}
}
-
-
-/** Special output stream where close() is overridden, so super.close()
- is never called.
-
- This allows recycling. It can also be disabled, so callbacks will
- not be called if recycling the converter and if data was not flushed.
-*/
-final class IntermediateInputStream extends InputStream {
- ByteChunk bc = null;
-
- public IntermediateInputStream() {
- }
-
- @Override
- public final void close() throws IOException {
- // shouldn't be called - we filter it out in writer
- throw new IOException("close() called - shouldn't happen ");
- }
-
- @Override
- public final int read(byte cbuf[], int off, int len) throws IOException {
- return bc.substract(cbuf, off, len);
- }
-
- @Override
- public final int read() throws IOException {
- return bc.substract();
- }
-
- // -------------------- Internal methods --------------------
-
-
- void setByteChunk( ByteChunk mb ) {
- bc = mb;
- }
-
-}
Modified: tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/ByteChunk.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/ByteChunk.java?rev=1452791&r1=1452790&r2=1452791&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/ByteChunk.java (original)
+++ tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/ByteChunk.java Tue Mar 5 13:41:19 2013
@@ -397,6 +397,7 @@ public final class ByteChunk implements
}
+
/**
* @deprecated Unused. Will be removed in Tomcat 8.0.x onwards.
*/
@@ -421,6 +422,23 @@ public final class ByteChunk implements
}
+
+ public byte substractB()
+ throws IOException {
+
+ if ((end - start) == 0) {
+ if (in == null)
+ return -1;
+ int n = in.realReadBytes( buff, 0, buff.length );
+ if (n < 0)
+ return -1;
+ }
+
+ return (buff[start++]);
+
+ }
+
+
public int substract( byte src[], int off, int len )
throws IOException {
Modified: tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/C2BConverter.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/C2BConverter.java?rev=1452791&r1=1452790&r2=1452791&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/C2BConverter.java (original)
+++ tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/C2BConverter.java Tue Mar 5 13:41:19 2013
@@ -16,295 +16,112 @@
*/
package org.apache.tomcat.util.buf;
-import java.io.BufferedWriter;
import java.io.IOException;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
-import java.nio.charset.Charset;
-
-import org.apache.juli.logging.Log;
-import org.apache.juli.logging.LogFactory;
-import org.apache.tomcat.util.res.StringManager;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
/**
- * Efficient conversion of character to bytes.
- *
- * This uses the standard JDK mechanism - a writer - but provides mechanisms to
- * recycle all the objects that are used. Input is buffered to improve
- * performance.
+ * NIO based character encoder.
*/
public final class C2BConverter {
- private static final Log log = LogFactory.getLog(C2BConverter.class);
- private static final StringManager sm =
- StringManager.getManager(Constants.Package);
-
- private final String encoding;
- private BufferedWriter writer;
- private WriteConvertor conv;
- private IntermediateOutputStream ios;
- private ByteChunk bb;
+ protected CharsetEncoder encoder = null;
+ protected ByteBuffer bb = null;
+ protected CharBuffer cb = null;
/**
- * Create a converter, with bytes going to a byte buffer.
+ * Leftover buffer used for multi-characters characters.
*/
- public C2BConverter(ByteChunk output, String encoding) throws IOException {
- this.bb = output;
- this.encoding = encoding;
- init();
- }
+ protected CharBuffer leftovers = null;
- /**
- * Create a converter
- * @deprecated Unused. Will be removed in Tomcat 8.0.x onwards.
- */
- @Deprecated
public C2BConverter(String encoding) throws IOException {
- this(new ByteChunk(1024), encoding);
- }
-
- /**
- * @deprecated Unused. Will be removed in Tomcat 8.0.x onwards.
- */
- @Deprecated
- public ByteChunk getByteChunk() {
- return bb;
+ encoder = B2CConverter.getCharset(encoding).newEncoder();
+ // FIXME: See if unmappable/malformed behavior configuration is needed
+ // in practice
+ encoder.onUnmappableCharacter(CodingErrorAction.REPLACE)
+ .onMalformedInput(CodingErrorAction.REPLACE);
+ char[] left = new char[4];
+ leftovers = CharBuffer.wrap(left);
}
/**
- * @deprecated Unused. Will be removed in Tomcat 8.0.x onwards.
+ * Reset the encoder state.
*/
- @Deprecated
- public String getEncoding() {
- return encoding;
+ public void recycle() {
+ encoder.reset();
+ leftovers.position(0);
}
- /**
- * @deprecated Unused. Will be removed in Tomcat 8.0.x onwards.
- */
- @Deprecated
- public void setByteChunk(ByteChunk bb) {
- this.bb=bb;
- ios.setByteChunk( bb );
- }
-
- /**
- * Reset the internal state, empty the buffers.
- * The encoding remain in effect, the internal buffers remain allocated.
- */
- public final void recycle() {
- // Disable any output
- ios.disable();
- // Flush out the BufferedWriter and WriteConvertor
- try {
- writer.flush();
- } catch (IOException e) {
- log.warn(sm.getString("c2bConverter.recycleFailed"), e);
- try {
- init();
- } catch (IOException ignore) {
- // Should never happen since this means encoding is invalid and
- // in that case, the constructor will have failed.
- }
- }
- // Re-enable ready for re-use
- ios.enable();
- bb.recycle();
- }
-
- private void init() throws IOException {
- ios = new IntermediateOutputStream(bb);
- conv = new WriteConvertor(ios, B2CConverter.getCharset(encoding));
- writer = new BufferedWriter(conv);
- }
-
- /**
- * Generate the bytes using the specified encoding.
- */
- public final void convert(char c[], int off, int len) throws IOException {
- writer.write(c, off, len);
+ public boolean isUndeflow() {
+ return (leftovers.position() > 0);
}
/**
- * Generate the bytes using the specified encoding.
+ * Convert the given characters to bytes.
+ *
+ * @param cc char input
+ * @param bc byte output
*/
- public final void convert(String s, int off, int len) throws IOException {
- writer.write(s, off, len);
- }
-
- /**
- * Generate the bytes using the specified encoding.
- */
- public final void convert(String s) throws IOException {
- writer.write(s);
- }
-
- /**
- * Generate the bytes using the specified encoding.
- */
- public final void convert(char c) throws IOException {
- writer.write(c);
- }
-
- /**
- * Convert a message bytes chars to bytes
- * @deprecated Unused. Will be removed in Tomcat 8.0.x onwards.
- */
- @Deprecated
- public final void convert(MessageBytes mb) throws IOException {
- int type=mb.getType();
- if( type==MessageBytes.T_BYTES ) {
- return;
+ public void convert(CharChunk cc, ByteChunk bc)
+ throws IOException {
+ if ((bb == null) || (bb.array() != bc.getBuffer())) {
+ // Create a new byte buffer if anything changed
+ bb = ByteBuffer.wrap(bc.getBuffer(), bc.getEnd(),
+ bc.getBuffer().length - bc.getEnd());
+ } else {
+ // Initialize the byte buffer
+ bb.limit(bc.getBuffer().length);
+ bb.position(bc.getEnd());
}
- ByteChunk orig=bb;
- setByteChunk( mb.getByteChunk());
- bb.recycle();
- bb.allocate( 32, -1 );
-
- if( type==MessageBytes.T_STR ) {
- convert( mb.getString() );
- // System.out.println("XXX Converting " + mb.getString() );
- } else if( type==MessageBytes.T_CHARS ) {
- CharChunk charC=mb.getCharChunk();
- convert( charC.getBuffer(),
- charC.getOffset(), charC.getLength());
- //System.out.println("XXX Converting " + mb.getCharChunk() );
+ if ((cb == null) || (cb.array() != cc.getBuffer())) {
+ // Create a new char buffer if anything changed
+ cb = CharBuffer.wrap(cc.getBuffer(), cc.getStart(),
+ cc.getLength());
} else {
- if (log.isDebugEnabled()) {
- log.debug("XXX unknowon type " + type );
+ // Initialize the char buffer
+ cb.limit(cc.getEnd());
+ cb.position(cc.getStart());
+ }
+ CoderResult result = null;
+ // Parse leftover if any are present
+ if (leftovers.position() > 0) {
+ int pos = bb.position();
+ // Loop until one char is encoded or there is a encoder error
+ do {
+ leftovers.put((char) cc.substract());
+ leftovers.flip();
+ result = encoder.encode(leftovers, bb, false);
+ leftovers.position(leftovers.limit());
+ leftovers.limit(leftovers.array().length);
+ } while (result.isUnderflow() && (bb.position() == pos));
+ if (result.isError() || result.isMalformed()) {
+ result.throwException();
}
+ cb.position(cc.getStart());
+ leftovers.position(0);
}
- flushBuffer();
- //System.out.println("C2B: XXX " + bb.getBuffer() + bb.getLength());
- setByteChunk(orig);
- }
-
- /**
- * Flush any internal buffers into the ByteOutput or the internal byte[].
- */
- public final void flushBuffer() throws IOException {
- writer.flush();
- }
-
-}
-
-// -------------------- Private implementation --------------------
-/**
- * Special writer class, where close() is overridden. The default implementation
- * would set byteOutputter to null, and the writer can't be recycled.
- *
- * Note that the flush method will empty the internal buffers _and_ call
- * flush on the output stream - that's why we use an intermediary output stream
- * that overrides flush(). The idea is to have full control: flushing the
- * char->byte converter should be independent of flushing the OutputStream.
- *
- * When a WriteConverter is created, it'll allocate one or 2 byte buffers,
- * with a 8k size that can't be changed ( at least in JDK1.1 -> 1.4 ). It would
- * also allocate a ByteOutputter or equivalent - again some internal buffers.
- *
- * It is essential to keep this object around and reuse it. You can use either
- * pools or per thread data - but given that in most cases a converter will be
- * needed for every thread and most of the time only 1 ( or 2 ) encodings will
- * be used, it is far better to keep it per thread and eliminate the pool
- * overhead too.
- */
- final class WriteConvertor extends OutputStreamWriter {
-
- /**
- * Create a converter.
- */
- public WriteConvertor(IntermediateOutputStream out, Charset charset) {
- super(out, charset);
- }
-
- /**
- * This is a NOOP.
- */
- @Override
- public final void close() throws IOException {
- // NOTHING
- // Calling super.close() would reset out and cb.
- }
-
- /**
- * Flush the characters only.
- */
- @Override
- public final void flush() throws IOException {
- // Will flushBuffer and out()
- // flushBuffer put any remaining chars in the byte[]
- super.flush();
- }
-
- @Override
- public final void write(char cbuf[], int off, int len) throws IOException {
- // Will do the conversion and call write on the output stream
- super.write( cbuf, off, len );
- }
-}
-
-
-/**
- * Special output stream where close() is overridden, so super.close()
- * is never called.
- *
- * This allows recycling. It can also be disabled, so callbacks will
- * not be called if recycling the converter and if data was not flushed.
- */
-final class IntermediateOutputStream extends OutputStream {
- private ByteChunk tbuff;
- private boolean enabled = true;
-
- public IntermediateOutputStream(ByteChunk tbuff) {
- this.tbuff=tbuff;
- }
-
- @Override
- public final void close() throws IOException {
- // shouldn't be called - we filter it out in writer
- throw new IOException("close() called - shouldn't happen ");
- }
-
- @Override
- public final void flush() throws IOException {
- // nothing - write will go directly to the buffer,
- // we don't keep any state
- }
-
- @Override
- public final void write(byte cbuf[], int off, int len) throws IOException {
- // will do the conversion and call write on the output stream
- if( enabled ) {
- tbuff.append( cbuf, off, len );
+ // Do the decoding and get the results into the byte chunk and the char
+ // chunk
+ result = encoder.encode(cb, bb, false);
+ if (result.isError() || result.isMalformed()) {
+ result.throwException();
+ } else if (result.isOverflow()) {
+ // Propagate current positions to the byte chunk and char chunk
+ bc.setEnd(bb.position());
+ cc.setOffset(cb.position());
+ } else if (result.isUnderflow()) {
+ // Propagate current positions to the byte chunk and char chunk
+ bc.setEnd(bb.position());
+ cc.setOffset(cb.position());
+ // Put leftovers in the leftovers char buffer
+ if (cc.getLength() > 0) {
+ leftovers.limit(leftovers.array().length);
+ leftovers.position(cc.getLength());
+ cc.substract(leftovers.array(), 0, cc.getLength());
+ }
}
}
-
- @Override
- public final void write(int i) throws IOException {
- throw new IOException("write( int ) called - shouldn't happen ");
- }
-
- // -------------------- Internal methods --------------------
- /**
- * @deprecated Unused. Will be removed in Tomcat 8.0.x onwards.
- */
- @Deprecated
- void setByteChunk(ByteChunk bb) {
- tbuff = bb;
- }
-
- /**
- * Temporary disable - this is used to recycle the converter without
- * generating an output if the buffers were not flushed.
- */
- final void disable() {
- enabled = false;
- }
-
- /**
- * Re-enable - used to recycle the converter.
- */
- final void enable() {
- enabled = true;
- }
}
Modified: tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/CharChunk.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/CharChunk.java?rev=1452791&r1=1452790&r2=1452791&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/CharChunk.java (original)
+++ tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/CharChunk.java Tue Mar 5 13:41:19 2013
@@ -468,7 +468,7 @@ public final class CharChunk implements
/** Make space for len chars. If len is small, allocate
* a reserve space too. Never grow bigger than limit.
*/
- private void makeSpace(int count)
+ public void makeSpace(int count)
{
char[] tmp = null;
Modified: tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/UEncoder.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/UEncoder.java?rev=1452791&r1=1452790&r2=1452791&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/UEncoder.java
(original)
+++ tomcat/tc7.0.x/trunk/java/org/apache/tomcat/util/buf/UEncoder.java Tue Mar
5 13:41:19 2013
@@ -16,9 +16,7 @@
*/
package org.apache.tomcat.util.buf;
-import java.io.CharArrayWriter;
import java.io.IOException;
-import java.io.Writer;
import java.util.BitSet;
/** Efficient implementation for encoders.
@@ -33,14 +31,13 @@ import java.util.BitSet;
*/
public final class UEncoder {
- private static final org.apache.juli.logging.Log log=
- org.apache.juli.logging.LogFactory.getLog(UEncoder.class );
-
// Not static - the set may differ ( it's better than adding
// an extra check for "/", "+", etc
private BitSet safeChars=null;
private C2BConverter c2b=null;
private ByteChunk bb=null;
+ private CharChunk cb=null;
+ private CharChunk output=null;
private String encoding="UTF8";
@@ -62,92 +59,65 @@ public final class UEncoder {
/** URL Encode string, using a specified encoding.
- *
- * @param buf The writer
- * @param s string to be encoded
- * @throws IOException If an I/O error occurs
- */
- public void urlEncode( Writer buf, String s )
- throws IOException {
- if( c2b==null ) {
- bb=new ByteChunk(16); // small enough.
- c2b=new C2BConverter( bb, encoding );
- }
-
- for (int i = 0; i < s.length(); i++) {
- int c = s.charAt(i);
- if( safeChars.get( c ) ) {
- if(log.isDebugEnabled()) {
- log.debug("Encoder: Safe: " + (char)c);
- }
- buf.write((char)c);
- } else {
- if(log.isDebugEnabled()) {
- log.debug("Encoder: Unsafe: " + (char)c);
- }
- c2b.convert( (char)c );
-
- // "surrogate" - UTF is _not_ 16 bit, but 21 !!!!
- // ( while UCS is 31 ). Amazing...
- if (c >= 0xD800 && c <= 0xDBFF) {
- if ( (i+1) < s.length()) {
- int d = s.charAt(i+1);
- if (d >= 0xDC00 && d <= 0xDFFF) {
- if(log.isDebugEnabled()) {
- log.debug("Encoder: Unsafe: " + c);
- }
- c2b.convert( (char)d);
- i++;
- }
- }
- }
-
- c2b.flushBuffer();
-
- urlEncode( buf, bb.getBuffer(), bb.getOffset(),
- bb.getLength() );
- bb.recycle();
- }
- }
- }
-
- /**
- */
- public void urlEncode( Writer buf, byte bytes[], int off, int len)
- throws IOException {
- for( int j=off; j< len; j++ ) {
- buf.write( '%' );
- char ch = Character.forDigit((bytes[j] >> 4) & 0xF, 16);
- if(log.isDebugEnabled()) {
- log.debug("Encoder: Encode: " + ch);
- }
- buf.write(ch);
- ch = Character.forDigit(bytes[j] & 0xF, 16);
- if(log.isDebugEnabled()) {
- log.debug("Encoder: Encode: " + ch);
- }
- buf.write(ch);
- }
- }
-
- /**
- * Utility function to re-encode the URL.
- * Still has problems with charset, since UEncoder mostly
- * ignores it.
- */
- public String encodeURL(String uri) {
- String outUri=null;
- try {
- // XXX optimize - recycle, etc
- CharArrayWriter out = new CharArrayWriter();
- urlEncode(out, uri);
- outUri=out.toString();
- } catch (IOException iex) {
- }
- return outUri;
- }
-
-
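+ *
+ * @param s string to be encoded
+ * @param start index of the first character of s to encode
+ * @param end index one past the last character of s to encode
+ * @return the encoder's internal output chunk holding the encoded characters
+ * @throws IOException If an I/O error occurs
+ */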
+ public CharChunk encodeURL(String s, int start, int end)
+ throws IOException {
+ if (c2b == null) {
+ bb = new ByteChunk(8); // small enough.
+ cb = new CharChunk(2); // small enough.
+ output = new CharChunk(64); // small enough.
+ c2b = new C2BConverter(encoding);
+ } else {
+ bb.recycle();
+ cb.recycle();
+ }
+
+ for (int i = start; i < end; i++) {
+ char c = s.charAt(i);
+ if (safeChars.get(c)) {
+ output.append(c);
+ } else {
+ cb.append(c);
+ c2b.convert(cb, bb);
+
+ // A high surrogate (0xD800-0xDBFF) is only half of a character:
+ // convert the low surrogate (0xDC00-0xDFFF) that follows along with it.
+ if (c >= 0xD800 && c <= 0xDBFF) {
+ if ((i+1) < end) {
+ char d = s.charAt(i+1);
+ if (d >= 0xDC00 && d <= 0xDFFF) {
+ cb.append(d);
+ c2b.convert(cb, bb);
+ i++;
+ }
+ }
+ }
+
+ urlEncode(output, bb);
+ cb.recycle();
+ bb.recycle();
+ }
+ }
+
+ return output;
+ }
+
+ protected void urlEncode(CharChunk out, ByteChunk bb)
+ throws IOException {
+ byte[] bytes = bb.getBuffer();
+ for (int j = bb.getStart(); j < bb.getEnd(); j++) {
+ out.append('%');
+ char ch = Character.forDigit((bytes[j] >> 4) & 0xF, 16);
+ out.append(ch);
+ ch = Character.forDigit(bytes[j] & 0xF, 16);
+ out.append(ch);
+ }
+ }
+
// -------------------- Internal implementation --------------------
private void initSafeChars() {
Modified:
tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java?rev=1452791&r1=1452790&r2=1452791&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java
(original)
+++ tomcat/tc7.0.x/trunk/test/org/apache/catalina/websocket/TestWebSocket.java
Tue Mar 5 13:41:19 2013
@@ -372,9 +372,10 @@ public class TestWebSocket extends Tomca
private void sendMessage(String message, boolean finalFragment)
throws IOException {
ByteChunk bc = new ByteChunk(8192);
- C2BConverter c2b = new C2BConverter(bc, "UTF-8");
- c2b.convert(message);
- c2b.flushBuffer();
+ CharChunk cc = new CharChunk(8192);
+ C2BConverter c2b = new C2BConverter("UTF-8");
+ cc.append(message);
+ c2b.convert(cc, bc);
int len = bc.getLength();
assertTrue(len < 126);
@@ -427,7 +428,7 @@ public class TestWebSocket extends Tomca
bc.setEnd(len);
B2CConverter b2c = new B2CConverter("UTF-8");
- b2c.convert(bc, cc, len);
+ b2c.convert(bc, cc);
return cc.toString();
}
Modified:
tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java?rev=1452791&r1=1452790&r2=1452791&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java
(original)
+++ tomcat/tc7.0.x/trunk/test/org/apache/tomcat/util/buf/TestB2CConverter.java
Tue Mar 5 13:41:19 2013
@@ -16,6 +16,8 @@
*/
package org.apache.tomcat.util.buf;
+import java.nio.charset.Charset;
+
import org.junit.Assert;
import org.junit.Test;
@@ -48,8 +50,7 @@ public class TestB2CConverter {
for (int i = 0; i < msgCount; i++) {
bc.append(UTF16_MESSAGE, 0, UTF16_MESSAGE.length);
- // Note: The limit is the number of characters to read
- conv.convert(bc, cc, 3);
+ conv.convert(bc, cc);
Assert.assertEquals("ABC", cc.toString());
bc.recycle();
cc.recycle();
@@ -58,4 +59,28 @@ public class TestB2CConverter {
System.out.println(cc);
}
+
+ @Test
+ public void testLeftoverSize() {
+ float maxLeftover = 0;
+ for (Charset charset : Charset.availableCharsets().values()) {
+ float leftover;
+ if (charset.name().toLowerCase().startsWith("x-")) {
+ // Non-standard charset that browsers won't be using
+ // Likely something used internally by the JRE
+ continue;
+ }
+ try {
+ leftover = charset.newEncoder().maxBytesPerChar();
+ } catch (UnsupportedOperationException uoe) {
+ // Skip it
+ continue;
+ }
+ if (leftover > maxLeftover) {
+ maxLeftover = leftover;
+ }
+ }
+ Assert.assertTrue("Limit needs to be at least " + maxLeftover,
+ maxLeftover <= B2CConverter.LEFTOVER_SIZE);
+ }
}
Modified: tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml?rev=1452791&r1=1452790&r2=1452791&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml (original)
+++ tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml Tue Mar 5 13:41:19 2013
@@ -98,6 +98,13 @@
</fix>
</changelog>
</subsection>
+ <subsection name="Coyote">
+ <changelog>
+ <scode>
+ Refactor char encoding/decoding using NIO APIs. (remm)
+ </scode>
+ </changelog>
+ </subsection>
<subsection name="Web applications">
<changelog>
<fix>
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org