[
https://issues.apache.org/jira/browse/XERCESJ-1398?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12765213#action_12765213
]
Karl Wright commented on XERCESJ-1398:
--------------------------------------
The patch I made was the following:
Index: org/apache/xerces/impl/XMLEntityManager.java
===================================================================
--- org/apache/xerces/impl/XMLEntityManager.java (revision 101962)
+++ org/apache/xerces/impl/XMLEntityManager.java (working copy)
@@ -1013,7 +1013,8 @@
}
}
// wrap this stream in RewindableInputStream
- stream = new RewindableInputStream(stream);
+ RewindableInputStream rstream = new RewindableInputStream(stream);
+ stream = rstream;
// perform auto-detect of encoding if necessary
if (encoding == null) {
@@ -1041,9 +1042,11 @@
stream.skip(3);
}
}
+ rstream.stopBuffering();
reader = createReader(stream, encoding, isBigEndian);
}
else {
+ rstream.stopBuffering();
reader = createReader(stream, encoding, isBigEndian);
}
}
@@ -1070,6 +1073,7 @@
else {
stream.reset();
}
+ rstream.stopBuffering();
reader = createReader(stream, encoding, isBigEndian);
}
// If encoding is UTF-16, we still need to read the first four bytes
@@ -1113,6 +1117,7 @@
}
}
}
+ rstream.stopBuffering();
reader = createReader(stream, utf16Encoding, isBigEndian);
}
// If encoding is UCS-4, we still need to read the first four bytes
@@ -1138,6 +1143,7 @@
isBigEndian = Boolean.FALSE;
}
}
+ rstream.stopBuffering();
reader = createReader(stream, encoding, isBigEndian);
}
// If encoding is UCS-2, we still need to read the first four bytes
@@ -1162,9 +1168,11 @@
isBigEndian = Boolean.FALSE;
}
}
+ rstream.stopBuffering();
reader = createReader(stream, encoding, isBigEndian);
}
else {
+ rstream.stopBuffering();
reader = createReader(stream, encoding, isBigEndian);
}
}
@@ -3043,6 +3051,7 @@
private int fOffset;
private int fLength;
private int fMark;
+ private boolean bufferingEnabled = true;
public RewindableInputStream(InputStream is) {
fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
@@ -3054,11 +3063,17 @@
fMark = 0;
}
+ public void stopBuffering() {
+ bufferingEnabled = false;
+ }
+
public void setStartOffset(int offset) {
fStartOffset = offset;
}
public void rewind() {
+ if (!bufferingEnabled)
+ throw new IllegalStateException("Can't rewind input stream when buffering not enabled!");
fOffset = fStartOffset;
}
@@ -3070,7 +3085,7 @@
if (fOffset == fEndOffset) {
return -1;
}
- if (fOffset == fData.length) {
+ if (bufferingEnabled && fOffset == fData.length) {
byte[] newData = new byte[fOffset << 1];
System.arraycopy(fData, 0, newData, 0, fOffset);
fData = newData;
@@ -3080,42 +3095,60 @@
fEndOffset = fOffset;
return -1;
}
- fData[fLength++] = (byte)b;
+ if (fOffset < fData.length)
+ fData[fLength++] = (byte)b;
fOffset++;
return b & 0xff;
}
public int read(byte[] b, int off, int len) throws IOException {
- int bytesLeft = fLength - fOffset;
- if (bytesLeft == 0) {
- if (fOffset == fEndOffset) {
- return -1;
+ // Read out of the buffer until we can't any more
+ int returnLen = 0;
+ if (fOffset < fLength) {
+ int bytesLeft = fLength - fOffset;
+ if (len < bytesLeft) {
+ if (len <= 0) {
+ return 0;
+ }
+ bytesLeft = len;
}
- // better get some more for the voracious reader...
- if(fCurrentEntity.mayReadChunks) {
- return fInputStream.read(b, off, len);
+ if (b != null) {
+ System.arraycopy(fData, fOffset, b, off, bytesLeft);
}
- int returnedVal = read();
- if(returnedVal == -1) {
- fEndOffset = fOffset;
+ fOffset += bytesLeft;
+ returnLen += bytesLeft;
+ off += bytesLeft;
+ len -= bytesLeft;
+ }
+ // We're at the end of the buffer.
+ if (fOffset == fEndOffset) {
+ if (returnLen == 0)
return -1;
- }
- b[off] = (byte)returnedVal;
- return 1;
+ return returnLen;
}
- if (len < bytesLeft) {
- if (len <= 0) {
- return 0;
+
+ // better get some more for the voracious reader...
+ if(fCurrentEntity.mayReadChunks || !bufferingEnabled) {
+ int lenSeen = fInputStream.read(b, off, len);
+ if (lenSeen == -1) {
+ fEndOffset = fOffset;
+ if (returnLen == 0)
+ return -1;
+ return returnLen;
}
+ fOffset += lenSeen;
+ returnLen += lenSeen;
+ return returnLen;
}
- else {
- len = bytesLeft;
+
+ int returnedVal = read();
+ if(returnedVal == -1) {
+ if (returnLen == 0)
+ return -1;
+ return returnLen;
}
- if (b != null) {
- System.arraycopy(fData, fOffset, b, off, len);
- }
- fOffset += len;
- return len;
+ b[off] = (byte)returnedVal;
+ return 1;
}
public long skip(long n)
@@ -3125,22 +3158,24 @@
if (n <= 0) {
return 0;
}
- bytesLeft = fLength - fOffset;
- if (bytesLeft == 0) {
+ if (fOffset == fEndOffset) {
+ return 0;
+ }
+ long skipAmt = 0L;
+ if (fOffset < fLength)
+ {
+ bytesLeft = fLength - fOffset;
+ if (n <= bytesLeft) {
+ fOffset += n;
+ return n;
+ }
+ fOffset += bytesLeft;
if (fOffset == fEndOffset) {
- return 0;
+ return bytesLeft;
}
- return fInputStream.skip(n);
+ n -= bytesLeft;
+ skipAmt += bytesLeft;
}
- if (n <= bytesLeft) {
- fOffset += n;
- return n;
- }
- fOffset += bytesLeft;
- if (fOffset == fEndOffset) {
- return bytesLeft;
- }
- n -= bytesLeft;
/*
* In a manner of speaking, when this class isn't permitting more
* than one byte at a time to be read, it is "blocking". The
@@ -3149,31 +3184,35 @@
* that bytes in its buffer are available; otherwise, the result of
* available() on the underlying InputStream is appropriate.
*/
- return fInputStream.skip(n) + bytesLeft;
+ return fInputStream.skip(n) + skipAmt;
}
public int available() throws IOException {
- int bytesLeft = fLength - fOffset;
- if (bytesLeft == 0) {
- if (fOffset == fEndOffset) {
- return -1;
- }
- return fCurrentEntity.mayReadChunks ? fInputStream.available()
- : 0;
+ if (fOffset < fLength) {
+ int bytesLeft = fLength - fOffset;
+ return bytesLeft;
}
- return bytesLeft;
+ if (fOffset == fEndOffset) {
+ return -1;
+ }
+ return (fCurrentEntity.mayReadChunks || !bufferingEnabled) ? fInputStream.available()
+ : 0;
}
public void mark(int howMuch) {
+ if (!bufferingEnabled)
+ throw new IllegalStateException("Mark not supported if buffering is not enabled");
fMark = fOffset;
}
public void reset() {
+ if (!bufferingEnabled)
+ throw new IllegalStateException("Mark not supported if buffering is not enabled");
fOffset = fMark;
}
public boolean markSupported() {
- return true;
+ return bufferingEnabled;
}
public void close() throws IOException {
> Supplying document without content-type headers causes entire stream to be
> buffered in memory, even when using SAX API
> ----------------------------------------------------------------------------------------------------------------------
>
> Key: XERCESJ-1398
> URL: https://issues.apache.org/jira/browse/XERCESJ-1398
> Project: Xerces2-J
> Issue Type: Bug
> Components: SAX
> Affects Versions: 2.9.1
> Environment: Debian Linux, Sun JDK 1.5.0_20
> Reporter: Karl Wright
> Fix For: 2.9.1
>
>
> If the parser needs to autodetect the encoding of the input stream, it wraps
> the input stream using the RewindableInputStream class within
> XMLEntityManager. But this class buffers everything that is read from the
> stream, even after the autodetection is complete (and no possibility of
> rewind being used exists anymore). It is therefore trivial to submit XML to
> xerces2-j which causes an "OutOfMemoryError" exception to be thrown, which
> could lead to a denial of service under appropriate conditions.
> The fix I created for this involved adding a method "stopBuffering()" to the
> RewindableInputStream class, which shuts off further buffering by that class.
> I call this method when the encoding has been decided upon (i.e. right
> before createReader is called, everywhere).
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]