This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/tika.git
commit a31ed0d995e16d67f510d0ffdaa1d890e1df2da1 Author: tballison <talli...@mitre.org> AuthorDate: Thu Apr 20 15:56:47 2017 -0400 TIKA-2331 -- more opportunities to check the alleged length of a byte[] --- .../apache/tika/parser/rtf/RTFEmbObjHandler.java | 7 +++--- .../apache/tika/parser/rtf/RTFObjDataParser.java | 28 ++++++++++++++++------ .../java/org/apache/tika/parser/rtf/RTFParser.java | 2 +- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java index 42900fc..bac828f 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java @@ -151,9 +151,9 @@ class RTFEmbObjHandler { if (len < 0) { throw new TikaException("Requesting I read < 0 bytes ?!"); } - if (len > memoryLimitInKb) { + if (len > memoryLimitInKb*1024) { throw new TikaMemoryLimitException("File embedded in RTF caused this (" + len + - ") bytes), but maximum allowed is ("+memoryLimitInKb+")."+ + ") bytes), but maximum allowed is ("+(memoryLimitInKb*1024)+")."+ "If this is a valid RTF file, consider increasing the memory limit via TikaConfig."); } @@ -171,10 +171,9 @@ class RTFEmbObjHandler { */ protected void handleCompletedObject() throws IOException, SAXException, TikaException { - byte[] bytes = os.toByteArray(); if (state == EMB_STATE.OBJDATA) { - RTFObjDataParser objParser = new RTFObjDataParser(); + RTFObjDataParser objParser = new RTFObjDataParser(memoryLimitInKb); try { byte[] objBytes = objParser.parse(bytes, metadata, unknownFilenameCount); extractObj(objBytes, handler, metadata); diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java index b878dd2..a43c789 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java @@ -36,6 +36,8 @@ import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.Ole10Native; import org.apache.poi.poifs.filesystem.Ole10NativeException; import org.apache.poi.util.IOUtils; +import org.apache.tika.exception.TikaException; +import org.apache.tika.exception.TikaMemoryLimitException; import org.apache.tika.extractor.EmbeddedDocumentUtil; import org.apache.tika.io.EndianUtils; import org.apache.tika.io.TikaInputStream; @@ -52,7 +54,11 @@ import org.apache.tika.parser.microsoft.OfficeParser.POIFSDocumentType; class RTFObjDataParser { private final static String WIN_ASCII = "WINDOWS-1252"; + private final int memoryLimitInKb; + RTFObjDataParser(int memoryLimitInKb) { + this.memoryLimitInKb = memoryLimitInKb; + } /** * Parses the embedded object/pict string * @@ -71,7 +77,7 @@ class RTFObjDataParser { * @throws IOException */ protected byte[] parse(byte[] bytes, Metadata metadata, AtomicInteger unknownFilenameCount) - throws IOException { + throws IOException, TikaException { ByteArrayInputStream is = new ByteArrayInputStream(bytes); long version = readUInt(is); metadata.add(RTFMetadata.EMB_APP_VERSION, Long.toString(version)); @@ -192,7 +198,7 @@ class RTFObjDataParser { * can return null if there is a linked object * instead of an embedded file */ - private byte[] handlePackage(byte[] pkgBytes, Metadata metadata) throws IOException { + private byte[] handlePackage(byte[] pkgBytes, Metadata metadata) throws IOException, TikaException { //now parse the package header ByteArrayInputStream is = new ByteArrayInputStream(pkgBytes); readUShort(is); @@ -288,7 +294,7 @@ class RTFObjDataParser { return sb.toString(); } - private String readLengthPrefixedAnsiString(InputStream is) throws IOException { + private String readLengthPrefixedAnsiString(InputStream is) throws IOException, TikaException { long len = readUInt(is); byte[] bytes = readBytes(is, len); try { @@ -300,17 +306,25 @@ class RTFObjDataParser { } - private byte[] readBytes(InputStream is, long len) throws IOException { + private byte[] readBytes(InputStream is, long len) throws IOException, TikaException { //initByteArray tests for "reading of too many bytes" byte[] bytes = initByteArray(len); IOUtils.readFully(is, bytes); return bytes; } - private byte[] initByteArray(long len) throws IOException { - if (len < 0 || len > RTFParser.getMaxBytesForEmbeddedObject()) { - throw new IOException("Requested length for reading bytes is out of bounds: " + len); + private byte[] initByteArray(long len) throws IOException, TikaException { + if (len < 0) { + throw new IOException("Requested length for reading bytes < 0?!: " + len); + } else if (memoryLimitInKb > -1 && len > memoryLimitInKb*1024) { + throw new TikaMemoryLimitException("File embedded in RTF caused this (" + len + + ") bytes), but maximum allowed is ("+(memoryLimitInKb*1024)+")."+ + "If this is a valid RTF file, consider increasing the memory limit via TikaConfig."); + } else if (len > Integer.MAX_VALUE) { + throw new TikaMemoryLimitException("File embedded in RTF caused this (" + len + + ") bytes), but there is a hard limit of Integer.MAX_VALUE+"); } + return new byte[(int) len]; } diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java index 567a7a8..a553dc0 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java @@ -83,7 +83,7 @@ public class RTFParser extends AbstractParser { } @Field - private int memoryLimitInKb = EMB_OBJ_MAX_BYTES; + private int memoryLimitInKb = EMB_OBJ_MAX_BYTES/1024; public void parse( InputStream stream, ContentHandler handler, -- To stop receiving notification emails like this one, please contact "commits@tika.apache.org" <commits@tika.apache.org>.