This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit a31ed0d995e16d67f510d0ffdaa1d890e1df2da1
Author: tballison <talli...@mitre.org>
AuthorDate: Thu Apr 20 15:56:47 2017 -0400

    TIKA-2331 -- more opportunities to check the alleged length of a byte[]
---
 .../apache/tika/parser/rtf/RTFEmbObjHandler.java   |  7 +++---
 .../apache/tika/parser/rtf/RTFObjDataParser.java   | 28 ++++++++++++++++------
 .../java/org/apache/tika/parser/rtf/RTFParser.java |  2 +-
 3 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
index 42900fc..bac828f 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
@@ -151,9 +151,9 @@ class RTFEmbObjHandler {
         if (len < 0) {
             throw new TikaException("Requesting I read < 0 bytes ?!");
         }
-        if (len > memoryLimitInKb) {
+        if (len > memoryLimitInKb*1024) {
             throw new TikaMemoryLimitException("File embedded in RTF caused this (" + len +
-                    ") bytes), but maximum allowed is ("+memoryLimitInKb+")."+
+                    ") bytes), but maximum allowed is ("+(memoryLimitInKb*1024)+")."+
                     "If this is a valid RTF file, consider increasing the memory limit via TikaConfig.");
         }
 
@@ -171,10 +171,9 @@ class RTFEmbObjHandler {
      */
     protected void handleCompletedObject() throws IOException, SAXException, TikaException {
 
-
         byte[] bytes = os.toByteArray();
         if (state == EMB_STATE.OBJDATA) {
-            RTFObjDataParser objParser = new RTFObjDataParser();
+            RTFObjDataParser objParser = new RTFObjDataParser(memoryLimitInKb);
             try {
                 byte[] objBytes = objParser.parse(bytes, metadata, unknownFilenameCount);
                 extractObj(objBytes, handler, metadata);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
index b878dd2..a43c789 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
@@ -36,6 +36,8 @@ import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
 import org.apache.poi.poifs.filesystem.Ole10Native;
 import org.apache.poi.poifs.filesystem.Ole10NativeException;
 import org.apache.poi.util.IOUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.exception.TikaMemoryLimitException;
 import org.apache.tika.extractor.EmbeddedDocumentUtil;
 import org.apache.tika.io.EndianUtils;
 import org.apache.tika.io.TikaInputStream;
@@ -52,7 +54,11 @@ import org.apache.tika.parser.microsoft.OfficeParser.POIFSDocumentType;
 class RTFObjDataParser {
 
     private final static String WIN_ASCII = "WINDOWS-1252";
+    private final int memoryLimitInKb;
 
+    RTFObjDataParser(int memoryLimitInKb) {
+        this.memoryLimitInKb = memoryLimitInKb;
+    }
     /**
      * Parses the embedded object/pict string
      *
@@ -71,7 +77,7 @@ class RTFObjDataParser {
      * @throws IOException
      */
     protected byte[] parse(byte[] bytes, Metadata metadata, AtomicInteger unknownFilenameCount)
-            throws IOException {
+            throws IOException, TikaException {
         ByteArrayInputStream is = new ByteArrayInputStream(bytes);
         long version = readUInt(is);
         metadata.add(RTFMetadata.EMB_APP_VERSION, Long.toString(version));
@@ -192,7 +198,7 @@ class RTFObjDataParser {
      * can return null if there is a linked object
      * instead of an embedded file
      */
-    private byte[] handlePackage(byte[] pkgBytes, Metadata metadata) throws IOException {
+    private byte[] handlePackage(byte[] pkgBytes, Metadata metadata) throws IOException, TikaException {
         //now parse the package header
         ByteArrayInputStream is = new ByteArrayInputStream(pkgBytes);
         readUShort(is);
@@ -288,7 +294,7 @@ class RTFObjDataParser {
         return sb.toString();
     }
 
-    private String readLengthPrefixedAnsiString(InputStream is) throws IOException {
+    private String readLengthPrefixedAnsiString(InputStream is) throws IOException, TikaException {
         long len = readUInt(is);
         byte[] bytes = readBytes(is, len);
         try {
@@ -300,17 +306,25 @@ class RTFObjDataParser {
     }
 
 
-    private byte[] readBytes(InputStream is, long len) throws IOException {
+    private byte[] readBytes(InputStream is, long len) throws IOException, TikaException {
         //initByteArray tests for "reading of too many bytes"
         byte[] bytes = initByteArray(len);
         IOUtils.readFully(is, bytes);
         return bytes;
     }
 
-    private byte[] initByteArray(long len) throws IOException {
-        if (len < 0 || len > RTFParser.getMaxBytesForEmbeddedObject()) {
-            throw new IOException("Requested length for reading bytes is out of bounds: " + len);
+    private byte[] initByteArray(long len) throws IOException, TikaException {
+        if (len < 0) {
+            throw new IOException("Requested length for reading bytes < 0?!: " + len);
+        } else if (memoryLimitInKb > -1 && len > memoryLimitInKb*1024) {
+            throw new TikaMemoryLimitException("File embedded in RTF caused this (" + len +
+                    ") bytes), but maximum allowed is ("+(memoryLimitInKb*1024)+")."+
+                    "If this is a valid RTF file, consider increasing the memory limit via TikaConfig.");
+        } else if (len > Integer.MAX_VALUE) {
+            throw new TikaMemoryLimitException("File embedded in RTF caused this (" + len +
+                    ") bytes), but there is a hard limit of Integer.MAX_VALUE+");
         }
+
         return new byte[(int) len];
 
     }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
index 567a7a8..a553dc0 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
@@ -83,7 +83,7 @@ public class RTFParser extends AbstractParser {
     }
 
     @Field
-    private int memoryLimitInKb = EMB_OBJ_MAX_BYTES;
+    private int memoryLimitInKb = EMB_OBJ_MAX_BYTES/1024;
 
     public void parse(
             InputStream stream, ContentHandler handler,

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <commits@tika.apache.org>.

Reply via email to