Author: tallison
Date: Fri May 30 18:23:15 2014
New Revision: 1598693

URL: http://svn.apache.org/r1598693
Log:
TIKA-1305: make RTF list handling slightly more robust against corrupt list 
metadata

Modified:
    tika/trunk/CHANGES.txt
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java

Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1598693&r1=1598692&r2=1598693&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Fri May 30 18:23:15 2014
@@ -1,5 +1,8 @@
 Release 1.6 - ??/??/2014
 
+  * Made RTFParser's list handling slightly more robust against corrupt
+    list metadata (TIKA-1305)
+
   * Fixed bug in CLI json output (TIKA-1291/TIKA-1310)
 
   * Added ability to turn off image extraction from PDFs (TIKA-1294).

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java?rev=1598693&r1=1598692&r2=1598693&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java Fri May 30 18:23:15 2014
@@ -883,7 +883,11 @@ final class TextExtractor {
                 } else if (equals("listtemplateid")) {
                     currentList.templateID = param;
                 } else if (equals("levelnfc") || equals("levelnfcn")) {
-                    currentList.numberType[listTableLevel] = param;
+                    //sanity check to make sure list information isn't corrupt
+                    if (listTableLevel > -1 && 
+                        listTableLevel < currentList.numberType.length ) {
+                        currentList.numberType[listTableLevel] = param;
+                    }
                 }
             }
         } else {

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java?rev=1598693&r1=1598692&r2=1598693&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java Fri May 30 18:23:15 2014
@@ -413,7 +413,16 @@ public class RTFParserTest extends TikaT
         assertContains("Body", content);
     }
 
-    //TIKA-1010
+    // TIKA-1305
+    @Test
+    public void testCorruptListOverride() throws Exception {
+        Result r = getResult("testRTFCorruptListOverride.rtf");
+        String content = r.text;
+        assertContains("apple", content);
+    }
+
+
+    // TIKA-1010
     @Test
     public void testEmbeddedMonster() throws Exception {
         Set<MediaType> skipTypes = new HashSet<MediaType>();


Reply via email to