Author: tallison
Date: Fri May 30 18:23:15 2014
New Revision: 1598693

URL: http://svn.apache.org/r1598693
Log:
TIKA-1305: make RTF list handling slightly more robust against corrupt list metadata

Modified:
    tika/trunk/CHANGES.txt
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java

Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1598693&r1=1598692&r2=1598693&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Fri May 30 18:23:15 2014
@@ -1,5 +1,8 @@
 Release 1.6 - ??/??/2014
 
+  * Made RTFParser's list handling slightly more robust against corrupt
+    list metadata (TIKA-1305)
+
   * Fixed bug in CLI json output (TIKA-1291/TIKA-1310)
 
   * Added ability to turn off image extraction from PDFs (TIKA-1294).

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java?rev=1598693&r1=1598692&r2=1598693&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java Fri May 30 18:23:15 2014
@@ -883,7 +883,11 @@ final class TextExtractor {
                 } else if (equals("listtemplateid")) {
                     currentList.templateID = param;
                 } else if (equals("levelnfc") || equals("levelnfcn")) {
-                    currentList.numberType[listTableLevel] = param;
+                    //sanity check to make sure list information isn't corrupt
+                    if (listTableLevel > -1 &&
+                        listTableLevel < currentList.numberType.length ) {
+                        currentList.numberType[listTableLevel] = param;
+                    }
                 }
             }
         } else {

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java?rev=1598693&r1=1598692&r2=1598693&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java Fri May 30 18:23:15 2014
@@ -413,7 +413,16 @@ public class RTFParserTest extends TikaT
         assertContains("Body", content);
     }
 
-    //TIKA-1010
+    // TIKA-1305
+    @Test
+    public void testCorruptListOverride() throws Exception {
+        Result r = getResult("testRTFCorruptListOverride.rtf");
+        String content = r.text;
+        assertContains("apple", content);
+    }
+
+
+    // TIKA-1010
     @Test
     public void testEmbeddedMonster() throws Exception {
         Set<MediaType> skipTypes = new HashSet<MediaType>();