Author: mikemccand
Date: Tue Aug 30 19:07:45 2011
New Revision: 1163336

URL: http://svn.apache.org/viewvc?rev=1163336&view=rev
Log:
TIKA-392: add 3 RTF test cases

Added:
    
tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFHexEscapeInsideWord.rtf
   (with props)
    
tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFTableCellSeparation2.rtf
   (with props)
    
tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWindowsCodepage1250.rtf
   (with props)
Modified:
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java?rev=1163336&r1=1163335&r2=1163336&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
 (original)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
 Tue Aug 30 19:07:45 2011
@@ -82,13 +82,31 @@ public class RTFParserTest extends TikaT
         assertContains("starov\u011Bk\u00E9 \u017Eidovsk\u00E9 n\u00E1bo\u017Eensk\u00E9 texty", content);
     }
 
+    public void testHexEscapeInsideWord() throws Exception {
+        String content = getText("testRTFHexEscapeInsideWord.rtf");
+        assertContains("ESPÍRITO", content);
+    }
+
+    public void testWindowsCodepage1250() throws Exception {
+        String content = getText("testRTFWindowsCodepage1250.rtf");
+        assertContains("zażółć gęślą jaźń", content);
+        assertContains("ZAŻÓŁĆ GĘŚLĄ JAŹŃ", content);
+    }
+
     public void testRTFTableCellSeparation() throws Exception {
         String content = getText("testRTFTableCellSeparation.rtf");
-
+        // TODO: why do we insert extra whitespace...?
         content = content.replaceAll("\\s+"," ");
         assertContains("a b c d \u00E4 \u00EB \u00F6 \u00FC", content);
     }
     
+    public void testRTFTableCellSeparation2() throws Exception {
+        String content = getText("testRTFTableCellSeparation2.rtf");
+        // TODO: why do we insert extra whitespace...?
+        content = content.replaceAll("\\s+"," ");
+        assertContains("Station Fax", content);
+    }
+
     public void testGothic() throws Exception {
        String content = getText("testRTFUnicodeGothic.rtf");
        assertContains("\uD800\uDF32\uD800\uDF3f\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", content);

Added: 
tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFHexEscapeInsideWord.rtf
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFHexEscapeInsideWord.rtf?rev=1163336&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFHexEscapeInsideWord.rtf
 (added)
+++ 
tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFHexEscapeInsideWord.rtf
 Tue Aug 30 19:07:45 2011
@@ -0,0 +1,4 @@
+{\rtf1\ansi\ansicpg1252\deff0\deflang1033{\fonttbl{\f0\fswiss\fcharset0 
Arial;}}
+{\*\generator Msftedit 5.41.21.2500;}\viewkind4\uc1\pard\f0\fs20 GOVERNO DO 
ESTADO DO ESP\'cdRITO SANTO\par
+}
+

Propchange: 
tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFHexEscapeInsideWord.rtf
------------------------------------------------------------------------------
    svn:eol-style = native

Added: 
tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFTableCellSeparation2.rtf
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFTableCellSeparation2.rtf?rev=1163336&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFTableCellSeparation2.rtf
 (added)
+++ 
tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFTableCellSeparation2.rtf
 Tue Aug 30 19:07:45 2011
@@ -0,0 +1,3 @@
+{\rtf1\ansi\ansicpg1252\deff0\deflang1033{\fonttbl{\f0\fswiss\fcharset0 
Arial;}}
+{\rtlch\fcs1 \af0\afs24 \ltrch\fcs0 
\f0\fs24\lang2055\langfe2055\langfenp2055\insrsid9461491\charrsid9461491 Fax / 
Phone Station\cell Fax / Phone #\cell }
+}

Propchange: 
tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFTableCellSeparation2.rtf
------------------------------------------------------------------------------
    svn:eol-style = native

Added: 
tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWindowsCodepage1250.rtf
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWindowsCodepage1250.rtf?rev=1163336&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWindowsCodepage1250.rtf
 (added)
+++ 
tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWindowsCodepage1250.rtf
 Tue Aug 30 19:07:45 2011
@@ -0,0 +1,5 @@
+{\rtf1\ansi\ansicpg1250\deff0\deflang1045{\fonttbl{\f0\fswiss\fcharset238{\*\fname
 Arial;}Arial CE;}}
+{\*\generator Msftedit 5.41.15.1515;}\viewkind4\uc1\pard\f0\fs20 
za\'bf\'f3\'b3\'e6 g\'ea\'9cl\'b9 ja\'9f\'f1\par
+ZA\'af\'d3\'a3\'c6 G\'ca\'8cL\'a5 JA\'8f\'d1\par
+\par
+}

Propchange: 
tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWindowsCodepage1250.rtf
------------------------------------------------------------------------------
    svn:eol-style = native


Reply via email to