Author: mattmann
Date: Wed Aug 17 15:46:30 2011
New Revision: 1158779

URL: http://svn.apache.org/viewvc?rev=1158779&view=rev
Log:
- patch for TIKA-422 contributed by Mike McCandless.

Added:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/geo/
    tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWithCurlyBraces.rtf
Modified:
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java?rev=1158779&r1=1158778&r2=1158779&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java Wed Aug 17 15:46:30 2011
@@ -102,6 +102,12 @@ public class RTFParserTest extends TikaT
        assertContains("\u6771\u4eac\u90fd\u4e09\u9df9\u5e02", content);
     }
 
+    public void testTextWithCurlyBraces() throws Exception {
+        String content = getText("testRTFWithCurlyBraces.rtf");
+        //assertContains("{ some text inside curly brackets }", content);
+        assertContains("{  some text inside curly brackets  }", content);
+    }
+
     private String getText(String filename) throws Exception {
        File file = getResourceAsFile("/test-documents/" + filename);
        

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWithCurlyBraces.rtf
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWithCurlyBraces.rtf?rev=1158779&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWithCurlyBraces.rtf (added)
+++ tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWithCurlyBraces.rtf Wed Aug 17 15:46:30 2011
@@ -0,0 +1,44 @@
+{\rtf1\ansi\ansicpg1251\uc1\deff1\stshfdbch0\stshfloch0\stshfhich0\stshfbi0\deflang1049\deflangfe1049{\fonttbl{\f0\froman\fcharset204\fprq2{\*\panose
 02020603050405020304}Times New Roman;}
+{\f1\fswiss\fcharset204\fprq2{\*\panose 
020b0604020202020204}Arial;}{\f2\fmodern\fcharset204\fprq1{\*\panose 
02070309020205020404}Courier New;}{\f38\fswiss\fcharset204\fprq2{\*\panose 
020b0604030504040204}Verdana;}
+{\f41\froman\fcharset0\fprq2 Times New Roman;}{\f39\froman\fcharset238\fprq2 
Times New Roman CE;}{\f42\froman\fcharset161\fprq2 Times New Roman 
Greek;}{\f43\froman\fcharset162\fprq2 Times New Roman Tur;}
+{\f44\froman\fcharset177\fprq2 Times New Roman 
(Hebrew);}{\f45\froman\fcharset178\fprq2 Times New Roman 
(Arabic);}{\f46\froman\fcharset186\fprq2 Times New Roman 
Baltic;}{\f47\froman\fcharset163\fprq2 Times New Roman (Vietnamese);}
+{\f51\fswiss\fcharset0\fprq2 Arial;}{\f49\fswiss\fcharset238\fprq2 Arial 
CE;}{\f52\fswiss\fcharset161\fprq2 Arial Greek;}{\f53\fswiss\fcharset162\fprq2 
Arial Tur;}{\f54\fswiss\fcharset177\fprq2 Arial (Hebrew);}
+{\f55\fswiss\fcharset178\fprq2 Arial (Arabic);}{\f56\fswiss\fcharset186\fprq2 
Arial Baltic;}{\f57\fswiss\fcharset163\fprq2 Arial 
(Vietnamese);}{\f61\fmodern\fcharset0\fprq1 Courier 
New;}{\f59\fmodern\fcharset238\fprq1 Courier New CE;}
+{\f62\fmodern\fcharset161\fprq1 Courier New 
Greek;}{\f63\fmodern\fcharset162\fprq1 Courier New 
Tur;}{\f64\fmodern\fcharset177\fprq1 Courier New 
(Hebrew);}{\f65\fmodern\fcharset178\fprq1 Courier New (Arabic);}
+{\f66\fmodern\fcharset186\fprq1 Courier New 
Baltic;}{\f67\fmodern\fcharset163\fprq1 Courier New 
(Vietnamese);}{\f421\fswiss\fcharset0\fprq2 
Verdana;}{\f419\fswiss\fcharset238\fprq2 Verdana 
CE;}{\f422\fswiss\fcharset161\fprq2 Verdana Greek;}
+{\f423\fswiss\fcharset162\fprq2 Verdana Tur;}{\f426\fswiss\fcharset186\fprq2 
Verdana Baltic;}{\f427\fswiss\fcharset163\fprq2 Verdana 
(Vietnamese);}}{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;
+\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;
+\red192\green192\blue192;\red255\green255\blue255;}{\stylesheet{\qj 
\fi720\li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 
\f1\fs20\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \snext0 Normal;}{
+\s1\qc \li0\ri0\sb108\sa108\nowidctlpar\faauto\outlinelevel0\rin0\lin0\itap0 
\b\f1\fs20\cf9\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \sbasedon0 
\snext0 heading 1;}{\s2\qc 
\li0\ri0\sb108\sa108\nowidctlpar\faauto\outlinelevel1\rin0\lin0\itap0 
+\b\f1\fs20\cf9\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \sbasedon1 
\snext0 heading 2;}{\s3\qc 
\li0\ri0\sb108\sa108\nowidctlpar\faauto\outlinelevel2\rin0\lin0\itap0 
\b\f1\fs20\cf9\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 
+\sbasedon2 \snext0 heading 3;}{\s4\qc 
\li0\ri0\sb108\sa108\nowidctlpar\faauto\outlinelevel3\rin0\lin0\itap0 
\b\f1\fs20\cf9\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \sbasedon3 
\snext0 heading 4;}{\*\cs10 \additive \ssemihidden 
+Default Paragraph 
Font;}{\*\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv
 
+\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 
\fs20\lang1024\langfe1024\cgrid\langnp1024\langfenp1024 \snext11 \ssemihidden 
Normal Table;}{\*\cs15 \additive \b\fs20\cf9 
+\'d6\'e2\'e5\'f2\'ee\'e2\'ee\'e5 
\'e2\'fb\'e4\'e5\'eb\'e5\'ed\'e8\'e5;}{\*\cs16 \additive \fs20\ul\cf11 
\sbasedon15 \'c3\'e8\'ef\'e5\'f0\'f2\'e5\'ea\'f1\'f2\'ee\'e2\'e0\'ff 
\'f1\'f1\'fb\'eb\'ea\'e0;}{
+\s17\qj \fi-2504\li1612\ri0\nowidctlpar\faauto\rin0\lin1612\itap0 
\f1\fs20\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \sbasedon0 \snext0 
\'c7\'e0\'e3\'ee\'eb\'ee\'e2\'ee\'ea \'f1\'f2\'e0\'f2\'fc\'e8;}{
+\s18\ql \li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 
\f1\fs20\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \sbasedon0 \snext0 
\'d2\'e5\'ea\'f1\'f2 (\'eb\'e5\'e2. \'ef\'ee\'e4\'ef\'e8\'f1\'fc);}{\s19\ql 
\li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 
+\f1\fs14\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \sbasedon18 \snext0 
\'ca\'ee\'eb\'ee\'ed\'f2\'e8\'f2\'f3\'eb (\'eb\'e5\'e2\'fb\'e9);}{\s20\qr 
\li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 
+\f1\fs20\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \sbasedon0 \snext0 
\'d2\'e5\'ea\'f1\'f2 (\'ef\'f0\'e0\'e2. \'ef\'ee\'e4\'ef\'e8\'f1\'fc);}{\s21\qr 
\li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 
+\f1\fs14\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \sbasedon20 \snext0 
\'ca\'ee\'eb\'ee\'ed\'f2\'e8\'f2\'f3\'eb (\'ef\'f0\'e0\'e2\'fb\'e9);}{\s22\qj 
\fi-170\li170\ri0\nowidctlpar\faauto\rin0\lin170\itap0 
+\i\f1\fs20\cf12\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \sbasedon0 
\snext0 \'ca\'ee\'ec\'ec\'e5\'ed\'f2\'e0\'f0\'e8\'e9;}{\s23\ql 
\fi-170\li170\ri0\nowidctlpar\faauto\rin0\lin170\itap0 
+\i\f1\fs20\cf9\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \sbasedon22 
\snext0 \'ca\'ee\'ec\'ec\'e5\'ed\'f2\'e0\'f0\'e8\'e9 
\'ef\'ee\'eb\'fc\'e7\'ee\'e2\'e0\'f2\'e5\'eb\'ff;}{\*\cs24 \additive 
\b\fs20\cf9 \sbasedon15 
+\'cd\'e0\'e9\'e4\'e5\'ed\'ed\'fb\'e5 \'f1\'eb\'ee\'e2\'e0;}{\*\cs25 \additive 
\fs20\cf10 \sbasedon15 \'cd\'e5 \'e2\'f1\'f2\'f3\'ef\'e8\'eb \'e2 
\'f1\'e8\'eb\'f3;}{\s26\qj \li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 
+\f2\fs20\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \sbasedon0 \snext0 
\'d2\'e0\'e1\'eb\'e8\'f6\'fb 
(\'ec\'ee\'ed\'ee\'f8\'e8\'f0\'e8\'ed\'ed\'fb\'e9);}{\s27\qj 
\fi-140\li140\ri0\nowidctlpar\faauto\rin0\lin140\itap0 
+\f2\fs20\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \sbasedon26 \snext0 
\'ce\'e3\'eb\'e0\'e2\'eb\'e5\'ed\'e8\'e5;}{\s28\qj 
\fi720\li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 
\f38\fs18\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 
+\sbasedon0 \snext0 \'ce\'f1\'ed\'ee\'e2\'ed\'ee\'e5 \'ec\'e5\'ed\'fe;}{\s29\qj 
\fi720\li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 
\f38\fs18\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \sbasedon28 \snext0 
+\'cf\'e5\'f0\'e5\'ec\'e5\'ed\'ed\'e0\'ff \'f7\'e0\'f1\'f2\'fc;}{\s30\qj 
\fi720\li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 
\b\f38\fs18\ul\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \sbasedon28 
\snext0 
+\'cf\'ee\'f1\'f2\'ee\'ff\'ed\'ed\'e0\'ff \'f7\'e0\'f1\'f2\'fc;}{\s31\ql 
\li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 
\f1\fs20\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \sbasedon0 \snext0 
\'cf\'f0\'e8\'e6\'e0\'f2\'fb\'e9 \'e2\'eb\'e5\'e2\'ee;}{\*
+\cs32 \additive \fs20\ul\cf11 \sbasedon16 
\'cf\'f0\'ee\'e4\'ee\'eb\'e6\'e5\'ed\'e8\'e5 \'f1\'f1\'fb\'eb\'ea\'e8;}{\s33\qj 
\li0\ri118\nowidctlpar\faauto\rin118\lin0\itap0 
\f1\fs20\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \sbasedon0 \snext0 
+\'d1\'eb\'ee\'e2\'e0\'f0\'ed\'e0\'ff \'f1\'f2\'e0\'f2\'fc\'ff;}{\s34\ql 
\fi-170\li170\ri170\nowidctlpar\faauto\rin170\lin170\itap0 
\f1\fs20\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 \sbasedon0 \snext0 
+\'d2\'e5\'ea\'f1\'f2 (\'f1\'ef\'f0\'e0\'e2\'ea\'e0);}{\*\cs35 \additive 
\strike\fs20\cf14 \sbasedon15 \'d3\'f2\'f0\'e0\'f2\'e8\'eb 
\'f1\'e8\'eb\'f3;}}{\*\revtbl {Unknown;}}{\*\rsidtbl \rsid6824087}{\*\generator 
Microsoft Word 10.0.2627;}{\info
+{\title \'cc\'e5\'f2\'ee\'e4\'e8\'ea\'e0}{\author kashina}{\operator 
kashina}{\creatim\yr2006\mo6\dy13\hr13\min51}{\revtim\yr2006\mo6\dy13\hr13\min51}{\version2}{\edmins1}{\nofpages10}{\nofwords4564}{\nofchars26020}{\*\company
 \'d1\'e8\'e1\'cd\'c0\'d6}
+{\nofcharsws30523}{\vern16437}}\paperw11906\paperh16838\margl1134\margr850 
\widowctrl\ftnbj\aenddoc\noxlattoyen\expshrtn\noultrlspc\dntblnsbdb\nospaceforul\hyphcaps0\horzdoc\dghspace120\dgvspace120\dghorigin1701\dgvorigin1984\dghshow0\dgvshow3
+\jcompress\viewkind1\viewscale100\nolnhtadjtbl\rsidroot6824087 \fet0\sectd 
\linex0\sectdefaultcl\sftnbj {\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang 
{\pntxta .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta 
.}}{\*\pnseclvl3
+\pndec\pnstart1\pnindent720\pnhang {\pntxta 
.}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta 
)}}{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta 
)}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}
+{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta 
)}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta 
)}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta 
)}}\pard\plain 
+\s1\qc \li0\ri0\sb108\sa108\nowidctlpar\faauto\outlinelevel0\rin0\lin0\itap0 
\b\f1\fs20\cf9\lang1049\langfe1049\cgrid\langnp1049\langfenp1049 {
+\insrsid6824087 
+\par }
+{\lang1024\langfe1024\noproof\insrsid6824087       \{ some text inside curly 
brackets \} }{
+\insrsid6824087 
+\par }}


Reply via email to