Author: tallison
Date: Fri Dec 13 13:20:43 2013
New Revision: 1550725

URL: http://svn.apache.org/r1550725
Log:
TIKA-973 reopened. The test documents should be unequivocally consistent with
the Apache License 2.0, so the initial test docs were deleted from trunk and
the test case was commented out. Also added extractAcroFormContent to the
parameter file (this should have been done in the initial check-in).

Removed:
    
tika/trunk/tika-parsers/src/test/resources/test-documents/testPDF_acroForm1.pdf
    
tika/trunk/tika-parsers/src/test/resources/test-documents/testPDF_acroForm2.pdf
Modified:
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
    
tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/pdf/PDFParser.properties
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java?rev=1550725&r1=1550724&r2=1550725&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java Fri Dec 13 13:20:43 2013
@@ -32,7 +32,7 @@ import java.util.Properties;
  * </ol>
  * 
  * Parameters can also be set by modifying the PDFParserConfig.properties file,
- * which lives here in trunk:
+ * which lives in the expected places, in trunk:
  * tika-parsers/src/main/resources/org/apache/tika/parser/pdf
  * 
  * Or, in tika-app-x.x.jar or tika-parsers-x.x.jar:

Modified: 
tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/pdf/PDFParser.properties
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/pdf/PDFParser.properties?rev=1550725&r1=1550724&r2=1550725&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/pdf/PDFParser.properties (original)
+++ tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/pdf/PDFParser.properties Fri Dec 13 13:20:43 2013
@@ -17,4 +17,5 @@ enableAutospace true
 extractAnnotationText true
 sortByPosition false
 suppressDuplicateOverlappingText       false
-useNonSequentialParser false
\ No newline at end of file
+useNonSequentialParser false
+extractAcroFormContent true

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java?rev=1550725&r1=1550724&r2=1550725&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java Fri Dec 13 13:20:43 2013
@@ -540,12 +540,18 @@ public class PDFParserTest extends TikaT
             }
         }
         //make sure nothing went wrong with getting the resource to test-documents
-        assertEquals("Number of pdf files tested", 16, pdfs);
+        //This will require modification with each new pdf test.
+        //If this is too annoying, we can turn it off.
+        assertEquals("Number of pdf files tested", 14, pdfs);
     }
 
 
     // TIKA-973
-    public void testAcroForm() throws Exception{
+    //commented out until test documents that are unambiguously
+    //consistent with Apache License v2.0 are contributed.
+    //TODO: add back test for AcroForm extraction; test document should include
+    //recursive forms
+/*    public void testAcroForm() throws Exception{
        Parser p = new AutoDetectParser();
        ParseContext context = new ParseContext();
        InputStream stream = getResourceAsStream("/test-documents/testPDF_acroForm1.pdf");
@@ -585,8 +591,7 @@ public class PDFParserTest extends TikaT
        stream = getResourceAsStream("/test-documents/testPDF_acroForm2.pdf");
        txt = getText(stream, p, context);
        assertContains("123 Main St.", txt);
-       stream.close();
-
-       
+       stream.close();     
     }
+*/
 }


Reply via email to