Author: tallison
Date: Fri Dec 13 13:20:43 2013
New Revision: 1550725
URL: http://svn.apache.org/r1550725
Log:
TIKA-973 reopened. The test documents should be unequivocally consistent with
the Apache License 2.0, so the initial test documents were deleted from trunk
and the test case was commented out. Also added extractAcroFormContent to
PDFParser.properties (this should have been done in the initial check-in).
Removed:
tika/trunk/tika-parsers/src/test/resources/test-documents/testPDF_acroForm1.pdf
tika/trunk/tika-parsers/src/test/resources/test-documents/testPDF_acroForm2.pdf
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/pdf/PDFParser.properties
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java?rev=1550725&r1=1550724&r2=1550725&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
Fri Dec 13 13:20:43 2013
@@ -32,7 +32,7 @@ import java.util.Properties;
* </ol>
*
* Parameters can also be set by modifying the PDFParserConfig.properties file,
- * which lives here in trunk:
+ * which lives in the expected places, in trunk:
* tika-parsers/src/main/resources/org/apache/tika/parser/pdf
*
* Or, in tika-app-x.x.jar or tika-parsers-x.x.jar:
Modified:
tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/pdf/PDFParser.properties
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/pdf/PDFParser.properties?rev=1550725&r1=1550724&r2=1550725&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/pdf/PDFParser.properties
(original)
+++
tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/pdf/PDFParser.properties
Fri Dec 13 13:20:43 2013
@@ -17,4 +17,5 @@ enableAutospace true
extractAnnotationText true
sortByPosition false
suppressDuplicateOverlappingText false
-useNonSequentialParser false
\ No newline at end of file
+useNonSequentialParser false
+extractAcroFormContent true
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java?rev=1550725&r1=1550724&r2=1550725&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
Fri Dec 13 13:20:43 2013
@@ -540,12 +540,18 @@ public class PDFParserTest extends TikaT
}
}
//make sure nothing went wrong with getting the resource to test-documents
- assertEquals("Number of pdf files tested", 16, pdfs);
+ //This will require modification with each new pdf test.
+ //If this is too annoying, we can turn it off.
+ assertEquals("Number of pdf files tested", 14, pdfs);
}
// TIKA-973
- public void testAcroForm() throws Exception{
+ //commented out until test documents that are unambiguously
+ //consistent with Apache License v2.0 are contributed.
+ //TODO: add back test for AcroForm extraction; test document should include
+ //recursive forms
+/* public void testAcroForm() throws Exception{
Parser p = new AutoDetectParser();
ParseContext context = new ParseContext();
InputStream stream =
getResourceAsStream("/test-documents/testPDF_acroForm1.pdf");
@@ -585,8 +591,7 @@ public class PDFParserTest extends TikaT
stream = getResourceAsStream("/test-documents/testPDF_acroForm2.pdf");
txt = getText(stream, p, context);
assertContains("123 Main St.", txt);
- stream.close();
-
-
+ stream.close();
}
+*/
}