This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new 4a55830 TIKA-3094 -- new metadata for every parse :(
4a55830 is described below
commit 4a558303d1ed9b352e519d35a48a4e30367ebfff
Author: tballison <[email protected]>
AuthorDate: Tue May 5 10:42:12 2020 -0400
TIKA-3094 -- new metadata for every parse :(
---
.../src/test/java/org/apache/tika/bundle/BundleIT.java | 15 ++-------------
1 file changed, 2 insertions(+), 13 deletions(-)
diff --git a/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java b/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java
index 12804ca..e00900c 100644
--- a/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java
+++ b/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java
@@ -318,29 +318,18 @@ public class BundleIT {
Parser parser = tika.getParser();
ParseContext context = new ParseContext();
context.set(Parser.class, parser);
- Metadata metadata = new Metadata();
Set<String> needToFix = new HashSet<>();
needToFix.add("testAccess2_encrypted.accdb");
-
- Set<String> unknownProblem = new HashSet<>();
- //these all trigger org.apache.tika.metadata.PropertyTypeException
- //which for some reason we can't catch (?!)
- //We don't see problems with these files in tika-parsers?!
-/* unknownProblem.add("testPPT_embedded_two_slides.pptx");
- unknownProblem.add("testWORD_multi_authors.docx");
- unknownProblem.add("testEXCEL_embeded.xlsx");
- unknownProblem.add("testVORBIS.ogg");
- unknownProblem.add("testWORD_2006ml.docx");
- unknownProblem.add("testRTFEmbeddedLink.rtf");*/
System.out.println(getTestDir());
for (File f : getTestDir().listFiles()) {
if (f.isDirectory()) {
continue;
}
- if (needToFix.contains(f.getName()) || unknownProblem.contains(f.getName())) {
+ if (needToFix.contains(f.getName())) {
continue;
}
System.out.println("about to parse "+f);
+ Metadata metadata = new Metadata();
try (InputStream is = TikaInputStream.get(f)) {
parser.parse(is, handler, metadata, context);
} catch (EncryptedDocumentException e) {