Repository: tika
Updated Branches:
  refs/heads/master b9befb427 -> aa7a0c353


http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsA/file1.pdf.json
----------------------------------------------------------------------
diff --git a/tika-eval/src/test/resources/test-dirs/extractsA/file1.pdf.json 
b/tika-eval/src/test/resources/test-dirs/extractsA/file1.pdf.json
new file mode 100644
index 0000000..6ef09de
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/extractsA/file1.pdf.json
@@ -0,0 +1,5 @@
+[{
+  "Content-Type":"text/plain",
+  "X-TIKA:content":"the quick brown fox fox fox jumped over the lazy lazy dog 
1,200 120000",
+  "xmpTPg:NPages":2
+}]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsA/file10_permahang.txt.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsA/file10_permahang.txt.json 
b/tika-eval/src/test/resources/test-dirs/extractsA/file10_permahang.txt.json
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsA/file11_oom.txt.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsA/file11_oom.txt.json 
b/tika-eval/src/test/resources/test-dirs/extractsA/file11_oom.txt.json
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsA/file12_es.txt.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsA/file12_es.txt.json 
b/tika-eval/src/test/resources/test-dirs/extractsA/file12_es.txt.json
new file mode 100644
index 0000000..0e2558b
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/extractsA/file12_es.txt.json
@@ -0,0 +1,4 @@
+[{
+  "Content-Type":"text/plain",
+  "X-TIKA:content":"El zorro marrón rápido saltó sobre el perro. El zorro 
marrón rápido saltó sobre el perro. El zorro marrón rápido saltó sobre el 
perro"
+}]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsA/file13_attachANotB.doc.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsA/file13_attachANotB.doc.json 
b/tika-eval/src/test/resources/test-dirs/extractsA/file13_attachANotB.doc.json
new file mode 100644
index 0000000..5371c87
--- /dev/null
+++ 
b/tika-eval/src/test/resources/test-dirs/extractsA/file13_attachANotB.doc.json
@@ -0,0 +1,10 @@
+[{
+  "Content-Type":"text/plain",
+  "X-TIKA:content":"the quick brown fox fox fox jumped over the lazy lazy dog"
+  },
+  {
+    "Content-Type":"text/plain",
+    "X-TIKA:embedded_resource_path":"inner.txt",
+    "X-TIKA:content":"attachment contents"
+  }
+]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsA/file2_attachANotB.doc.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsA/file2_attachANotB.doc.json 
b/tika-eval/src/test/resources/test-dirs/extractsA/file2_attachANotB.doc.json
new file mode 100644
index 0000000..5371c87
--- /dev/null
+++ 
b/tika-eval/src/test/resources/test-dirs/extractsA/file2_attachANotB.doc.json
@@ -0,0 +1,10 @@
+[{
+  "Content-Type":"text/plain",
+  "X-TIKA:content":"the quick brown fox fox fox jumped over the lazy lazy dog"
+  },
+  {
+    "Content-Type":"text/plain",
+    "X-TIKA:embedded_resource_path":"inner.txt",
+    "X-TIKA:content":"attachment contents"
+  }
+]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsA/file3_attachBNotA.doc.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsA/file3_attachBNotA.doc.json 
b/tika-eval/src/test/resources/test-dirs/extractsA/file3_attachBNotA.doc.json
new file mode 100644
index 0000000..18763d1
--- /dev/null
+++ 
b/tika-eval/src/test/resources/test-dirs/extractsA/file3_attachBNotA.doc.json
@@ -0,0 +1,4 @@
+[{
+  "Content-Type":"text/plain",
+  "X-TIKA:content":"the quick brown fox fox fox jumped over the lazy lazy dog"
+}]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsA/file4_emptyB.pdf.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsA/file4_emptyB.pdf.json 
b/tika-eval/src/test/resources/test-dirs/extractsA/file4_emptyB.pdf.json
new file mode 100644
index 0000000..18763d1
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/extractsA/file4_emptyB.pdf.json
@@ -0,0 +1,4 @@
+[{
+  "Content-Type":"text/plain",
+  "X-TIKA:content":"the quick brown fox fox fox jumped over the lazy lazy dog"
+}]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsA/file5_emptyA.pdf.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsA/file5_emptyA.pdf.json 
b/tika-eval/src/test/resources/test-dirs/extractsA/file5_emptyA.pdf.json
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsA/file6_accessEx.pdf.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsA/file6_accessEx.pdf.json 
b/tika-eval/src/test/resources/test-dirs/extractsA/file6_accessEx.pdf.json
new file mode 100644
index 0000000..ded29af
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/extractsA/file6_accessEx.pdf.json
@@ -0,0 +1 @@
+[{"Content-Type":"application/pdf","X-Parsed-By":["org.apache.tika.parser.DefaultParser","org.apache.tika.parser.pdf.PDFParser"],"X-TIKA:EXCEPTION:runtime":"org.apache.tika.exception.AccessPermissionException:
 Content extraction is not allowed.\n\tat 
org.apache.tika.parser.pdf.AccessChecker.check(AccessChecker.java:77)\n\tat 
org.apache.tika.parser.pdf.PDFParser.parse(PDFParser.java:147)\n\tat 
org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:270)\n\tat 
org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:270)\n\tat 
org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)\n\tat 
org.apache.tika.parser.RecursiveParserWrapper.parse(RecursiveParserWrapper.java:130)\n\tat
 
org.apache.tika.batch.fs.RecursiveParserWrapperFSConsumer.processFileResource(RecursiveParserWrapperFSConsumer.java:123)\n\tat
 
org.apache.tika.batch.FileResourceConsumer._processFileResource(FileResourceConsumer.java:171)\n\tat
 org.apache.tika.batch.FileResourceConsumer.call(F
 ileResourceConsumer.java:104)\n\tat 
org.apache.tika.batch.FileResourceConsumer.call(FileResourceConsumer.java:44)\n\tat
 java.util.concurrent.FutureTask.run(FutureTask.java:262)\n\tat 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)\n\tat 
java.util.concurrent.FutureTask.run(FutureTask.java:262)\n\tat 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)\n\tat
 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)\n\tat
 
java.lang.Thread.run(Thread.java:745)\n","access_permission:assemble_document":"false","access_permission:can_modify":"false","access_permission:can_print":"true","access_permission:can_print_degraded":"true","access_permission:extract_content":"false","access_permission:extract_for_accessibility":"true","access_permission:fill_in_form":"false","access_permission:modify_annotations":"false","pdf:encrypted":"true","resourceName":"file3_accessEx","tika:file_ext":"pdf","tika_batch_fs:relative_path":
 "file3_accessEx","xmpTPg:NPages":"4"}]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsA/file7_badJson.pdf.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsA/file7_badJson.pdf.json 
b/tika-eval/src/test/resources/test-dirs/extractsA/file7_badJson.pdf.json
new file mode 100644
index 0000000..8cf61da
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/extractsA/file7_badJson.pdf.json
@@ -0,0 +1,4 @@
+[{
+  "Content-Type":"text/plain",
+  "X-TIKA:content":2,100
+}]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsA/file8_IOEx.pdf.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsA/file8_IOEx.pdf.json 
b/tika-eval/src/test/resources/test-dirs/extractsA/file8_IOEx.pdf.json
new file mode 100644
index 0000000..4ecf0e8
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/extractsA/file8_IOEx.pdf.json
@@ -0,0 +1 @@
+[{"Content-Length":"479562","Content-Type":"application/pdf","X-Parsed-By":["org.apache.tika.parser.DefaultParser","org.apache.tika.parser.pdf.PDFParser"],"X-TIKA:EXCEPTION:runtime":"java.lang.RuntimeException:
 java.io.IOException: Value is not an integer: 8546736428538085463808\n\tat 
org.apache.pdfbox.pdfparser.PDFStreamParser$1.tryNext(PDFStreamParser.java:186)\n\tat
 
org.apache.pdfbox.pdfparser.PDFStreamParser$1.hasNext(PDFStreamParser.java:193)\n\tat
 
org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:255)\n\tat
 
org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:235)\n\tat
 
org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:215)\n\tat
 
org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:456)\n\tat
 
org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:381)\n\tat
 
org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:340)\n\tat
 org.apache.tika.parser.pdf.PDF
 2XHTML.process(PDF2XHTML.java:106)\n\tat 
org.apache.tika.parser.pdf.PDFParser.parse(PDFParser.java:148)\n\tat 
org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:247)\n\tat 
org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:247)\n\tat 
org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)\n\tat 
org.apache.tika.parser.RecursiveParserWrapper.parse(RecursiveParserWrapper.java:130)\n\tat
 
org.apache.tika.batch.FileResourceConsumer.parse(FileResourceConsumer.java:410)\n\tat
 
org.apache.tika.batch.fs.RecursiveParserWrapperFSConsumer.processFileResource(RecursiveParserWrapperFSConsumer.java:106)\n\tat
 
org.apache.tika.batch.FileResourceConsumer._processFileResource(FileResourceConsumer.java:182)\n\tat
 
org.apache.tika.batch.FileResourceConsumer.call(FileResourceConsumer.java:115)\n\tat
 
org.apache.tika.batch.FileResourceConsumer.call(FileResourceConsumer.java:49)\n\tat
 java.util.concurrent.FutureTask.run(FutureTask.java:262)\n\tat 
java.util.concurr
 ent.Executors$RunnableAdapter.call(Executors.java:471)\n\tat 
java.util.concurrent.FutureTask.run(FutureTask.java:262)\n\tat 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)\n\tat
 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)\n\tat
 java.lang.Thread.run(Thread.java:745)\nCaused by: java.io.IOException: Value 
is not an integer: 8546736428538085463808\n\tat 
org.apache.pdfbox.cos.COSNumber.get(COSNumber.java:104)\n\tat 
org.apache.pdfbox.pdfparser.PDFStreamParser.parseNextToken(PDFStreamParser.java:350)\n\tat
 
org.apache.pdfbox.pdfparser.PDFStreamParser.access$000(PDFStreamParser.java:46)\n\tat
 
org.apache.pdfbox.pdfparser.PDFStreamParser$1.tryNext(PDFStreamParser.java:181)\n\t...
 24 
more\n","access_permission:assemble_document":"true","access_permission:can_modify":"true","access_permission:can_print":"true","access_permission:can_print_degraded":"true","access_permission:extract_content":"true","access_permission:extract_for
 
_accessibility":"true","access_permission:fill_in_form":"true","access_permission:modify_annotations":"true",
 
"resourceName":"file8_IOEx.pdf","tika:file_ext":"pdf","tika_batch_fs:relative_path":"file8_IOEx.pdf"}]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsB/file1.pdf.json
----------------------------------------------------------------------
diff --git a/tika-eval/src/test/resources/test-dirs/extractsB/file1.pdf.json 
b/tika-eval/src/test/resources/test-dirs/extractsB/file1.pdf.json
new file mode 100644
index 0000000..cbb51cf
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/extractsB/file1.pdf.json
@@ -0,0 +1,2 @@
+[{  "Content-Type":"text/plain",
+  "X-TIKA:content":"the quick brown fox jumped the lazy dog aardvark aardvark 
aardvark bear bear"}]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsB/file11_oom.txt.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsB/file11_oom.txt.json 
b/tika-eval/src/test/resources/test-dirs/extractsB/file11_oom.txt.json
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsB/file12_es.txt.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsB/file12_es.txt.json 
b/tika-eval/src/test/resources/test-dirs/extractsB/file12_es.txt.json
new file mode 100644
index 0000000..0e2558b
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/extractsB/file12_es.txt.json
@@ -0,0 +1,4 @@
+[{
+  "Content-Type":"text/plain",
+  "X-TIKA:content":"El zorro marrón rápido saltó sobre el perro. El zorro 
marrón rápido saltó sobre el perro. El zorro marrón rápido saltó sobre el 
perro"
+}]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsB/file13_attachANotB.doc.txt
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsB/file13_attachANotB.doc.txt 
b/tika-eval/src/test/resources/test-dirs/extractsB/file13_attachANotB.doc.txt
new file mode 100644
index 0000000..240a94e
--- /dev/null
+++ 
b/tika-eval/src/test/resources/test-dirs/extractsB/file13_attachANotB.doc.txt
@@ -0,0 +1 @@
+the quick brown fox fox fox jumped over the lazy lazy dog

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsB/file2_attachANotB.doc.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsB/file2_attachANotB.doc.json 
b/tika-eval/src/test/resources/test-dirs/extractsB/file2_attachANotB.doc.json
new file mode 100644
index 0000000..18763d1
--- /dev/null
+++ 
b/tika-eval/src/test/resources/test-dirs/extractsB/file2_attachANotB.doc.json
@@ -0,0 +1,4 @@
+[{
+  "Content-Type":"text/plain",
+  "X-TIKA:content":"the quick brown fox fox fox jumped over the lazy lazy dog"
+}]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsB/file3_attachBNotA.doc.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsB/file3_attachBNotA.doc.json 
b/tika-eval/src/test/resources/test-dirs/extractsB/file3_attachBNotA.doc.json
new file mode 100644
index 0000000..5371c87
--- /dev/null
+++ 
b/tika-eval/src/test/resources/test-dirs/extractsB/file3_attachBNotA.doc.json
@@ -0,0 +1,10 @@
+[{
+  "Content-Type":"text/plain",
+  "X-TIKA:content":"the quick brown fox fox fox jumped over the lazy lazy dog"
+  },
+  {
+    "Content-Type":"text/plain",
+    "X-TIKA:embedded_resource_path":"inner.txt",
+    "X-TIKA:content":"attachment contents"
+  }
+]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsB/file4_emptyB.pdf.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsB/file4_emptyB.pdf.json 
b/tika-eval/src/test/resources/test-dirs/extractsB/file4_emptyB.pdf.json
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsB/file5_emptyA.pdf.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsB/file5_emptyA.pdf.json 
b/tika-eval/src/test/resources/test-dirs/extractsB/file5_emptyA.pdf.json
new file mode 100644
index 0000000..18763d1
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/extractsB/file5_emptyA.pdf.json
@@ -0,0 +1,4 @@
+[{
+  "Content-Type":"text/plain",
+  "X-TIKA:content":"the quick brown fox fox fox jumped over the lazy lazy dog"
+}]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsB/file6_accessEx.pdf.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsB/file6_accessEx.pdf.json 
b/tika-eval/src/test/resources/test-dirs/extractsB/file6_accessEx.pdf.json
new file mode 100644
index 0000000..ded29af
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/extractsB/file6_accessEx.pdf.json
@@ -0,0 +1 @@
+[{"Content-Type":"application/pdf","X-Parsed-By":["org.apache.tika.parser.DefaultParser","org.apache.tika.parser.pdf.PDFParser"],"X-TIKA:EXCEPTION:runtime":"org.apache.tika.exception.AccessPermissionException:
 Content extraction is not allowed.\n\tat 
org.apache.tika.parser.pdf.AccessChecker.check(AccessChecker.java:77)\n\tat 
org.apache.tika.parser.pdf.PDFParser.parse(PDFParser.java:147)\n\tat 
org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:270)\n\tat 
org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:270)\n\tat 
org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)\n\tat 
org.apache.tika.parser.RecursiveParserWrapper.parse(RecursiveParserWrapper.java:130)\n\tat
 
org.apache.tika.batch.fs.RecursiveParserWrapperFSConsumer.processFileResource(RecursiveParserWrapperFSConsumer.java:123)\n\tat
 
org.apache.tika.batch.FileResourceConsumer._processFileResource(FileResourceConsumer.java:171)\n\tat
 org.apache.tika.batch.FileResourceConsumer.call(F
 ileResourceConsumer.java:104)\n\tat 
org.apache.tika.batch.FileResourceConsumer.call(FileResourceConsumer.java:44)\n\tat
 java.util.concurrent.FutureTask.run(FutureTask.java:262)\n\tat 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)\n\tat 
java.util.concurrent.FutureTask.run(FutureTask.java:262)\n\tat 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)\n\tat
 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)\n\tat
 
java.lang.Thread.run(Thread.java:745)\n","access_permission:assemble_document":"false","access_permission:can_modify":"false","access_permission:can_print":"true","access_permission:can_print_degraded":"true","access_permission:extract_content":"false","access_permission:extract_for_accessibility":"true","access_permission:fill_in_form":"false","access_permission:modify_annotations":"false","pdf:encrypted":"true","resourceName":"file3_accessEx","tika:file_ext":"pdf","tika_batch_fs:relative_path":
 "file3_accessEx","xmpTPg:NPages":"4"}]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsB/file7_badJson.pdf.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsB/file7_badJson.pdf.json 
b/tika-eval/src/test/resources/test-dirs/extractsB/file7_badJson.pdf.json
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/extractsB/file8_IOEx.pdf.json
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/extractsB/file8_IOEx.pdf.json 
b/tika-eval/src/test/resources/test-dirs/extractsB/file8_IOEx.pdf.json
new file mode 100644
index 0000000..4ecf0e8
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/extractsB/file8_IOEx.pdf.json
@@ -0,0 +1 @@
+[{"Content-Length":"479562","Content-Type":"application/pdf","X-Parsed-By":["org.apache.tika.parser.DefaultParser","org.apache.tika.parser.pdf.PDFParser"],"X-TIKA:EXCEPTION:runtime":"java.lang.RuntimeException:
 java.io.IOException: Value is not an integer: 8546736428538085463808\n\tat 
org.apache.pdfbox.pdfparser.PDFStreamParser$1.tryNext(PDFStreamParser.java:186)\n\tat
 
org.apache.pdfbox.pdfparser.PDFStreamParser$1.hasNext(PDFStreamParser.java:193)\n\tat
 
org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:255)\n\tat
 
org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:235)\n\tat
 
org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:215)\n\tat
 
org.apache.pdfbox.util.PDFTextStripper.processPage(PDFTextStripper.java:456)\n\tat
 
org.apache.pdfbox.util.PDFTextStripper.processPages(PDFTextStripper.java:381)\n\tat
 
org.apache.pdfbox.util.PDFTextStripper.writeText(PDFTextStripper.java:340)\n\tat
 org.apache.tika.parser.pdf.PDF
 2XHTML.process(PDF2XHTML.java:106)\n\tat 
org.apache.tika.parser.pdf.PDFParser.parse(PDFParser.java:148)\n\tat 
org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:247)\n\tat 
org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:247)\n\tat 
org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)\n\tat 
org.apache.tika.parser.RecursiveParserWrapper.parse(RecursiveParserWrapper.java:130)\n\tat
 
org.apache.tika.batch.FileResourceConsumer.parse(FileResourceConsumer.java:410)\n\tat
 
org.apache.tika.batch.fs.RecursiveParserWrapperFSConsumer.processFileResource(RecursiveParserWrapperFSConsumer.java:106)\n\tat
 
org.apache.tika.batch.FileResourceConsumer._processFileResource(FileResourceConsumer.java:182)\n\tat
 
org.apache.tika.batch.FileResourceConsumer.call(FileResourceConsumer.java:115)\n\tat
 
org.apache.tika.batch.FileResourceConsumer.call(FileResourceConsumer.java:49)\n\tat
 java.util.concurrent.FutureTask.run(FutureTask.java:262)\n\tat 
java.util.concurr
 ent.Executors$RunnableAdapter.call(Executors.java:471)\n\tat 
java.util.concurrent.FutureTask.run(FutureTask.java:262)\n\tat 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)\n\tat
 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)\n\tat
 java.lang.Thread.run(Thread.java:745)\nCaused by: java.io.IOException: Value 
is not an integer: 8546736428538085463808\n\tat 
org.apache.pdfbox.cos.COSNumber.get(COSNumber.java:104)\n\tat 
org.apache.pdfbox.pdfparser.PDFStreamParser.parseNextToken(PDFStreamParser.java:350)\n\tat
 
org.apache.pdfbox.pdfparser.PDFStreamParser.access$000(PDFStreamParser.java:46)\n\tat
 
org.apache.pdfbox.pdfparser.PDFStreamParser$1.tryNext(PDFStreamParser.java:181)\n\t...
 24 
more\n","access_permission:assemble_document":"true","access_permission:can_modify":"true","access_permission:can_print":"true","access_permission:can_print_degraded":"true","access_permission:extract_content":"true","access_permission:extract_for
 
_accessibility":"true","access_permission:fill_in_form":"true","access_permission:modify_annotations":"true",
 
"resourceName":"file8_IOEx.pdf","tika:file_ext":"pdf","tika_batch_fs:relative_path":"file8_IOEx.pdf"}]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/raw_input/file1.pdf
----------------------------------------------------------------------
diff --git a/tika-eval/src/test/resources/test-dirs/raw_input/file1.pdf 
b/tika-eval/src/test/resources/test-dirs/raw_input/file1.pdf
new file mode 100644
index 0000000..ef9ddba
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/raw_input/file1.pdf
@@ -0,0 +1,13 @@
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+dummy source file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/raw_input/file11_oom.txt
----------------------------------------------------------------------
diff --git a/tika-eval/src/test/resources/test-dirs/raw_input/file11_oom.txt 
b/tika-eval/src/test/resources/test-dirs/raw_input/file11_oom.txt
new file mode 100644
index 0000000..d3cf2f9
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/raw_input/file11_oom.txt
@@ -0,0 +1,2 @@
+dummy 
+dummy
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/raw_input/file2_attachANotB.doc
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/raw_input/file2_attachANotB.doc 
b/tika-eval/src/test/resources/test-dirs/raw_input/file2_attachANotB.doc
new file mode 100644
index 0000000..ef9ddba
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/raw_input/file2_attachANotB.doc
@@ -0,0 +1,13 @@
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+dummy source file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/raw_input/file3_attachBNotA.doc
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/raw_input/file3_attachBNotA.doc 
b/tika-eval/src/test/resources/test-dirs/raw_input/file3_attachBNotA.doc
new file mode 100644
index 0000000..ef9ddba
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/raw_input/file3_attachBNotA.doc
@@ -0,0 +1,13 @@
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+dummy source file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/raw_input/file4_emptyB.pdf
----------------------------------------------------------------------
diff --git a/tika-eval/src/test/resources/test-dirs/raw_input/file4_emptyB.pdf 
b/tika-eval/src/test/resources/test-dirs/raw_input/file4_emptyB.pdf
new file mode 100644
index 0000000..ef9ddba
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/raw_input/file4_emptyB.pdf
@@ -0,0 +1,13 @@
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+dummy source file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/raw_input/file5_emptyA.pdf
----------------------------------------------------------------------
diff --git a/tika-eval/src/test/resources/test-dirs/raw_input/file5_emptyA.pdf 
b/tika-eval/src/test/resources/test-dirs/raw_input/file5_emptyA.pdf
new file mode 100644
index 0000000..ef9ddba
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/raw_input/file5_emptyA.pdf
@@ -0,0 +1,13 @@
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+dummy source file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/raw_input/file6_accessEx.pdf
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/raw_input/file6_accessEx.pdf 
b/tika-eval/src/test/resources/test-dirs/raw_input/file6_accessEx.pdf
new file mode 100644
index 0000000..ef9ddba
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/raw_input/file6_accessEx.pdf
@@ -0,0 +1,13 @@
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+dummy source file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/raw_input/file7_badJson.pdf
----------------------------------------------------------------------
diff --git a/tika-eval/src/test/resources/test-dirs/raw_input/file7_badJson.pdf 
b/tika-eval/src/test/resources/test-dirs/raw_input/file7_badJson.pdf
new file mode 100644
index 0000000..ef9ddba
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/raw_input/file7_badJson.pdf
@@ -0,0 +1,13 @@
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+dummy source file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/raw_input/file8_IOEx.pdf
----------------------------------------------------------------------
diff --git a/tika-eval/src/test/resources/test-dirs/raw_input/file8_IOEx.pdf 
b/tika-eval/src/test/resources/test-dirs/raw_input/file8_IOEx.pdf
new file mode 100644
index 0000000..ef9ddba
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/raw_input/file8_IOEx.pdf
@@ -0,0 +1,13 @@
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+dummy source file

http://git-wip-us.apache.org/repos/asf/tika/blob/aa7a0c35/tika-eval/src/test/resources/test-dirs/raw_input/file9_noextract.txt
----------------------------------------------------------------------
diff --git 
a/tika-eval/src/test/resources/test-dirs/raw_input/file9_noextract.txt 
b/tika-eval/src/test/resources/test-dirs/raw_input/file9_noextract.txt
new file mode 100644
index 0000000..5c3118d
--- /dev/null
+++ b/tika-eval/src/test/resources/test-dirs/raw_input/file9_noextract.txt
@@ -0,0 +1 @@
+dummy file

Reply via email to