This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new e58526385 update pdf-parser config throughout (#2432)
e58526385 is described below
commit e585263851a4512922a4d8b728e40b340ae46577
Author: Tim Allison <[email protected]>
AuthorDate: Tue Dec 9 14:41:28 2025 -0500
update pdf-parser config throughout (#2432)
---
.../src/test/resources/kafka/tika-config-kafka.xml | 7 ++++++-
.../src/test/resources/tika-config-kafka.xml | 7 ++++++-
.../src/test/resources/opensearch/tika-config-opensearch.json | 5 ++++-
.../src/test/resources/s3/tika-config-s3.json | 5 ++++-
.../src/test/resources/s3/tika-config-s3.xml | 7 ++++++-
.../src/test/resources/tika-config-s3-integration-test.xml | 7 ++++++-
.../src/test/resources/solr/tika-config-solr.xml | 7 ++++++-
.../src/test/resources/tika-config-solr-urls.json | 5 ++++-
.../src/test/resources/tika-config-solr-urls.xml | 7 ++++++-
9 files changed, 48 insertions(+), 9 deletions(-)
diff --git a/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/kafka/tika-config-kafka.xml b/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/kafka/tika-config-kafka.xml
index 21f04a26a..fb29c9ad6 100644
--- a/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/kafka/tika-config-kafka.xml
+++ b/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/kafka/tika-config-kafka.xml
@@ -28,7 +28,12 @@
<parser class="org.apache.tika.parser.pdf.PDFParser">
<params>
<param name="extractActions" type="bool">true</param>
- <param name="checkExtractAccessPermissions" type="bool">true</param>
+ <param name="accessChecker" type="org.apache.tika.parser.pdf.AccessChecker">
+ <params>
+ <param name="needToCheck" type="bool">true</param>
+ <param name="allowExtractionForAccessibility" type="bool">true</param>
+ </params>
+ </param>
</params>
</parser>
<parser class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser">
diff --git a/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/tika-config-kafka.xml b/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/tika-config-kafka.xml
index 8471ea5e5..820e1bc7b 100644
--- a/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/tika-config-kafka.xml
+++ b/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/tika-config-kafka.xml
@@ -28,7 +28,12 @@
<parser class="org.apache.tika.parser.pdf.PDFParser">
<params>
<param name="extractActions" type="bool">true</param>
- <param name="checkExtractAccessPermissions" type="bool">true</param>
+ <param name="accessChecker" type="org.apache.tika.parser.pdf.AccessChecker">
+ <params>
+ <param name="needToCheck" type="bool">true</param>
+ <param name="allowExtractionForAccessibility" type="bool">true</param>
+ </params>
+ </param>
</params>
</parser>
<parser class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser">
diff --git a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/tika-config-opensearch.json b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/tika-config-opensearch.json
index 5665c3419..a35a9d755 100644
--- a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/tika-config-opensearch.json
+++ b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/tika-config-opensearch.json
@@ -6,7 +6,10 @@
{
"pdf-parser": {
"extractActions": true,
- "checkExtractAccessPermissions": true
+ "accessChecker": {
+ "needToCheck": true,
+ "allowExtractionForAccessibility": true
+ }
}
},
{
diff --git a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json
index 52374a66a..017a047b9 100644
--- a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json
+++ b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json
@@ -15,7 +15,10 @@
{
"pdf-parser": {
"extractActions": true,
- "checkExtractAccessPermissions": true
+ "accessChecker": {
+ "needToCheck": true,
+ "allowExtractionForAccessibility": true
+ }
}
},
{
diff --git a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.xml b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.xml
index 5f7c3ebf6..c151bf907 100644
--- a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.xml
+++ b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.xml
@@ -28,7 +28,12 @@
<parser class="org.apache.tika.parser.pdf.PDFParser">
<params>
<param name="extractActions" type="bool">true</param>
- <param name="checkExtractAccessPermissions" type="bool">true</param>
+ <param name="accessChecker" type="org.apache.tika.parser.pdf.AccessChecker">
+ <params>
+ <param name="needToCheck" type="bool">true</param>
+ <param name="allowExtractionForAccessibility" type="bool">true</param>
+ </params>
+ </param>
</params>
</parser>
<parser class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser">
diff --git a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3-integration-test.xml b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3-integration-test.xml
index 1ac0e28c4..7b361483c 100644
--- a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3-integration-test.xml
+++ b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3-integration-test.xml
@@ -28,7 +28,12 @@
<parser class="org.apache.tika.parser.pdf.PDFParser">
<params>
<param name="extractActions" type="bool">true</param>
- <param name="checkExtractAccessPermissions" type="bool">true</param>
+ <param name="accessChecker" type="org.apache.tika.parser.pdf.AccessChecker">
+ <params>
+ <param name="needToCheck" type="bool">true</param>
+ <param name="allowExtractionForAccessibility" type="bool">true</param>
+ </params>
+ </param>
</params>
</parser>
<parser class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser">
diff --git a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/solr/tika-config-solr.xml b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/solr/tika-config-solr.xml
index 106a91261..53ad77b46 100644
--- a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/solr/tika-config-solr.xml
+++ b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/solr/tika-config-solr.xml
@@ -28,7 +28,12 @@
<parser class="org.apache.tika.parser.pdf.PDFParser">
<params>
<param name="extractActions" type="bool">true</param>
- <param name="checkExtractAccessPermissions" type="bool">true</param>
+ <param name="accessChecker" type="org.apache.tika.parser.pdf.AccessChecker">
+ <params>
+ <param name="needToCheck" type="bool">true</param>
+ <param name="allowExtractionForAccessibility" type="bool">true</param>
+ </params>
+ </param>
</params>
</parser>
<parser class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser">
diff --git a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.json b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.json
index 9b6aadf08..26f6d50e0 100644
--- a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.json
+++ b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.json
@@ -6,7 +6,10 @@
{
"pdf-parser": {
"extractActions": true,
- "checkExtractAccessPermissions": true
+ "accessChecker": {
+ "needToCheck": true,
+ "allowExtractionForAccessibility": true
+ }
}
},
{
diff --git a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.xml b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.xml
index 7b4acb036..ba17c705c 100644
--- a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.xml
+++ b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.xml
@@ -28,7 +28,12 @@
<parser class="org.apache.tika.parser.pdf.PDFParser">
<params>
<param name="extractActions" type="bool">true</param>
- <param name="checkExtractAccessPermissions" type="bool">true</param>
+ <param name="accessChecker" type="org.apache.tika.parser.pdf.AccessChecker">
+ <params>
+ <param name="needToCheck" type="bool">true</param>
+ <param name="allowExtractionForAccessibility" type="bool">true</param>
+ </params>
+ </param>
</params>
</parser>
<parser class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser">