This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch clean-up-pdf-parser-configuration in repository https://gitbox.apache.org/repos/asf/tika.git
commit 9ddbac11d8229136c7d1d101232d10908bb153f2 Author: tallison <[email protected]> AuthorDate: Tue Dec 9 14:00:02 2025 -0500 update pdf-parser config throughout --- .../src/test/resources/kafka/tika-config-kafka.xml | 7 ++++++- .../src/test/resources/tika-config-kafka.xml | 7 ++++++- .../src/test/resources/opensearch/tika-config-opensearch.json | 5 ++++- .../src/test/resources/s3/tika-config-s3.json | 5 ++++- .../src/test/resources/s3/tika-config-s3.xml | 7 ++++++- .../src/test/resources/tika-config-s3-integration-test.xml | 7 ++++++- .../src/test/resources/solr/tika-config-solr.xml | 7 ++++++- .../src/test/resources/tika-config-solr-urls.json | 5 ++++- .../src/test/resources/tika-config-solr-urls.xml | 7 ++++++- 9 files changed, 48 insertions(+), 9 deletions(-) diff --git a/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/kafka/tika-config-kafka.xml b/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/kafka/tika-config-kafka.xml index 21f04a26a..fb29c9ad6 100644 --- a/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/kafka/tika-config-kafka.xml +++ b/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/kafka/tika-config-kafka.xml @@ -28,7 +28,12 @@ <parser class="org.apache.tika.parser.pdf.PDFParser"> <params> <param name="extractActions" type="bool">true</param> - <param name="checkExtractAccessPermissions" type="bool">true</param> + <param name="accessChecker" type="org.apache.tika.parser.pdf.AccessChecker"> + <params> + <param name="needToCheck" type="bool">true</param> + <param name="allowExtractionForAccessibility" type="bool">true</param> + </params> + </param> </params> </parser> <parser class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser"> diff --git a/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/tika-config-kafka.xml b/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/tika-config-kafka.xml index 8471ea5e5..820e1bc7b 100644 --- a/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/tika-config-kafka.xml +++ b/tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/tika-config-kafka.xml @@ -28,7 +28,12 @@ <parser class="org.apache.tika.parser.pdf.PDFParser"> <params> <param name="extractActions" type="bool">true</param> - <param name="checkExtractAccessPermissions" type="bool">true</param> + <param name="accessChecker" type="org.apache.tika.parser.pdf.AccessChecker"> + <params> + <param name="needToCheck" type="bool">true</param> + <param name="allowExtractionForAccessibility" type="bool">true</param> + </params> + </param> </params> </parser> <parser class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser"> diff --git a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/tika-config-opensearch.json b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/tika-config-opensearch.json index b15e7c437..b5dc74e9c 100644 --- a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/tika-config-opensearch.json +++ b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/tika-config-opensearch.json @@ -6,7 +6,10 @@ { "pdf-parser": { "extractActions": true, - "checkExtractAccessPermissions": true + "accessChecker": { + "needToCheck": true, + "allowExtractionForAccessibility": true + } } }, { diff --git a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json index 52374a66a..017a047b9 100644 --- a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json +++ b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.json @@ -15,7 +15,10 @@ { "pdf-parser": { "extractActions": true, - "checkExtractAccessPermissions": true + "accessChecker": { + "needToCheck": true, + "allowExtractionForAccessibility": true + } } }, { diff --git a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.xml b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.xml index 5f7c3ebf6..c151bf907 100644 --- a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.xml +++ b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.xml @@ -28,7 +28,12 @@ <parser class="org.apache.tika.parser.pdf.PDFParser"> <params> <param name="extractActions" type="bool">true</param> - <param name="checkExtractAccessPermissions" type="bool">true</param> + <param name="accessChecker" type="org.apache.tika.parser.pdf.AccessChecker"> + <params> + <param name="needToCheck" type="bool">true</param> + <param name="allowExtractionForAccessibility" type="bool">true</param> + </params> + </param> </params> </parser> <parser class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser"> diff --git a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3-integration-test.xml b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3-integration-test.xml index 1ac0e28c4..7b361483c 100644 --- a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3-integration-test.xml +++ b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3-integration-test.xml @@ -28,7 +28,12 @@ <parser class="org.apache.tika.parser.pdf.PDFParser"> <params> <param name="extractActions" type="bool">true</param> - <param name="checkExtractAccessPermissions" type="bool">true</param> + <param name="accessChecker" type="org.apache.tika.parser.pdf.AccessChecker"> + <params> + <param name="needToCheck" type="bool">true</param> + <param name="allowExtractionForAccessibility" type="bool">true</param> + </params> + </param> </params> </parser> <parser class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser"> diff --git a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/solr/tika-config-solr.xml b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/solr/tika-config-solr.xml index 106a91261..53ad77b46 100644 --- a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/solr/tika-config-solr.xml +++ b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/solr/tika-config-solr.xml @@ -28,7 +28,12 @@ <parser class="org.apache.tika.parser.pdf.PDFParser"> <params> <param name="extractActions" type="bool">true</param> - <param name="checkExtractAccessPermissions" type="bool">true</param> + <param name="accessChecker" type="org.apache.tika.parser.pdf.AccessChecker"> + <params> + <param name="needToCheck" type="bool">true</param> + <param name="allowExtractionForAccessibility" type="bool">true</param> + </params> + </param> </params> </parser> <parser class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser"> diff --git a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.json b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.json index 134f77962..022600ce8 100644 --- a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.json +++ b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.json @@ -6,7 +6,10 @@ { "pdf-parser": { "extractActions": true, - "checkExtractAccessPermissions": true + "accessChecker": { + "needToCheck": true, + "allowExtractionForAccessibility": true + } } }, { diff --git a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.xml b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.xml index 7b4acb036..ba17c705c 100644 --- a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.xml +++ b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.xml @@ -28,7 +28,12 @@ <parser class="org.apache.tika.parser.pdf.PDFParser"> <params> <param name="extractActions" type="bool">true</param> - <param name="checkExtractAccessPermissions" type="bool">true</param> + <param name="accessChecker" type="org.apache.tika.parser.pdf.AccessChecker"> + <params> + <param name="needToCheck" type="bool">true</param> + <param name="allowExtractionForAccessibility" type="bool">true</param> + </params> + </param> </params> </parser> <parser class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser">
