This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch TIKA-4565
in repository https://gitbox.apache.org/repos/asf/tika.git


    from 4f485bd465 TIKA-4565 -- tweak configurations for include/exclude
     add 5387cef917 TIKA-4553-rm-tika-config-parsers-standard-package (#2440)
     add bee7682711 TIKA-4566 - allow non-config elements to standalone in array (#2442)
     add a66b33f6b2 TIKA-4567 -- update PDFParserConfig for the new world
     add 0f3dc28a26 Merge branch 'main' into TIKA-4565

No new revisions were added by this update.

Summary of changes:
 .../resources/tika-config-default-single-file.json |   5 +-
 .../org/apache/tika/config/ConfigDeserializer.java |   5 +
 .../tika/language/translate/DefaultTranslator.java |   6 +
 .../java/org/apache/tika/parser/EmptyParser.java   |   2 +
 .../java/org/apache/tika/parser/ErrorParser.java   |   2 +
 .../src/test/resources/kafka/tika-config-kafka.xml |  72 -----
 .../src/test/resources/tika-config-kafka.xml       | 123 ---------
 .../opensearch/tika-config-opensearch.json         |   5 +-
 .../src/test/resources/s3/tika-config-s3.json      |   5 +-
 .../src/test/resources/s3/tika-config-s3.xml       |  68 -----
 .../resources/tika-config-s3-integration-test.xml  | 121 --------
 .../src/test/resources/tika-config-s3ToFs.xml      |  37 ---
 .../src/test/resources/tika-config-s3Tos3.xml      |  47 ----
 .../src/test/resources/solr/tika-config-solr.xml   |  70 -----
 .../src/test/resources/tika-config-solr-urls.json  |   5 +-
 .../src/test/resources/tika-config-solr-urls.xml   | 120 --------
 .../tika-parser-pdf-module/pom.xml                 |   6 +
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  |  22 +-
 .../org/apache/tika/parser/pdf/AccessChecker.java  | 123 ---------
 .../java/org/apache/tika/parser/pdf/OCR2XHTML.java |   2 +-
 .../java/org/apache/tika/parser/pdf/OcrConfig.java | 181 ++++++++++++
 .../java/org/apache/tika/parser/pdf/PDFParser.java |  67 +++--
 .../apache/tika/parser/pdf/PDFParserConfig.java    | 303 ++++++---------------
 .../pdf/image/ImageGraphicsEngineFactory.java      |  19 --
 .../tika/renderer/pdf/pdfbox/PDFBoxRenderer.java   |   4 +-
 .../apache/tika/parser/pdf/AccessCheckerTest.java  | 135 ---------
 .../pdf/MyCustomImageGraphicsEngineFactory.java    |  53 +++-
 .../org/apache/tika/parser/pdf/PDFParserTest.java  | 173 ++++++------
 .../pdf/tika-config-custom-graphics-engine.json    |  11 +
 .../pdf/tika-config-custom-graphics-engine.xml     |  28 --
 .../tika/parser/pdf/tika-config-non-primitives.xml |  29 --
 .../org/apache/tika/parser/pdf/tika-config.json    |   9 +
 .../org/apache/tika/parser/pdf/tika-config.xml     |  26 --
 .../apache/tika/parser/pdf/tika-inline-config.json |  19 ++
 .../apache/tika/parser/pdf/tika-inline-config.xml  |  38 ---
 .../org/apache/tika/parser/pdf/tika-ocr-config.xml |  36 ---
 .../tika/parser/pdf/tika-rendering-config.xml      |  34 ---
 .../parser/pdf/tika-rendering-per-page-config.xml  |  32 ---
 .../tika/parser/pdf/tika-xml-profiler-config.xml   |  24 --
 .../tika/config/TikaConfigSerializerTest.java      |  19 +-
 .../apache/tika/config/TikaDetectorConfigTest.java |  38 +--
 .../tika/config/TikaEncodingDetectorTest.java      |   3 +-
 .../apache/tika/config/TikaParserConfigTest.java   | 164 ++++++-----
 .../tika/config/TikaTranslatorConfigTest.java      |  54 ++--
 .../org/apache/tika/detect/TestZipDetector.java    |   7 +-
 .../java/org/apache/tika/parser/TestXXEInXML.java  | 244 -----------------
 .../apache/tika/parser/crypto/TSDParserTest.java   |   3 +-
 .../parser/fork/ForkParserIntegrationTest.java     |   8 +-
 .../tika/parser/ocr/TesseractOCRParserTest.java    |   9 +-
 .../org/apache/tika/parser/pdf/PDFParserTest.java  |  42 +--
 .../pkg/CompositeZipContainerDetectorTest.java     |  14 +-
 .../org/apache/tika/parser/pkg/GzipParserTest.java |   9 +-
 .../apache/tika/parser/pkg/UnrarParserTest.java    |  10 +-
 .../configs/TIKA-1708-detector-composite.json      |  13 +
 .../test/resources/configs/tika-4424-config.xml    |  26 --
 .../src/test/resources/configs/tika-4441-120.xml   |  36 ---
 .../test/resources/configs/tika-4441-12000000.xml  |  36 ---
 .../src/test/resources/configs/tika-4441-neg1.xml  |  36 ---
 .../src/test/resources/configs/tika-4533.xml       |  47 ----
 .../configs/tika-config-digests-pdf-only.xml       |  33 ---
 .../configs/tika-config-digests-skip-container.xml |  33 ---
 .../test/resources/configs/tika-config-digests.xml |  32 ---
 ...ka-config-doubling-custom-handler-decorator.xml |  27 --
 .../tika-config-geo-point-metadata-filter.xml      |  24 --
 .../test/resources/configs/tika-config-lib-pst.xml |  26 --
 .../resources/configs/tika-config-multiple-gz.json |  14 +
 .../resources/configs/tika-config-multiple-gz.xml  |  29 --
 .../test/resources/configs/tika-unrar-config.json  |  12 +
 .../org/apache/tika/config/TIKA-1558-exclude.xml   |  29 --
 .../apache/tika/config/TIKA-1558-excludesub.xml    |  24 --
 .../tika/config/TIKA-1702-detector-exclude.xml     |  31 ---
 .../tika/config/TIKA-1702-translator-default.json  |  10 +-
 .../tika/config/TIKA-1702-translator-default.xml   |  24 --
 .../config/TIKA-1702-translator-empty-default.json |  11 +-
 .../config/TIKA-1702-translator-empty-default.xml  |  22 --
 .../tika/config/TIKA-1702-translator-empty.json    |   8 +-
 .../tika/config/TIKA-1702-translator-empty.xml     |  20 --
 .../tika/config/TIKA-1708-detector-composite.json  |  13 +-
 .../tika/config/TIKA-1708-detector-composite.xml   |  25 --
 ...-2273-encoding-detector-outside-static-init.xml |  34 ---
 ...TIKA-2273-exclude-encoding-detector-default.xml |  29 --
 .../TIKA-2273-no-icu4j-encoding-detector.xml       |  27 --
 .../TIKA-2273-non-detecting-params-bad-charset.xml |  29 --
 .../tika/config/TIKA-2273-non-detecting-params.xml |  29 --
 .../TIKA-2273-parameterize-encoding-detector.xml   |  30 --
 .../TIKA-2485-encoding-detector-mark-limits.xml    |  38 ---
 .../org/apache/tika/parser/TIKA-3137-include.xml   |  34 ---
 .../apache/tika/parser/ocr/tesseract-config.xml    |  32 ---
 .../apache/tika/config/loader/DetectorLoader.java  |   7 +
 .../apache/tika/config/loader/ParserLoader.java    |  24 +-
 .../apache/tika/config/loader/TikaJsonConfig.java  |  38 +--
 .../serialization/ParseContextDeserializer.java    |   2 +-
 .../tika/config/loader/TikaJsonConfigTest.java     | 193 +++++++++++++
 .../standard/UnpackerResourceWithConfigTest.java   |   4 +-
 94 files changed, 987 insertions(+), 2895 deletions(-)
 delete mode 100644 
tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/kafka/tika-config-kafka.xml
 delete mode 100644 
tika-integration-tests/tika-pipes-kafka-integration-tests/src/test/resources/tika-config-kafka.xml
 delete mode 100644 
tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/s3/tika-config-s3.xml
 delete mode 100644 
tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3-integration-test.xml
 delete mode 100644 
tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3ToFs.xml
 delete mode 100644 
tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3Tos3.xml
 delete mode 100644 
tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/solr/tika-config-solr.xml
 delete mode 100644 
tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AccessChecker.java
 create mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/OcrConfig.java
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/AccessCheckerTest.java
 create mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/org/apache/tika/parser/pdf/tika-config-custom-graphics-engine.json
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/org/apache/tika/parser/pdf/tika-config-custom-graphics-engine.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/org/apache/tika/parser/pdf/tika-config-non-primitives.xml
 create mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/org/apache/tika/parser/pdf/tika-config.json
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/org/apache/tika/parser/pdf/tika-config.xml
 create mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/org/apache/tika/parser/pdf/tika-inline-config.json
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/org/apache/tika/parser/pdf/tika-inline-config.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/org/apache/tika/parser/pdf/tika-ocr-config.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/org/apache/tika/parser/pdf/tika-rendering-config.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/org/apache/tika/parser/pdf/tika-rendering-per-page-config.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/org/apache/tika/parser/pdf/tika-xml-profiler-config.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/TestXXEInXML.java
 create mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/TIKA-1708-detector-composite.json
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4424-config.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4441-120.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4441-12000000.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4441-neg1.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4533.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-skip-container.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-doubling-custom-handler-decorator.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-geo-point-metadata-filter.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-lib-pst.xml
 create mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-multiple-gz.json
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-multiple-gz.xml
 create mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-unrar-config.json
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1558-exclude.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1558-excludesub.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1702-detector-exclude.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1702-translator-default.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty-default.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1708-detector-composite.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-2273-encoding-detector-outside-static-init.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-2273-exclude-encoding-detector-default.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-2273-no-icu4j-encoding-detector.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-2273-non-detecting-params-bad-charset.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-2273-non-detecting-params.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-2273-parameterize-encoding-detector.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-2485-encoding-detector-mark-limits.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml
 delete mode 100644 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/parser/ocr/tesseract-config.xml
 create mode 100644 
tika-serialization/src/test/java/org/apache/tika/config/loader/TikaJsonConfigTest.java

Reply via email to