This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/tika.git
from c01c048a9 TIKA-4025 -- extract Java's ImageReader's num images into the metadata. (#1108)
 add b116d71bf TIKA-4017 -- extract incremental updates (#1085)

No new revisions were added by this update.

Summary of changes:
 CHANGES.txt | 3 +
 .../src/main/java/org/apache/tika/cli/TikaCLI.java | 8 +-
 .../java/org/apache/tika/metadata/Metadata.java | 24 ++
 .../main/java/org/apache/tika/metadata/PDF.java | 19 ++
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 131 ++++++++---
 .../java/org/apache/tika/parser/pdf/PDFParser.java | 100 ++++++++
 .../apache/tika/parser/pdf/PDFParserConfig.java | 36 +++
 .../pdf/updates/IncrementalUpdateRecord.java | 27 +--
 .../parser/pdf/updates/IsIncrementalUpdate.java | 6 +-
 .../tika/parser/pdf/updates/StartXRefOffset.java | 68 ++++++
 .../tika/parser/pdf/updates/StartXRefScanner.java | 252 +++++++++++++++++++++
 .../tika/parser/pdf/PDFIncrementalUpdatesTest.java | 172 ++++++++++++++
 .../src/test/resources/log4j2.xml | 2 +-
 .../test-documents/testPDF_incrementalUpdates.pdf | Bin 0 -> 64872 bytes
 14 files changed, 789 insertions(+), 59 deletions(-)
 copy tika-batch/src/main/java/org/apache/tika/batch/fs/FSConsumersManager.java => tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/updates/IncrementalUpdateRecord.java (65%)
 copy tika-core/src/main/java/org/apache/tika/language/detect/LanguageConfidence.java => tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/updates/IsIncrementalUpdate.java (82%)
 create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/updates/StartXRefOffset.java
 create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/updates/StartXRefScanner.java
 create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFIncrementalUpdatesTest.java
 create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/test-documents/testPDF_incrementalUpdates.pdf