This is an automated email from the ASF dual-hosted git repository.
tallison pushed a change to branch TIKA-3948
in repository https://gitbox.apache.org/repos/asf/tika.git
from 62950c5d9 simplify dependencies -- further cleanups
add c08ebcc4d Bump aws.version from 1.12.549 to 1.12.550
add 574a76d63 Merge pull request #1338 from apache/dependabot/maven/aws.version-1.12.550
add 2178540af Bump com.azure:azure-storage-blob from 12.23.1 to 12.24.0
add 1515b1116 Merge pull request #1340 from apache/dependabot/maven/com.azure-azure-storage-blob-12.24.0
add 67f7b8e99 Bump com.google.cloud:google-cloud-storage from 2.26.1 to 2.27.0
add 37b7892ac Merge pull request #1339 from apache/dependabot/maven/com.google.cloud-google-cloud-storage-2.27.0
add 0c3eda925 Bump org.springframework:spring-context from 5.3.29 to 5.3.30
add 45dff3e13 Merge pull request #1344 from apache/dependabot/maven/org.springframework-spring-context-5.3.30
add 4b57cb850 Bump aws.version from 1.12.550 to 1.12.551
add db89767b0 Merge pull request #1343 from apache/dependabot/maven/aws.version-1.12.551
add 3adb2e2ad TIKA-4120 -- comment out test that breaks with recent version of gdalinfo
add 3c8824608 TIKA-4123 -- general updates for 3.0.0-BETA -- upgrade commons-compress
add aeb637b57 TIKA-4133 -- add a capture group metadatafilter (#1346)
add 7da1d6b70 TIKA-4129: update tyrus
add 3f7a89544 TIKA-4129: update javadoc, mockito, jwarc, enforcer
add 86ec9ce43 TIKA-4129: update aws, plexus
add 162f0cbbc TIKA-4129: update h2, plexus
add d9f2f0840 Bump aws.version from 1.12.553 to 1.12.554
add 46c1cae3c Merge pull request #1347 from apache/dependabot/maven/aws.version-1.12.554
add 70a4481e0 Bump org.codehaus.mojo:versions-maven-plugin from 2.16.0 to 2.16.1
add 72b1ad394 Merge pull request #1348 from apache/dependabot/maven/org.codehaus.mojo-versions-maven-plugin-2.16.1
add c75460f68 Bump aws.version from 1.12.554 to 1.12.555
add e7d296e2b Merge pull request #1349 from apache/dependabot/maven/aws.version-1.12.555
add 55989e1f8 TIKA-4108 (#1351)
add b2bdb5b83 Bump io.netty:netty-bom from 4.1.97.Final to 4.1.98.Final
add d2adc3c94 Merge pull request #1352 from apache/dependabot/maven/io.netty-netty-bom-4.1.98.Final
add 72a81a16e Tika 4137 (#1353)
add 6871c9157 TIKA-4137 -- add a jdk21 build workflow
add e04c47820 TIKA-4138 -- move BoilerpipeContentHandler (#1355)
add db6f790f7 Bump org.xerial.snappy:snappy-java from 1.1.10.3 to 1.1.10.4
add 315aec2f7 Merge pull request #1357 from apache/dependabot/maven/org.xerial.snappy-snappy-java-1.1.10.4
add 967547418 Bump aws.version from 1.12.555 to 1.12.556
add 4fd18a9f5 Merge pull request #1358 from apache/dependabot/maven/aws.version-1.12.556
add 147a1682e Bump aws.version from 1.12.556 to 1.12.557
add 960fba53d Merge pull request #1359 from apache/dependabot/maven/aws.version-1.12.557
add caf312593 TIKA-4135 -- remove xerces2 as a dependency (#1360)
add 5361b6d12 TIKA-1599 (#1356)
add 79b1d9563 fix unit test that is failing in github actions' environment
add 07198df1d Bump com.google.cloud:google-cloud-storage from 2.27.0 to 2.27.1
add 3750ff054 Merge pull request #1361 from apache/dependabot/maven/com.google.cloud-google-cloud-storage-2.27.1
add 4f0a0383a Bump aws.version from 1.12.557 to 1.12.558
add e431b8ba0 Merge pull request #1362 from apache/dependabot/maven/aws.version-1.12.558
add ea44239bd Merge remote-tracking branch 'origin/main' into TIKA-3948
add fa65c11c3 merge upstream
No new revisions were added by this update.
Summary of changes:
.../{main-jdk17-build.yml => main-jdk21-build.yml} | 4 +-
CHANGES.txt | 14 ++
pom.xml | 3 +
tika-app/pom.xml | 2 +-
.../test/java/org/apache/tika/cli/TikaCLITest.java | 4 +-
.../src/test/resources/test-data/tika-config1.xml | 2 +-
tika-bom/pom.xml | 13 +-
tika-bundles/tika-bundle-standard/pom.xml | 8 +-
tika-core/pom.xml | 10 +-
.../filter/CaptureGroupMetadataFilter.java | 110 ++++++++++
.../java/org/apache/tika/utils/XMLReaderUtils.java | 10 +
.../tika/metadata/filter/TestMetadataFilter.java | 53 +++++
.../apache/tika/sax/CustomErrorHandlerTest.java | 2 +
.../tika/sax/ErrorResistantSAXParserFactory.java | 39 ----
...e.xml => TIKA-4133-capture-group-overwrite.xml} | 9 +-
...137-exclude.xml => TIKA-4133-capture-group.xml} | 9 +-
.../org/apache/tika/example/TIAParsingExample.java | 6 +-
tika-handlers/README.md | 2 +
.../tika-emitter-jdbc => tika-handlers}/pom.xml | 24 +-
.../tika-handler-boilerpipe}/pom.xml | 21 +-
.../sax/boilerpipe/BoilerpipeContentHandler.java | 0
tika-langdetect/tika-langdetect-optimaize/pom.xml | 13 +-
tika-parent/pom.xml | 97 ++++----
.../apache/tika/parser/gdal/TestGDALParser.java | 6 +-
.../src/test/resources/2.4.0-no-tesseract.txt | 8 +-
.../src/test/resources/2.4.0-tesseract.txt | 8 +-
.../src/test/resources/2.4.1-no-tesseract.txt | 8 +-
.../src/test/resources/2.4.1-tesseract.txt | 8 +-
.../tika-parser-tagsoup-module/pom.xml | 34 +++
.../tika/parser/html/tagsoup}/DataURIScheme.java | 2 +-
.../html/tagsoup}/DataURISchemeParseException.java | 2 +-
.../parser/html/tagsoup}/DataURISchemeUtil.java | 2 +-
.../parser/html/tagsoup}/DefaultHtmlMapper.java | 2 +-
.../parser/html/tagsoup}/HtmlEncodingDetector.java | 2 +-
.../tika/parser/html/tagsoup}/HtmlHandler.java | 2 +-
.../tika/parser/html/tagsoup}/HtmlMapper.java | 2 +-
.../tika/parser/html/tagsoup}/HtmlParser.java | 2 +-
.../parser/html/tagsoup}/IdentityHtmlMapper.java | 2 +-
.../html/tagsoup}/XHTMLDowngradeHandler.java | 2 +-
.../tagsoup}/charsetdetector/CharsetAliases.java | 6 +-
.../charsetdetector/CharsetDetectionResult.java | 2 +-
.../tagsoup}/charsetdetector/MetaProcessor.java | 6 +-
.../html/tagsoup}/charsetdetector/PreScanner.java | 2 +-
.../StandardHtmlEncodingDetector.java | 6 +-
.../charsets/ReplacementCharset.java | 2 +-
.../charsets/XUserDefinedCharset.java | 2 +-
.../org.apache.tika.detect.EncodingDetector | 2 +-
.../services/org.apache.tika.parser.Parser | 2 +-
.../StandardCharsets_unsupported_by_IANA.txt | 0
.../html/tagsoup}/DataURISchemeParserTest.java | 3 +-
.../html/tagsoup}/HtmlEncodingDetectorTest.java | 3 +-
.../tika/parser/html/tagsoup}/HtmlParserTest.java | 5 +-
.../tika/parser/html/tagsoup}/SrcDocTest.java | 2 +-
.../tagsoup}/StandardHtmlEncodingDetectorTest.java | 6 +-
.../org/apache/tika/parser/html/tika-config.xml | 4 +-
.../resources/test-documents/big-preamble.html | 0
.../test-documents/boilerplate-whitespace.html | 0
.../test/resources/test-documents/boilerplate.html | 0
.../testBoilerplateMissingSpace.html | 0
.../test/resources/test-documents/testHTML.html | 0
.../test-documents/testHTMLBadScript.html | 0
.../test-documents/testHTMLGoodScript.html | 0
.../testHTMLNoisyMetaEncoding_1.html | 0
.../testHTMLNoisyMetaEncoding_2.html | 0
.../testHTMLNoisyMetaEncoding_3.html | 0
.../testHTMLNoisyMetaEncoding_4.html | 0
.../test-documents/testHTML_charset_utf16le.html | Bin
.../test-documents/testHTML_charset_utf8.html | 0
.../testHTML_embedded_data_uri_js.html | 0
.../test-documents/testHTML_embedded_img.html | 0
.../testHTML_embedded_img_in_js.html | 0
.../resources/test-documents/testHTML_head.html | 0
.../test-documents/testHTML_metadata.html | 0
.../testHTML_metadata_two_titles.html | 0
.../resources/test-documents/testHTML_utf8.html | 0
.../test/resources/test-documents/testSrcDoc.html | 0
.../test-documents/testUserDefinedCharset.mhtml | 0
.../test/resources/test-documents/testXHTML.html | 0
.../src/test/resources/test-documents/tika434.html | 0
.../pom.xml | 46 +---
.../tika-parsers-ml/tika-age-recogniser/pom.xml | 2 +-
.../tika-parsers-ml/tika-parser-nlp-module/pom.xml | 4 +
.../tika-parsers-standard-modules/pom.xml | 1 -
.../tika-parser-html-commons/README.md | 22 --
.../tika-parser-html-commons/pom.xml | 74 -------
.../tika-parser-html-module/pom.xml | 5 +-
.../org/apache/tika/parser/html/JSoupParser.java | 243 +++++++++++++++++++++
.../services/org.apache.tika.parser.Parser | 2 +-
.../apache/tika/parser/html/HtmlParserTest.java | 121 ++++------
.../org/apache/tika/parser/html/tika-config.xml | 4 +-
.../tika/parser/mail/MailContentHandler.java | 4 +-
.../tika-parser-microsoft-module/pom.xml | 2 -
.../tika/parser/microsoft/JackcessExtractor.java | 6 +-
.../tika/parser/microsoft/OutlookExtractor.java | 6 +-
.../tika/parser/microsoft/chm/ChmParser.java | 6 +-
.../tika-parser-xml-module/pom.xml | 4 -
.../tika-parsers-standard-package/pom.xml | 2 +-
.../apache/tika/parser/TestXMLEntityExpansion.java | 4 +-
.../java/org/apache/tika/parser/XMLTestBase.java | 3 +-
.../tika/parser/microsoft/rtf/RTFParserTest.java | 2 +-
.../org/apache/tika/sax/BoilerpipeHandlerTest.java | 21 +-
tika-server/tika-server-core/pom.xml | 2 +-
.../tika/server/core/resource/TikaResource.java | 1 +
tika-server/tika-server-standard/pom.xml | 6 +-
104 files changed, 744 insertions(+), 449 deletions(-)
copy .github/workflows/{main-jdk17-build.yml => main-jdk21-build.yml} (96%)
create mode 100644 tika-core/src/main/java/org/apache/tika/metadata/filter/CaptureGroupMetadataFilter.java
delete mode 100644 tika-core/src/test/java/org/apache/tika/sax/ErrorResistantSAXParserFactory.java
copy
tika-core/src/test/resources/org/apache/tika/config/{TIKA-3137-exclude.xml =>
TIKA-4133-capture-group-overwrite.xml} (81%)
copy
tika-core/src/test/resources/org/apache/tika/config/{TIKA-3137-exclude.xml =>
TIKA-4133-capture-group.xml} (81%)
create mode 100644 tika-handlers/README.md
copy {tika-pipes/tika-emitters/tika-emitter-jdbc => tika-handlers}/pom.xml
(70%)
copy
{tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-jdbc-commons
=> tika-handlers/tika-handler-boilerpipe}/pom.xml (66%)
rename
{tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-commons
=>
tika-handlers/tika-handler-boilerpipe}/src/main/java/org/apache/tika/sax/boilerpipe/BoilerpipeContentHandler.java
(100%)
create mode 100644
tika-parsers/tika-parsers-extended/tika-parser-tagsoup-module/pom.xml
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/DataURIScheme.java
(98%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/DataURISchemeParseException.java
(95%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/DataURISchemeUtil.java
(98%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/DefaultHtmlMapper.java
(99%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/HtmlEncodingDetector.java
(99%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/HtmlHandler.java
(99%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/HtmlMapper.java
(98%)
rename
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/HtmlParser.java
(99%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/IdentityHtmlMapper.java
(96%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/XHTMLDowngradeHandler.java
(98%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/CharsetAliases.java
(97%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/CharsetDetectionResult.java
(97%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/MetaProcessor.java
(92%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/PreScanner.java
(99%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/StandardHtmlEncodingDetector.java
(95%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/charsets/ReplacementCharset.java
(96%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/charsets/XUserDefinedCharset.java
(96%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/main/resources/META-INF/services/org.apache.tika.detect.EncodingDetector
(93%)
copy {tika-core/src/test =>
tika-parsers/tika-parsers-extended/tika-parser-tagsoup-module/src/main}/resources/META-INF/services/org.apache.tika.parser.Parser
(94%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/resources/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/main/resources/org/apache/tika/parser/html/tagsoup}/StandardCharsets_unsupported_by_IANA.txt
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/test/java/org/apache/tika/parser/html/tagsoup}/DataURISchemeParserTest.java
(96%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/test/java/org/apache/tika/parser/html/tagsoup}/HtmlEncodingDetectorTest.java
(97%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/test/java/org/apache/tika/parser/html/tagsoup}/HtmlParserTest.java
(99%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/test/java/org/apache/tika/parser/html/tagsoup}/SrcDocTest.java
(97%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html
=>
tika-parsers-extended/tika-parser-tagsoup-module/src/test/java/org/apache/tika/parser/html/tagsoup}/StandardHtmlEncodingDetectorTest.java
(98%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/org/apache/tika/parser/html/tika-config.xml
(87%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/big-preamble.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/boilerplate-whitespace.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/boilerplate.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testBoilerplateMissingSpace.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLBadScript.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLGoodScript.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLNoisyMetaEncoding_1.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLNoisyMetaEncoding_2.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLNoisyMetaEncoding_3.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLNoisyMetaEncoding_4.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_charset_utf16le.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_charset_utf8.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_embedded_data_uri_js.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_embedded_img.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_embedded_img_in_js.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_head.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_metadata.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_metadata_two_titles.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_utf8.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testSrcDoc.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testUserDefinedCharset.mhtml
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testXHTML.html
(100%)
copy
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
=>
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/tika434.html
(100%)
copy tika-parsers/tika-parsers-extended/{tika-parser-sqlite3-package =>
tika-parser-tagsoup-package}/pom.xml (62%)
delete mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-commons/README.md
delete mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-commons/pom.xml
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html/JSoupParser.java