This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch TIKA-1599
in repository https://gitbox.apache.org/repos/asf/tika.git


    from 1d4e6ebb6 TIKA-1599 -- migrate to jsoup parser -- remove runtime exception
     add f05e9b45e TIKA-1599 -- migrate to jsoup parser -- mv tagsoup htmlparser to tika-parsers-extended

No new revisions were added by this update.

Summary of changes:
 pom.xml                                            |   2 +
 tika-bom/pom.xml                                   |  11 ++-
 tika-parent/pom.xml                                |   5 +
 .../tika-parser-tagsoup-module/pom.xml             |  34 +++++++
 .../tika/parser/html/tagsoup}/DataURIScheme.java   |   2 +-
 .../html/tagsoup}/DataURISchemeParseException.java |   2 +-
 .../parser/html/tagsoup}/DataURISchemeUtil.java    |   2 +-
 .../parser/html/tagsoup}/DefaultHtmlMapper.java    |   2 +-
 .../parser/html/tagsoup}/HtmlEncodingDetector.java |   2 +-
 .../tika/parser/html/tagsoup}/HtmlHandler.java     |   2 +-
 .../tika/parser/html/tagsoup}/HtmlMapper.java      |   2 +-
 .../tika/parser/html/tagsoup}/HtmlParser.java      |   2 +-
 .../parser/html/tagsoup}/IdentityHtmlMapper.java   |   2 +-
 .../html/tagsoup}/XHTMLDowngradeHandler.java       |   2 +-
 .../tagsoup}/charsetdetector/CharsetAliases.java   |   6 +-
 .../charsetdetector/CharsetDetectionResult.java    |   2 +-
 .../tagsoup}/charsetdetector/MetaProcessor.java    |   6 +-
 .../html/tagsoup}/charsetdetector/PreScanner.java  |   2 +-
 .../StandardHtmlEncodingDetector.java              |   6 +-
 .../charsets/ReplacementCharset.java               |   2 +-
 .../charsets/XUserDefinedCharset.java              |   2 +-
 .../org.apache.tika.detect.EncodingDetector        |   2 +-
 .../services/org.apache.tika.parser.Parser         |   2 +-
 .../StandardCharsets_unsupported_by_IANA.txt       |   0
 .../html/tagsoup}/DataURISchemeParserTest.java     |   3 +-
 .../html/tagsoup}/HtmlEncodingDetectorTest.java    |   3 +-
 .../tika/parser/html/tagsoup}/HtmlParserTest.java  |   5 +-
 .../tika/parser/html/tagsoup}/SrcDocTest.java      |   2 +-
 .../tagsoup}/StandardHtmlEncodingDetectorTest.java |   6 +-
 .../org/apache/tika/parser/html/tika-config.xml    |   4 +-
 .../resources/test-documents/big-preamble.html     |   0
 .../test-documents/boilerplate-whitespace.html     |   0
 .../test/resources/test-documents/boilerplate.html |   0
 .../testBoilerplateMissingSpace.html               |   0
 .../test/resources/test-documents/testHTML.html    |   0
 .../test-documents/testHTMLBadScript.html          |   0
 .../test-documents/testHTMLGoodScript.html         |   0
 .../testHTMLNoisyMetaEncoding_1.html               |   0
 .../testHTMLNoisyMetaEncoding_2.html               |   0
 .../testHTMLNoisyMetaEncoding_3.html               |   0
 .../testHTMLNoisyMetaEncoding_4.html               |   0
 .../test-documents/testHTML_charset_utf16le.html   | Bin
 .../test-documents/testHTML_charset_utf8.html      |   0
 .../testHTML_embedded_data_uri_js.html             |   0
 .../test-documents/testHTML_embedded_img.html      |   0
 .../testHTML_embedded_img_in_js.html               |   0
 .../resources/test-documents/testHTML_head.html    |   0
 .../test-documents/testHTML_metadata.html          |   0
 .../testHTML_metadata_two_titles.html              |   0
 .../resources/test-documents/testHTML_utf8.html    |   0
 .../test/resources/test-documents/testSrcDoc.html  |   0
 .../test-documents/testUserDefinedCharset.mhtml    |   0
 .../test/resources/test-documents/testXHTML.html   |   0
 .../src/test/resources/test-documents/tika434.html |   0
 .../pom.xml                                        |  46 ++-------
 .../tika-parser-html-module/pom.xml                |   6 --
 .../org.apache.tika.detect.EncodingDetector        |   2 +-
 .../apache/tika/parser/html/HtmlParserTest.java    | 107 +++++++--------------
 ...TIKA-2273-exclude-encoding-detector-default.xml |   2 +-
 .../TIKA-2485-encoding-detector-mark-limits.xml    |   2 +-
 60 files changed, 138 insertions(+), 152 deletions(-)
 create mode 100644 tika-parsers/tika-parsers-extended/tika-parser-tagsoup-module/pom.xml
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/DataURIScheme.java
 (98%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/DataURISchemeParseException.java
 (95%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/DataURISchemeUtil.java
 (98%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/DefaultHtmlMapper.java
 (99%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/HtmlEncodingDetector.java
 (99%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/HtmlHandler.java
 (99%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/HtmlMapper.java
 (98%)
 rename 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/HtmlParser.java
 (99%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/IdentityHtmlMapper.java
 (96%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/XHTMLDowngradeHandler.java
 (98%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/CharsetAliases.java
 (97%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/CharsetDetectionResult.java
 (97%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/MetaProcessor.java
 (92%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/PreScanner.java
 (99%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/StandardHtmlEncodingDetector.java
 (95%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/charsets/ReplacementCharset.java
 (96%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/java/org/apache/tika/parser/html/tagsoup}/charsetdetector/charsets/XUserDefinedCharset.java
 (96%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/main/resources/META-INF/services/org.apache.tika.detect.EncodingDetector
 (93%)
 copy {tika-core/src/test => 
tika-parsers/tika-parsers-extended/tika-parser-tagsoup-module/src/main}/resources/META-INF/services/org.apache.tika.parser.Parser
 (94%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/main/resources/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/main/resources/org/apache/tika/parser/html/tagsoup}/StandardCharsets_unsupported_by_IANA.txt
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/test/java/org/apache/tika/parser/html/tagsoup}/DataURISchemeParserTest.java
 (96%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/test/java/org/apache/tika/parser/html/tagsoup}/HtmlEncodingDetectorTest.java
 (97%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/test/java/org/apache/tika/parser/html/tagsoup}/HtmlParserTest.java
 (99%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/test/java/org/apache/tika/parser/html/tagsoup}/SrcDocTest.java
 (97%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html
 => 
tika-parsers-extended/tika-parser-tagsoup-module/src/test/java/org/apache/tika/parser/html/tagsoup}/StandardHtmlEncodingDetectorTest.java
 (98%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/org/apache/tika/parser/html/tika-config.xml
 (87%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/big-preamble.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/boilerplate-whitespace.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/boilerplate.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testBoilerplateMissingSpace.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLBadScript.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLGoodScript.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLNoisyMetaEncoding_1.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLNoisyMetaEncoding_2.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLNoisyMetaEncoding_3.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTMLNoisyMetaEncoding_4.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_charset_utf16le.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_charset_utf8.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_embedded_data_uri_js.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_embedded_img.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_embedded_img_in_js.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_head.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_metadata.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_metadata_two_titles.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testHTML_utf8.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testSrcDoc.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testUserDefinedCharset.mhtml
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/testXHTML.html
 (100%)
 copy 
tika-parsers/{tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module
 => 
tika-parsers-extended/tika-parser-tagsoup-module}/src/test/resources/test-documents/tika434.html
 (100%)
 copy tika-parsers/tika-parsers-extended/{tika-parser-sqlite3-package => 
tika-parser-tagsoup-package}/pom.xml (62%)

Reply via email to