This is an automated email from the ASF dual-hosted git repository.

tballison pushed a change to branch TIKA-4728-js-in-pdf
in repository https://gitbox.apache.org/repos/asf/tika.git


    from 571ffb5b23 TIKA-4728 - general xhtml fixes and tests
     add 155d2f6806 TIKA-4728 - add strict validation as an option

No new revisions were added by this update.

Summary of changes:
 .../tika/sax/BasicContentHandlerFactory.java       |  28 +++
 .../org/apache/tika/sax/StrictXHTMLValidator.java  | 227 +++++++++++++++++++++
 .../tika/AbstractXHTMLWellFormednessTest.java      |  84 --------
 .../src/test/java/org/apache/tika/TikaTest.java    |  61 +++---
 tika-parent/pom.xml                                |  23 ---
 .../tika/parser/XHTMLWellFormednessTest.java       |  22 --
 .../tika/parser/iwork/XHTMLWellFormednessTest.java |  22 --
 .../parser/audiovideo/XHTMLWellFormednessTest.java |  22 --
 .../tika/parser/dwg/XHTMLWellFormednessTest.java   |  22 --
 .../apache/tika/parser/code/SourceCodeParser.java  |  25 +++
 .../tika/parser/code/SourceCodeParserTest.java     |   1 -
 .../tika/parser/code/XHTMLWellFormednessTest.java  |  22 --
 .../parser/crypto/XHTMLWellFormednessTest.java     |  22 --
 .../tika/parser/font/XHTMLWellFormednessTest.java  |  22 --
 .../tika/parser/html/XHTMLWellFormednessTest.java  |  22 --
 .../tika/parser/image/XHTMLWellFormednessTest.java |  22 --
 .../tika/parser/mail/XHTMLWellFormednessTest.java  |  22 --
 .../parser/microsoft/XHTMLWellFormednessTest.java  |  22 --
 .../parser/miscoffice/XHTMLWellFormednessTest.java |  22 --
 .../tika/parser/feed/XHTMLWellFormednessTest.java  |  22 --
 .../tika/parser/ocr/XHTMLWellFormednessTest.java   |  22 --
 .../org/apache/tika/parser/pdf/PDFParserTest.java  |   5 +-
 .../tika/parser/pdf/XHTMLWellFormednessTest.java   |  22 --
 .../tika/parser/pkg/XHTMLWellFormednessTest.java   |  35 ----
 .../java/org/apache/tika/parser/txt/TXTParser.java |  27 ++-
 .../tika/parser/txt/XHTMLWellFormednessTest.java   |  22 --
 .../tika/parser/warc/XHTMLWellFormednessTest.java  |  22 --
 .../tika/parser/xml/XHTMLWellFormednessTest.java   |  22 --
 28 files changed, 328 insertions(+), 584 deletions(-)
 create mode 100644 tika-core/src/main/java/org/apache/tika/sax/StrictXHTMLValidator.java
 delete mode 100644 tika-core/src/test/java/org/apache/tika/AbstractXHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-integration-tests/src/test/java/org/apache/tika/parser/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-apple-module/src/test/java/org/apache/tika/parser/iwork/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/parser/audiovideo/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/dwg/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-code-module/src/test/java/org/apache/tika/parser/code/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-crypto-module/src/test/java/org/apache/tika/parser/crypto/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-font-module/src/test/java/org/apache/tika/parser/font/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/test/java/org/apache/tika/parser/mail/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/miscoffice/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-news-module/src/test/java/org/apache/tika/parser/feed/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-ocr-module/src/test/java/org/apache/tika/parser/ocr/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-webarchive-module/src/test/java/org/apache/tika/parser/warc/XHTMLWellFormednessTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-xml-module/src/test/java/org/apache/tika/parser/xml/XHTMLWellFormednessTest.java

Reply via email to