This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch TIKA-4636-simplify-embedded-extractor-handling
in repository https://gitbox.apache.org/repos/asf/tika.git


    from d64041cf2d Merge origin/main into simplify-embedded-extractor-handling
     add 222e0859e3 TIKA-4638 -- unify sax style configuration (#2557)
     add 1828685d31 merge main, fix conflicts

No new revisions were added by this update.

Summary of changes:
 .../ParsingEmbeddedDocumentExtractor.java          |  18 +--
 .../tika/extractor/StandardExtractorFactory.java   |  11 +-
 .../java/org/apache/tika/parser/EmptyParser.java   |   2 +-
 .../tika/parser/external/ExternalParser.java       |   2 +-
 .../tika/parser/external2/ExternalParser.java      |   2 +-
 .../java/org/apache/tika/sax/SAXOutputConfig.java  |  76 +++++++++++++
 .../org/apache/tika/sax/XHTMLContentHandler.java   |  87 +++++++++++----
 .../apache/tika/sax/XHTMLContentHandlerTest.java   | 123 +++++++++++++++++++++
 .../org/apache/tika/example/RollbackSoftware.java  |   2 +-
 .../org/apache/custom/parser/MyCustomParser.java   |   2 +-
 .../apache/tika/parser/envi/EnviHeaderParser.java  |   2 +-
 .../org/apache/tika/parser/gdal/GDALParser.java    |   8 +-
 .../geoinfo/GeographicInformationParser.java       |   2 +-
 .../org/apache/tika/parser/grib/GribParser.java    |   2 +-
 .../java/org/apache/tika/parser/hdf/HDFParser.java |   2 +-
 .../apache/tika/parser/isatab/ISArchiveParser.java |   2 +-
 .../apache/tika/parser/netcdf/NetCDFParser.java    |   2 +-
 .../apache/tika/parser/ner/NamedEntityParser.java  |   2 +-
 .../parser/transcribe/aws/AmazonTranscribe.java    |   2 +-
 .../tika/parser/apple/AppleSingleFileParser.java   |   2 +-
 .../org/apache/tika/parser/apple/PListParser.java  |   2 +-
 .../tika/parser/iwork/IWorkPackageParser.java      |   2 +-
 .../parser/iwork/iwana/IWork13PackageParser.java   |   2 +-
 .../org/apache/tika/parser/audio/AudioParser.java  |   2 +-
 .../org/apache/tika/parser/audio/MidiParser.java   |   2 +-
 .../java/org/apache/tika/parser/mp3/Mp3Parser.java |   2 +-
 .../java/org/apache/tika/parser/mp4/MP4Parser.java |   2 +-
 .../org/apache/tika/parser/ogg/FlacParser.java     |   2 +-
 .../java/org/apache/tika/parser/ogg/OggParser.java |   2 +-
 .../org/apache/tika/parser/ogg/OpusParser.java     |   2 +-
 .../org/apache/tika/parser/ogg/SpeexParser.java    |   2 +-
 .../org/apache/tika/parser/ogg/TheoraParser.java   |   2 +-
 .../org/apache/tika/parser/ogg/VorbisParser.java   |   2 +-
 .../org/apache/tika/parser/video/FLVParser.java    |   2 +-
 .../org/apache/tika/parser/dgn/DGN8Parser.java     |   2 +-
 .../java/org/apache/tika/parser/dwg/DWGParser.java |   2 +-
 .../org/apache/tika/parser/dwg/DWGReadParser.java  |   2 +-
 .../java/org/apache/tika/parser/prt/PRTParser.java |   2 +-
 .../org/apache/tika/parser/asm/ClassParser.java    |   2 +-
 .../apache/tika/parser/asm/XHTMLClassVisitor.java  |   5 +-
 .../apache/tika/parser/code/SourceCodeParser.java  |   2 +-
 .../tika/parser/executable/ExecutableParser.java   |   2 +-
 .../executable/UniversalExecutableParser.java      |   2 +-
 .../java/org/apache/tika/parser/mat/MatParser.java |   2 +-
 .../org/apache/tika/parser/sas/SAS7BDATParser.java |   2 +-
 .../org/apache/tika/parser/crypto/TSDParser.java   |   2 +-
 .../tika/parser/font/AdobeFontMetricParser.java    |   2 +-
 .../apache/tika/parser/font/TrueTypeParser.java    |   2 +-
 .../org/apache/tika/parser/html/HtmlHandler.java   |   2 +-
 .../tika/parser/image/AbstractImageParser.java     |   4 +-
 .../org/apache/tika/parser/image/ICNSParser.java   |   2 +-
 .../org/apache/tika/parser/image/PSDParser.java    |   2 +-
 .../org/apache/tika/parser/image/WebPParser.java   |   2 +-
 .../apache/tika/parser/jdbc/AbstractDBParser.java  |   2 +-
 .../org/apache/tika/parser/mail/RFC822Parser.java  |   2 +-
 .../org/apache/tika/parser/mbox/MboxParser.java    |   2 +-
 .../apache/tika/parser/microsoft/EMFParser.java    |   2 +-
 .../tika/parser/microsoft/JackcessParser.java      |   2 +-
 .../tika/parser/microsoft/MSOwnerFileParser.java   |   2 +-
 .../apache/tika/parser/microsoft/OfficeParser.java |   2 +-
 .../tika/parser/microsoft/OldExcelParser.java      |   2 +-
 .../apache/tika/parser/microsoft/TNEFParser.java   |   2 +-
 .../apache/tika/parser/microsoft/WMFParser.java    |   2 +-
 .../microsoft/activemime/ActiveMimeParser.java     |   2 +-
 .../tika/parser/microsoft/chm/ChmParser.java       |   2 +-
 .../tika/parser/microsoft/libpst/LibPstParser.java |   2 +-
 .../parser/microsoft/onenote/OneNoteParser.java    |   2 +-
 .../microsoft/ooxml/AbstractOOXMLExtractor.java    |   2 +-
 .../ooxml/xwpf/ml2006/Word2006MLParser.java        |   2 +-
 .../parser/microsoft/pst/OutlookPSTParser.java     |   2 +-
 .../parser/microsoft/pst/PSTMailItemParser.java    |   2 +-
 .../tika/parser/microsoft/rtf/RTFParser.java       |   2 +-
 .../microsoft/xml/AbstractXML2003Parser.java       |   2 +-
 .../java/org/apache/tika/parser/dbf/DBFParser.java |   2 +-
 .../java/org/apache/tika/parser/dif/DIFParser.java |   2 +-
 .../org/apache/tika/parser/epub/EpubParser.java    |   2 +-
 .../org/apache/tika/parser/hwp/HwpV5Parser.java    |   2 +-
 .../apache/tika/parser/indesign/IDMLParser.java    |   2 +-
 .../java/org/apache/tika/parser/mif/MIFParser.java |   2 +-
 .../tika/parser/odf/FlatOpenDocumentParser.java    |   2 +-
 .../tika/parser/odf/OpenDocumentContentParser.java |   2 +-
 .../apache/tika/parser/odf/OpenDocumentParser.java |   2 +-
 .../tika/parser/wordperfect/QuattroProParser.java  |   2 +-
 .../tika/parser/wordperfect/WordPerfectParser.java |   2 +-
 .../org/apache/tika/parser/feed/FeedParser.java    |   2 +-
 .../apache/tika/parser/iptc/IptcAnpaParser.java    |   2 +-
 .../apache/tika/parser/ocr/TesseractOCRParser.java |   2 +-
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  |   2 +-
 .../java/org/apache/tika/parser/pdf/PDFParser.java |   2 +-
 .../apache/tika/parser/pkg/CompressorParser.java   |   2 +-
 .../org/apache/tika/parser/pkg/PackageParser.java  |   2 +-
 .../java/org/apache/tika/parser/pkg/RarParser.java |   2 +-
 .../org/apache/tika/parser/pkg/UnrarParser.java    |   2 +-
 .../apache/tika/parser/csv/TextAndCSVParser.java   |   8 +-
 .../tika/parser/strings/Latin1StringsParser.java   |   2 +-
 .../apache/tika/parser/strings/StringsParser.java  |   2 +-
 .../java/org/apache/tika/parser/txt/TXTParser.java |   2 +-
 .../org/apache/tika/parser/http/HttpParser.java    |   2 +-
 .../org/apache/tika/parser/wacz/WACZParser.java    |   2 +-
 .../org/apache/tika/parser/warc/WARCParser.java    |   2 +-
 .../java/org/apache/tika/parser/tmx/TMXParser.java |   2 +-
 .../apache/tika/parser/xliff/XLIFF12Parser.java    |   2 +-
 .../org/apache/tika/parser/xliff/XLZParser.java    |   2 +-
 .../java/org/apache/tika/parser/xml/XMLParser.java |   2 +-
 .../tika/parser/AutoDetectReaderParserTest.java    |   2 +-
 .../org/apache/tika/config/loader/TikaLoader.java  |   2 +
 106 files changed, 384 insertions(+), 150 deletions(-)
 create mode 100644 tika-core/src/main/java/org/apache/tika/sax/SAXOutputConfig.java

Reply via email to