package com.kmslh.web.studio.source.impl;

import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

import java.io.IOException;
import java.io.InputStream;

/**
 * Embedded-document extractor that, in addition to the parent class's
 * parsing, writes an XHTML {@code <img>} element for embedded documents
 * whose metadata carries a {@code "dataUri"} entry.
 */
public class MyEmbeddedDocumentExtractor extends ParsingEmbeddedDocumentExtractor {

    /**
     * Creates the extractor, forwarding the parse context to the parent class.
     *
     * @param context parse context passed through to the superclass constructor
     */
    public MyEmbeddedDocumentExtractor(ParseContext context) {
        super(context);
    }

    /**
     * Delegates to the parent implementation first, then, when the metadata
     * has a non-null {@code "dataUri"} value, emits an empty {@code <img>}
     * element on {@code handler} whose {@code src} attribute is that value.
     * No {@code alt} attribute is written.
     *
     * @param stream     embedded document content, forwarded to the superclass
     * @param handler    SAX handler that receives the {@code <img>} start/end events
     * @param metadata   embedded document metadata; read for the {@code "dataUri"} key
     * @param outputHtml forwarded unchanged to the superclass
     * @throws SAXException if the superclass or the handler raises it
     * @throws IOException  if the superclass raises it
     */
    @Override
    public void parseEmbedded(InputStream stream, ContentHandler handler, Metadata metadata, boolean outputHtml)
            throws SAXException, IOException {
        super.parseEmbedded(stream, handler, metadata, outputHtml);

        // The lookup happens after the parent call, so it sees the metadata as
        // it stands once the parent has finished with it.
        final String imageSource = metadata.get("dataUri");
        if (imageSource == null) {
            return;
        }

        final AttributesImpl imgAttributes = new AttributesImpl();
        imgAttributes.addAttribute("", "src", "src", "CDATA", imageSource);

        // Start and end are emitted back to back: the element has no content.
        handler.startElement(XHTMLContentHandler.XHTML, "img", "img", imgAttributes);
        handler.endElement(XHTMLContentHandler.XHTML, "img", "img");
    }
}
