This is an automated email from the ASF dual-hosted git repository. nick pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/tika.git
commit 4d3a43c1682ca88f8c5a88ea1b34cd6fb105f997
Author: Nick Burch <n...@gagravarr.org>
AuthorDate: Thu Apr 27 15:31:16 2017 +0100

    TIKA-2345 Tika Config Serialisation of EncodingDetector details
---
 .../apache/tika/config/TikaConfigSerializer.java | 36 +++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
index 79e20ca..0701955 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
@@ -22,6 +22,7 @@ import javax.xml.transform.Transformer;
 import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
+
 import java.io.Writer;
 import java.nio.charset.Charset;
 import java.util.Collections;
@@ -30,8 +31,11 @@ import java.util.Set;
 import java.util.TreeSet;
 
 import org.apache.tika.detect.CompositeDetector;
+import org.apache.tika.detect.CompositeEncodingDetector;
 import org.apache.tika.detect.DefaultDetector;
+import org.apache.tika.detect.DefaultEncodingDetector;
 import org.apache.tika.detect.Detector;
+import org.apache.tika.detect.EncodingDetector;
 import org.apache.tika.language.translate.DefaultTranslator;
 import org.apache.tika.language.translate.Translator;
 import org.apache.tika.mime.MediaType;
@@ -79,6 +83,7 @@ public class TikaConfigSerializer {
         addMimeComment(mode, rootElement, doc);
         addServiceLoader(mode, rootElement, doc, config);
         addExecutorService(mode, rootElement, doc, config);
+        addEncodingDetectors(mode, rootElement, doc, config);
         addTranslator(mode, rootElement, doc, config);
         addDetectors(mode, rootElement, doc, config);
         addParsers(mode, rootElement, doc, config);
@@ -97,7 +102,7 @@ public class TikaConfigSerializer {
     }
 
     private static void addExecutorService(Mode mode, Element rootElement, Document doc, TikaConfig config) {
-        //TODO
+        // TODO Implement the reverse of ExecutorServiceXmlLoader
     }
 
     private static void addServiceLoader(Mode mode, Element rootElement, Document doc, TikaConfig config) {
@@ -146,6 +151,35 @@ public class TikaConfigSerializer {
         rootElement.appendChild(mimeComment);
     }
 
+    private static void addEncodingDetectors(Mode mode, Element rootElement, Document doc, TikaConfig config) throws Exception {
+        EncodingDetector encDetector = config.getEncodingDetector();
+
+        if (mode == Mode.MINIMAL && encDetector instanceof DefaultEncodingDetector) {
+            // Don't output anything, all using defaults
+            Node detComment = doc.createComment(
+                    "for example: <encodingDetectors><encodingDetector class=\"" +
+                    "org.apache.tika.detect.DefaultEncodingDetector\"></encodingDetectors>");
+            rootElement.appendChild(detComment);
+            return;
+        }
+
+        Element encDetectorsElement = doc.createElement("encodingDetectors");
+        if (mode == Mode.CURRENT && encDetector instanceof DefaultEncodingDetector ||
+                ! (encDetector instanceof CompositeEncodingDetector)) {
+            Element encDetectorElement = doc.createElement("encodingDetector");
+            encDetectorElement.setAttribute("class", encDetector.getClass().getCanonicalName());
+            encDetectorsElement.appendChild(encDetectorElement);
+        } else {
+            List<EncodingDetector> children = ((CompositeEncodingDetector)encDetector).getDetectors();
+            for (EncodingDetector d : children) {
+                Element encDetectorElement = doc.createElement("encodingDetector");
+                encDetectorElement.setAttribute("class", d.getClass().getCanonicalName());
+                encDetectorsElement.appendChild(encDetectorElement);
+            }
+        }
+        rootElement.appendChild(encDetectorsElement);
+    }
+
     private static void addDetectors(Mode mode, Element rootElement, Document doc, TikaConfig config) throws Exception {
         Detector detector = config.getDetector();
 
-- 
To stop receiving notification emails like this one, please contact "commits@tika.apache.org" <commits@tika.apache.org>.