Repository: tika Updated Branches: refs/heads/2.x c58af959b -> 74e998d0f
TIKA-1904 - Create Proxy Parser and Detectors Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/74e998d0 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/74e998d0 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/74e998d0 Branch: refs/heads/2.x Commit: 74e998d0ff359813dc06c695a7e786694c818932 Parents: c58af95 Author: Bob Paulin <[email protected]> Authored: Sat Mar 19 15:48:42 2016 -0500 Committer: Bob Paulin <[email protected]> Committed: Sat Mar 19 15:48:42 2016 -0500 ---------------------------------------------------------------------- .../org/apache/tika/detect/DetectorProxy.java | 67 ++++++++++++++++ .../org/apache/tika/parser/ParserProxy.java | 83 ++++++++++++++++++++ .../apache/tika/detect/DetectorProxyTest.java | 54 +++++++++++++ .../apache/tika/detect/DummyProxyDetector.java | 31 ++++++++ .../apache/tika/parser/DummyProxyParser.java | 44 +++++++++++ .../org/apache/tika/parser/ParserProxyTest.java | 63 +++++++++++++++ 6 files changed, 342 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/main/java/org/apache/tika/detect/DetectorProxy.java ---------------------------------------------------------------------- diff --git a/tika-core/src/main/java/org/apache/tika/detect/DetectorProxy.java b/tika-core/src/main/java/org/apache/tika/detect/DetectorProxy.java new file mode 100644 index 0000000..5714cd3 --- /dev/null +++ b/tika-core/src/main/java/org/apache/tika/detect/DetectorProxy.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.detect; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.tika.config.LoadErrorHandler; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; + +/** + * This detector is a proxy for another detector. + * This allows modules to use detectors from other modules + * as optional dependencies since not including the classes + * simply does nothing rather than throwing a ClassNotFoundException. + * + * @since Apache Tika 2.0 + */ +public class DetectorProxy implements Detector +{ + private static final long serialVersionUID = 4534101565629801667L; + + private Detector detector; + + public DetectorProxy(String detectorClassName) + { + this(detectorClassName, LoadErrorHandler.WARN); + } + + public DetectorProxy(String detectorClassName, LoadErrorHandler handler) + { + try + { + this.detector = (Detector)Class.forName(detectorClassName).newInstance(); + } + catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) + { + handler.handleLoadError(detectorClassName, e); + } + } + + @Override + public MediaType detect(InputStream input, Metadata metadata) throws IOException + { + if(detector != null) + { + return detector.detect(input, metadata); + } + return null; + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/main/java/org/apache/tika/parser/ParserProxy.java ---------------------------------------------------------------------- diff --git 
a/tika-core/src/main/java/org/apache/tika/parser/ParserProxy.java b/tika-core/src/main/java/org/apache/tika/parser/ParserProxy.java new file mode 100644 index 0000000..b664c0a --- /dev/null +++ b/tika-core/src/main/java/org/apache/tika/parser/ParserProxy.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Collections; +import java.util.Set; + +import org.apache.tika.config.LoadErrorHandler; +import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +/** + * This parser is a proxy for another parser. + * This allows modules to use parsers from other modules + * as optional dependencies since not including the classes + * simply does nothing rather than throwing a ClassNotFoundException. 
+ * + * @since Apache Tika 2.0 + */ +public class ParserProxy extends AbstractParser +{ + + private static final long serialVersionUID = -4838436708916910179L; + private Parser parser; + + public ParserProxy(String parserClassName) + { + this(parserClassName, LoadErrorHandler.WARN); + } + + public ParserProxy(String parserClassName, LoadErrorHandler handler) + { + try + { + this.parser = (Parser)Class.forName(parserClassName).newInstance(); + } + catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) + { + handler.handleLoadError(parserClassName, e); + } + + } + + @Override + public Set<MediaType> getSupportedTypes(ParseContext context) + { + if (parser == null) + { + return Collections.emptySet(); + } + return parser.getSupportedTypes(context); + } + + @Override + public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) + throws IOException, SAXException, TikaException + { + if(parser != null) + { + parser.parse(stream, handler, metadata, context); + } + //Otherwise do nothing + } +} http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/test/java/org/apache/tika/detect/DetectorProxyTest.java ---------------------------------------------------------------------- diff --git a/tika-core/src/test/java/org/apache/tika/detect/DetectorProxyTest.java b/tika-core/src/test/java/org/apache/tika/detect/DetectorProxyTest.java new file mode 100644 index 0000000..800413d --- /dev/null +++ b/tika-core/src/test/java/org/apache/tika/detect/DetectorProxyTest.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.detect; + +import static org.junit.Assert.*; + +import java.io.IOException; + +import org.apache.tika.config.LoadErrorHandler; +import org.apache.tika.mime.MediaType; +import org.junit.Test; + +public class DetectorProxyTest +{ + @Test + public void testDetectorProxyExists() throws IOException + { + Detector dummyDetector = new DetectorProxy("org.apache.tika.detect.DummyProxyDetector", + LoadErrorHandler.IGNORE); + + MediaType result = dummyDetector.detect(null, null); + + assertEquals("Detector being proxied exists so result should not be null", + MediaType.TEXT_PLAIN, result ); + + } + + @Test + public void testParserProxyNotExists() throws IOException + { + Detector dummyDetector = new DetectorProxy("org.apache.tika.detect.DoesNotExist", + LoadErrorHandler.IGNORE); + + MediaType result = dummyDetector.detect(null, null); + + assertNull("Detector being proxied does not exists so result should be null", result ); + + } + +} http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/test/java/org/apache/tika/detect/DummyProxyDetector.java ---------------------------------------------------------------------- diff --git a/tika-core/src/test/java/org/apache/tika/detect/DummyProxyDetector.java b/tika-core/src/test/java/org/apache/tika/detect/DummyProxyDetector.java new file mode 100644 index 0000000..a11b584 --- /dev/null +++ b/tika-core/src/test/java/org/apache/tika/detect/DummyProxyDetector.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.detect; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; + +public class DummyProxyDetector implements Detector +{ + @Override + public MediaType detect(InputStream input, Metadata metadata) throws IOException { + return MediaType.TEXT_PLAIN; + } +} http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/test/java/org/apache/tika/parser/DummyProxyParser.java ---------------------------------------------------------------------- diff --git a/tika-core/src/test/java/org/apache/tika/parser/DummyProxyParser.java b/tika-core/src/test/java/org/apache/tika/parser/DummyProxyParser.java new file mode 100644 index 0000000..ca766c9 --- /dev/null +++ b/tika-core/src/test/java/org/apache/tika/parser/DummyProxyParser.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Set; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +public class DummyProxyParser extends AbstractParser +{ + @Override + public Set<MediaType> getSupportedTypes(ParseContext context) + { + return null; + } + + @Override + public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) + throws IOException, SAXException, TikaException + { + metadata.add("Test", "value"); + + } +} http://git-wip-us.apache.org/repos/asf/tika/blob/74e998d0/tika-core/src/test/java/org/apache/tika/parser/ParserProxyTest.java ---------------------------------------------------------------------- diff --git a/tika-core/src/test/java/org/apache/tika/parser/ParserProxyTest.java b/tika-core/src/test/java/org/apache/tika/parser/ParserProxyTest.java new file mode 100644 index 0000000..13a8665 --- /dev/null +++ b/tika-core/src/test/java/org/apache/tika/parser/ParserProxyTest.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser; + +import static org.junit.Assert.*; + +import java.io.IOException; + +import org.apache.tika.config.LoadErrorHandler; +import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.Metadata; +import org.junit.Test; +import org.xml.sax.SAXException; + +public class ParserProxyTest +{ + + @Test + public void testParserProxyExists() throws IOException, SAXException, TikaException + { + Parser dummyParser = new ParserProxy("org.apache.tika.parser.DummyProxyParser", + LoadErrorHandler.IGNORE); + + Metadata metadata = new Metadata(); + + dummyParser.parse(null, null, metadata, null); + + assertEquals("Parser being proxied exists so metadata should be added", + 1, metadata.size()); + + } + + @Test + public void testParserProxyNotExists() throws IOException, SAXException, TikaException + { + Parser dummyParser = new ParserProxy("org.apache.tika.parser.NotExists", + LoadErrorHandler.IGNORE); + + Metadata metadata = new Metadata(); + + dummyParser.parse(null, null, metadata, null); + + assertEquals("Parser being proxied doesn't exist so metadata not change", + 0, metadata.size()); + + } + + +}
