Removed the gRPC implementation, as it is redundant with the REST implementation
Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/f77eb2b5 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/f77eb2b5 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/f77eb2b5 Branch: refs/heads/TIKA-1508 Commit: f77eb2b5ab4e09208b77d3d860e6520151d20653 Parents: 3deea1b Author: Thamme Gowda <[email protected]> Authored: Fri Aug 12 19:25:32 2016 -0700 Committer: Thamme Gowda <[email protected]> Committed: Fri Aug 12 19:25:32 2016 -0700 ---------------------------------------------------------------------- .../tf/TensorflowGrpcRecogniser.java | 148 ------------------- .../tf/TensorflowImageRecParser.java | 2 +- .../ObjectRecognitionParserTest.java | 24 --- .../recognition/tika-config-tflow-addon.xml | 30 ---- 4 files changed, 1 insertion(+), 203 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/f77eb2b5/tika-parsers/src/main/java/org/apache/tika/parser/recognition/tf/TensorflowGrpcRecogniser.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/recognition/tf/TensorflowGrpcRecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/recognition/tf/TensorflowGrpcRecogniser.java deleted file mode 100644 index 4a45587..0000000 --- a/tika-parsers/src/main/java/org/apache/tika/parser/recognition/tf/TensorflowGrpcRecogniser.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.parser.recognition.tf; - -import org.apache.tika.config.Field; -import org.apache.tika.config.Param; -import org.apache.tika.exception.TikaConfigException; -import org.apache.tika.exception.TikaException; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.mime.MediaType; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.recognition.ObjectRecogniser; -import org.apache.tika.parser.recognition.RecognisedObject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.xml.sax.ContentHandler; -import org.xml.sax.SAXException; - -import java.io.Closeable; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.net.URL; -import java.net.URLClassLoader; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Set; - -/** - * Tensor Flow image recogniser which has high performance. - * This implementation takes addon jar and binds it using reflections without - * without corrupting classpath with incompatible version of dependencies. 
- * <p> - * The addon jar can be built from https://github.com/thammegowda/tensorflow-grpc-java - * - * @since Apache Tika 1.14 - */ -public class TensorflowGrpcRecogniser implements ObjectRecogniser, Closeable { - - private static final Logger LOG = LoggerFactory.getLogger(TensorflowGrpcRecogniser.class); - private static final String LABEL_LANG = "en"; - private static ClassLoader PARENT_CL = TensorflowGrpcRecogniser.class.getClassLoader(); - - static { - while (PARENT_CL.getParent() != null) { - PARENT_CL = PARENT_CL.getParent(); //move up the heighrarchy until we get the JDK classloader - } - } - - @Field - private String recogniserClass = "edu.usc.irds.tensorflow.grpc.TensorflowObjectRecogniser"; - - @Field - private String host = "localhost"; - - @Field - private int port = 9000; - - @Field(name = "addon", required = true) - private File addon; - - private boolean available; - - private Object instance; - private Method recogniseMethod; - private Method closeMethod; - - @Override - public Set<MediaType> getSupportedMimes() { - return TensorflowImageRecParser.SUPPORTED_MIMES; - } - - @Override - public boolean isAvailable() { - return available; - } - - @Override - public void initialize(Map<String, Param> params) throws TikaConfigException { - try { - if (!addon.exists()) { - throw new TikaConfigException("File " + addon + " doesnt exists"); - } - URL[] urls = {addon.getAbsoluteFile().toURI().toURL()}; - URLClassLoader loader = new URLClassLoader(urls, PARENT_CL); - Class<?> clazz = Class.forName(recogniserClass, true, loader); - instance = clazz.getConstructor(String.class, int.class) - .newInstance(host, port); - recogniseMethod = clazz.getMethod("recognise", InputStream.class); - closeMethod = clazz.getMethod("close"); - available = true; - } catch (Exception e) { - throw new TikaConfigException(e.getMessage(), e); - } - } - - @Override - public List<RecognisedObject> recognise(InputStream stream, - ContentHandler handler, Metadata metadata, ParseContext 
context) - throws IOException, SAXException, TikaException { - List<RecognisedObject> recObjs = new ArrayList<>(); - try { - Object result = recogniseMethod.invoke(instance, stream); - if (result != null) { - List<Map.Entry<String, Double>> objects = (List<Map.Entry<String, Double>>) result; - for (Map.Entry<String, Double> object : objects) { - RecognisedObject recObj = new RecognisedObject(object.getKey(), - LABEL_LANG, object.getKey(), object.getValue()); - recObjs.add(recObj); - } - } else { - LOG.warn("Result is null"); - } - } catch (IllegalAccessException | InvocationTargetException e) { - LOG.debug(e.getMessage(), e); - } - return recObjs; - } - - @Override - public void close() throws IOException { - if (closeMethod != null) { - try { - closeMethod.invoke(instance); - } catch (IllegalAccessException | InvocationTargetException e) { - LOG.debug(e.getMessage(), e); - } - } - } -} http://git-wip-us.apache.org/repos/asf/tika/blob/f77eb2b5/tika-parsers/src/main/java/org/apache/tika/parser/recognition/tf/TensorflowImageRecParser.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/recognition/tf/TensorflowImageRecParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/recognition/tf/TensorflowImageRecParser.java index 7ed8ccb..5da88f6 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/recognition/tf/TensorflowImageRecParser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/recognition/tf/TensorflowImageRecParser.java @@ -60,7 +60,7 @@ import java.util.regex.Pattern; * <li> All dependencies of tensor flow (such as numpy) must also be available. 
<a href="https://www.tensorflow.org/versions/r0.9/tutorials/image_recognition/index.html#image-recognition">Follow the image recognition guide and make sure it works</a></li> * </ol> * </p> - * @see TensorflowGrpcRecogniser + * @see TensorflowRESTRecogniser * @since Apache Tika 1.14 */ public class TensorflowImageRecParser extends ExternalParser implements ObjectRecogniser { http://git-wip-us.apache.org/repos/asf/tika/blob/f77eb2b5/tika-parsers/src/test/java/org/apache/tika/parser/recognition/ObjectRecognitionParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/recognition/ObjectRecognitionParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/recognition/ObjectRecognitionParserTest.java index aaa458b..8d9bf49 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/recognition/ObjectRecognitionParserTest.java +++ b/tika-parsers/src/test/java/org/apache/tika/parser/recognition/ObjectRecognitionParserTest.java @@ -39,7 +39,6 @@ import java.util.List; public class ObjectRecognitionParserTest { private static final String CONFIG_FILE = "org/apache/tika/parser/recognition/tika-config-tflow.xml"; - private static final String CONFIG_ADDON_FILE = "org/apache/tika/parser/recognition/tika-config-tflow-addon.xml"; private static final String CONFIG_REST_FILE = "org/apache/tika/parser/recognition/tika-config-tflow-rest.xml"; private static final String CAT_IMAGE = "test-documents/testJPEG.jpg"; private static final ClassLoader loader = ObjectRecognitionParserTest.class.getClassLoader(); @@ -67,29 +66,6 @@ public class ObjectRecognitionParserTest { } } - @Ignore("Configure addon path in tika-config.xml") - @Test - public void testAddonJar() throws Exception { - - try (InputStream stream = loader.getResourceAsStream(CONFIG_ADDON_FILE)){ - assert stream != null; - Tika tika = new Tika(new TikaConfig(stream)); - Metadata metadata = new Metadata(); - try 
(InputStream imageStream = loader.getResourceAsStream(CAT_IMAGE)){ - Reader reader = tika.parse(imageStream, metadata); - List<String> lines = IOUtils.readLines(reader); - String text = StringUtils.join(lines, " "); - String[] expectedObjects = {"Egyptian cat", "tabby cat"}; - String metaValues = StringUtils.join(metadata.getValues(ObjectRecognitionParser.MD_KEY), " "); - for (String expectedObject : expectedObjects) { - String message = "'" + expectedObject + "' must have been detected"; - Assert.assertTrue(message, text.contains(expectedObject)); - Assert.assertTrue(message, metaValues.contains(expectedObject)); - } - } - } - } - @Ignore("Configure Rest API service") @Test public void testREST() throws Exception { http://git-wip-us.apache.org/repos/asf/tika/blob/f77eb2b5/tika-parsers/src/test/resources/org/apache/tika/parser/recognition/tika-config-tflow-addon.xml ---------------------------------------------------------------------- diff --git a/tika-parsers/src/test/resources/org/apache/tika/parser/recognition/tika-config-tflow-addon.xml b/tika-parsers/src/test/resources/org/apache/tika/parser/recognition/tika-config-tflow-addon.xml deleted file mode 100644 index 349c7d4..0000000 --- a/tika-parsers/src/test/resources/org/apache/tika/parser/recognition/tika-config-tflow-addon.xml +++ /dev/null @@ -1,30 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- - ~ Licensed to the Apache Software Foundation (ASF) under one or more - ~ contributor license agreements. See the NOTICE file distributed with - ~ this work for additional information regarding copyright ownership. - ~ The ASF licenses this file to You under the Apache License, Version 2.0 - ~ (the "License"); you may not use this file except in compliance with - ~ the License. 
You may obtain a copy of the License at - ~ - ~ http://www.apache.org/licenses/LICENSE-2.0 - ~ - ~ Unless required by applicable law or agreed to in writing, software - ~ distributed under the License is distributed on an "AS IS" BASIS, - ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ~ See the License for the specific language governing permissions and - ~ limitations under the License. - --> -<properties> - <parsers> - <parser class="org.apache.tika.parser.recognition.ObjectRecognitionParser"> - <mime>image/jpeg</mime> - <params> - <param name="topN" type="int">5</param> - <param name="minConfidence" type="double">0.015</param> - <param name="class" type="string">org.apache.tika.parser.recognition.tf.TensorflowGrpcRecogniser</param> - <param name="addon" type="file">../tensorflow-java-1.0-jar-with-dependencies.jar</param> - </params> - </parser> - </parsers> -</properties> \ No newline at end of file
