[ https://issues.apache.org/jira/browse/TIKA-2400?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16261065#comment-16261065 ]
ASF GitHub Bot commented on TIKA-2400: -------------------------------------- chrismattmann closed pull request #208: Fix for TIKA-2400 Standardizing current Object Recognition REST parsers URL: https://github.com/apache/tika/pull/208 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/captioning/tf/TensorflowRESTCaptioner.java b/tika-parsers/src/main/java/org/apache/tika/parser/captioning/tf/TensorflowRESTCaptioner.java index d49ef0fed..5fd9d9a97 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/captioning/tf/TensorflowRESTCaptioner.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/captioning/tf/TensorflowRESTCaptioner.java @@ -72,16 +72,16 @@ MediaType.image("gif") }))); - private static final String LABEL_LANG = "en"; + private static final String LABEL_LANG = "eng"; @Field - private URI apiBaseUri; + private URI apiBaseUri = URI.create("http://localhost:8764/inception/v3"); @Field - private int captions; + private int captions = 5; @Field - private int maxCaptionLength; + private int maxCaptionLength = 15; private URI apiUri; @@ -107,7 +107,7 @@ public boolean isAvailable() { public void initialize(Map<String, Param> params) throws TikaConfigException { try { healthUri = URI.create(apiBaseUri + "/ping"); - apiUri = URI.create(apiBaseUri + String.format(Locale.getDefault(), "/captions?beam_size=%1$d&max_caption_length=%2$d", + apiUri = URI.create(apiBaseUri + String.format(Locale.getDefault(), "/caption/image?beam_size=%1$d&max_caption_length=%2$d", captions, maxCaptionLength)); DefaultHttpClient client = new DefaultHttpClient(); diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/recognition/ObjectRecognitionParser.java 
b/tika-parsers/src/main/java/org/apache/tika/parser/recognition/ObjectRecognitionParser.java index 37caf4538..a5a126ba9 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/recognition/ObjectRecognitionParser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/recognition/ObjectRecognitionParser.java @@ -55,11 +55,9 @@ * <properties> * <parsers> * <parser class="org.apache.tika.parser.recognition.ObjectRecognitionParser"> - * <mime>image/jpeg</mime> * <params> - * <param name="topN" type="int">2</param> - * <param name="minConfidence" type="double">0.015</param> * <param name="class" type="string">org.apache.tika.parser.recognition.tf.TensorflowRESTRecogniser</param> + * <param name="class" type="string">org.apache.tika.parser.captioning.tf.TensorflowRESTCaptioner</param> * </params> * </parser> * </parsers> @@ -83,12 +81,6 @@ public int compare(RecognisedObject o1, RecognisedObject o2) { } }; - @Field - private double minConfidence = 0.05; - - @Field - private int topN = 2; - private ObjectRecogniser recogniser; @Field(name = "class") @@ -102,7 +94,6 @@ public void initialize(Map<String, Param> params) throws TikaConfigException { recogniser.initialize(params); LOG.info("Recogniser = {}", recogniser.getClass().getName()); LOG.info("Recogniser Available = {}", recogniser.isAvailable()); - LOG.info("minConfidence = {}, topN={}", minConfidence, topN); } @Override @@ -140,29 +131,17 @@ public synchronized void parse(InputStream stream, ContentHandler handler, Metad for (RecognisedObject object : objects) { if (object instanceof CaptionObject) { if (xhtmlStartVal == null) xhtmlStartVal = "captions"; - LOG.debug("Add {}", object); - String mdValue = String.format(Locale.ENGLISH, "%s (%.5f)", - object.getLabel(), object.getConfidence()); - metadata.add(MD_KEY_IMG_CAP, mdValue); - acceptedObjects.add(object); + String labelAndConfidence = String.format(Locale.ENGLISH, "%s (%.5f)", object.getLabel(), object.getConfidence()); + 
metadata.add(MD_KEY_IMG_CAP, labelAndConfidence); xhtmlIds.add(String.valueOf(count++)); } else { if (xhtmlStartVal == null) xhtmlStartVal = "objects"; - if (object.getConfidence() >= minConfidence) { - count++; - LOG.info("Add {}", object); - String mdValue = String.format(Locale.ENGLISH, "%s (%.5f)", - object.getLabel(), object.getConfidence()); - metadata.add(MD_KEY_OBJ_REC, mdValue); - acceptedObjects.add(object); - xhtmlIds.add(object.getId()); - if (count >= topN) { - break; - } - } else { - LOG.warn("Object {} confidence {} less than min {}", object, object.getConfidence(), minConfidence); - } + String labelAndConfidence = String.format(Locale.ENGLISH, "%s (%.5f)", object.getLabel(), object.getConfidence()); + metadata.add(MD_KEY_OBJ_REC, labelAndConfidence); + xhtmlIds.add(object.getId()); } + LOG.info("Add {}", object); + acceptedObjects.add(object); } XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); @@ -183,4 +162,4 @@ public synchronized void parse(InputStream stream, ContentHandler handler, Metad metadata.add("no.objects", Boolean.TRUE.toString()); } } -} +} \ No newline at end of file diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/recognition/tf/TensorflowRESTRecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/recognition/tf/TensorflowRESTRecogniser.java index f45c2a95c..c83ea1eb0 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/recognition/tf/TensorflowRESTRecogniser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/recognition/tf/TensorflowRESTRecogniser.java @@ -23,6 +23,7 @@ import java.net.URI; import java.util.ArrayList; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.Collections; @@ -56,11 +57,16 @@ * Tensor Flow image recogniser which has high performance. * This implementation uses Tensorflow via REST API. 
* <p> - * NOTE : //TODO: link to wiki page here + * NOTE : https://wiki.apache.org/tika/TikaAndVision * * @since Apache Tika 1.14 */ public class TensorflowRESTRecogniser implements ObjectRecogniser { + + /** + * Some variables are protected, because this class is extended by TensorflowRESTVideoRecognizer class + */ + private static final Logger LOG = LoggerFactory.getLogger(TensorflowRESTRecogniser.class); private static final Set<MediaType> SUPPORTED_MIMES = Collections.unmodifiableSet( @@ -70,22 +76,27 @@ MediaType.image("gif") }))); - /** - * Maximum buffer size for image - */ - private static final String LABEL_LANG = "en"; + protected static final String LABEL_LANG = "eng"; + + @Field + protected URI apiBaseUri = URI.create("http://localhost:8764/inception/v4"); @Field - private URI apiUri = URI.create("http://localhost:8764/inception/v4/classify?topk=10"); + protected int topN = 2; + @Field - private URI healthUri = URI.create("http://localhost:8764/inception/v4/ping"); + protected double minConfidence = 0.015; - private boolean available; - - protected URI getApiUri(Metadata metadata){ - return apiUri; + protected URI apiUri; + + protected URI healthUri; + + protected boolean available; + + protected URI getApiUri(Metadata metadata) { + return apiUri; } - + @Override public Set<MediaType> getSupportedMimes() { return SUPPORTED_MIMES; @@ -99,10 +110,16 @@ public boolean isAvailable() { @Override public void initialize(Map<String, Param> params) throws TikaConfigException { try { + healthUri = URI.create(apiBaseUri + "/ping"); + apiUri = URI.create(apiBaseUri + String.format(Locale.getDefault(), "/classify/image?topn=%1$d&min_confidence=%2$f", + topN, minConfidence)); + DefaultHttpClient client = new DefaultHttpClient(); HttpResponse response = client.execute(new HttpGet(healthUri)); available = response.getStatusLine().getStatusCode() == 200; + LOG.info("Available = {}, API Status = {}", available, response.getStatusLine()); + LOG.info("topN = {}, 
minConfidence = {}", topN, minConfidence); } catch (Exception e) { available = false; throw new TikaConfigException(e.getMessage(), e); @@ -140,9 +157,9 @@ public void checkInitialization(InitializableProblemHandler handler) JSONObject jReply = new JSONObject(replyMessage); JSONArray jClasses = jReply.getJSONArray("classnames"); JSONArray jConfidence = jReply.getJSONArray("confidence"); - if (jClasses.length() != jConfidence.length()) { - LOG.warn("Classes of size {} is not equal to confidence of size {}", jClasses.length(), jConfidence.length()); - } + if (jClasses.length() != jConfidence.length()) { + LOG.warn("Classes of size {} is not equal to confidence of size {}", jClasses.length(), jConfidence.length()); + } assert jClasses.length() == jConfidence.length(); for (int i = 0; i < jClasses.length(); i++) { RecognisedObject recObj = new RecognisedObject(jClasses.getString(i), @@ -160,4 +177,4 @@ public void checkInitialization(InitializableProblemHandler handler) LOG.debug("Num Objects found {}", recObjs.size()); return recObjs; } -} +} \ No newline at end of file diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/recognition/tf/TensorflowRESTVideoRecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/recognition/tf/TensorflowRESTVideoRecogniser.java index ec16c7895..2e7b1f156 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/recognition/tf/TensorflowRESTVideoRecogniser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/recognition/tf/TensorflowRESTVideoRecogniser.java @@ -17,63 +17,91 @@ package org.apache.tika.parser.recognition.tf; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.URI; -import java.util.Collections; +import java.util.Locale; +import java.util.Map; import java.util.Set; +import java.util.Collections; +import java.util.HashSet; import javax.ws.rs.core.UriBuilder; +import org.apache.http.HttpResponse; +import 
org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.ByteArrayEntity; +import org.apache.http.impl.client.DefaultHttpClient; import org.apache.tika.Tika; import org.apache.tika.config.Field; +import org.apache.tika.config.Param; import org.apache.tika.config.TikaConfig; +import org.apache.tika.exception.TikaConfigException; +import org.apache.tika.exception.TikaException; +import org.apache.tika.io.IOUtils; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; import org.apache.tika.mime.MimeType; import org.apache.tika.mime.MimeTypeException; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.recognition.RecognisedObject; +import org.json.JSONArray; +import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; /** * Tensor Flow video recogniser which has high performance. * This implementation uses Tensorflow via REST API. 
* <p> - * NOTE : //TODO: link to wiki page here + * NOTE : https://wiki.apache.org/tika/TikaAndVisionVideo * * @since Apache Tika 1.15 */ -public class TensorflowRESTVideoRecogniser extends TensorflowRESTRecogniser{ +public class TensorflowRESTVideoRecogniser extends TensorflowRESTRecogniser { - private static final Logger LOG = LoggerFactory.getLogger(TensorflowRESTRecogniser.class); + private static final Logger LOG = LoggerFactory.getLogger(TensorflowRESTVideoRecogniser.class); - private static final Set<MediaType> SUPPORTED_MIMES = Collections.singleton(MediaType.video("mp4"));; + private static final Set<MediaType> SUPPORTED_MIMES = Collections.singleton(MediaType.video("mp4")); @Field - private URI apiUri = URI.create("http://localhost:8764/inception/v4/classify/video?topk=10"); + private String mode = "fixed"; @Override - public Set<MediaType> getSupportedMimes() { - return SUPPORTED_MIMES; + protected URI getApiUri(Metadata metadata) { + TikaConfig config = TikaConfig.getDefaultConfig(); + String ext = null; + //Find extension for video. It's required for OpenCV in InceptionAPI to decode video + try { + MimeType mimeType = config.getMimeRepository().forName(metadata.get("Content-Type")); + ext = mimeType.getExtension(); + return UriBuilder.fromUri(apiUri).queryParam("ext", ext).build(); + } catch (MimeTypeException e) { + LOG.error("Can't find extension from metadata"); + return apiUri; + } } - + @Override - protected URI getApiUri(Metadata metadata){ - - TikaConfig config = TikaConfig.getDefaultConfig(); - String ext = null; - //Find extension for video. 
It's required for OpenCv in InceptionAPI to decode video - try { - MimeType mimeType = config.getMimeRepository().forName(metadata.get("Content-Type")); - ext = mimeType.getExtension(); - - return UriBuilder.fromUri(apiUri).queryParam("ext", ext).build(); - - } catch (MimeTypeException e) { - LOG.error("Can't find extension from metadata"); - return apiUri; - } + public void initialize(Map<String, Param> params) throws TikaConfigException { + try { + healthUri = URI.create(apiBaseUri + "/ping"); + apiUri = URI.create(apiBaseUri + String.format(Locale.getDefault(), "/classify/video?topn=%1$d&min_confidence=%2$f&mode=%3$s", + topN, minConfidence, mode)); + + DefaultHttpClient client = new DefaultHttpClient(); + HttpResponse response = client.execute(new HttpGet(healthUri)); + available = response.getStatusLine().getStatusCode() == 200; + + LOG.info("Available = {}, API Status = {}", available, response.getStatusLine()); + LOG.info("topN = {}, minConfidence = {}", topN, minConfidence); + } catch (Exception e) { + available = false; + throw new TikaConfigException(e.getMessage(), e); + } } - - } diff --git a/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/Im2txtRestDockerfile b/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/Im2txtRestDockerfile index 93354c46b..1d9016dd6 100644 --- a/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/Im2txtRestDockerfile +++ b/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/Im2txtRestDockerfile @@ -15,54 +15,46 @@ # limitations under the License. # -FROM inception-rest-tika +FROM uscdatascience/tf-tika-base MAINTAINER Apache Tika Team -# Install python pillow. TODO: Remove this with the fix for TIKA 2398 -RUN pip install pillow - # Download the pretrained im2txt checkpoint WORKDIR /usr/share/apache-tika/models/dl/image/caption/ -RUN \ +RUN echo "We're downloading the checkpoint file for image captioning, the shell might look unresponsive. 
Please be patient." && \ # To get rid of early EOF error git config --global http.postBuffer 1048576000 && \ - echo "We're downloading the checkpoint file for image captioning, the shell might look unresponsive. Please be patient." && \ - git clone -b models https://github.com/USCDataScience/img2text.git && \ + git clone https://github.com/USCDataScience/img2text.git && \ # Join the parts - cat img2text/models/1M_iters_ckpt_parts_a* >1M_iters_ckpt.tar.gz && \ - tar -xzvf 1M_iters_ckpt.tar.gz && \ - # Delete all files except 1M_iters_ckpt - rm -rf {1M_iters_ckpt.tar.gz,img2text} + cat img2text/models/1M_iters_ckpt_parts_* >1M_iters_ckpt.tar.gz && \ + tar -xzvf 1M_iters_ckpt.tar.gz && rm -rf 1M_iters_ckpt.tar.gz -RUN \ - wget https://raw.githubusercontent.com/apache/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/model_info.xml \ - -O model_info.xml +RUN curl -O https://raw.githubusercontent.com/apache/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/model_info.xml # Download server related source files WORKDIR /usr/share/apache-tika/src/dl/image/caption/ -RUN \ - wget https://raw.githubusercontent.com/apache/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/caption_generator.py \ - -O caption_generator.py && \ - - wget https://raw.githubusercontent.com/apache/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/im2txtapi.py \ - -O im2txtapi.py && \ - - wget https://raw.githubusercontent.com/apache/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/model_wrapper.py \ - -O model_wrapper.py && \ - - wget https://raw.githubusercontent.com/apache/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/vocabulary.py \ - -O vocabulary.py && \ - +RUN curl -O 
https://raw.githubusercontent.com/ThejanW/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/caption_generator.py && \ + curl -O https://raw.githubusercontent.com/ThejanW/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/im2txtapi.py && \ + curl -O https://raw.githubusercontent.com/ThejanW/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/model_wrapper.py && \ + curl -O https://raw.githubusercontent.com/ThejanW/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/vocabulary.py && \ chmod +x im2txtapi.py WORKDIR / +# Remove unnecessary packages comes with base +RUN apt-get -y remove \ + apt-utils \ + curl \ + git \ + software-properties-common \ + unzip \ + wget + # Add symbolic link to im2txtapi.py RUN ln -s /usr/share/apache-tika/src/dl/image/caption/im2txtapi.py /usr/bin/im2txtapi # expose API port, this is the default port EXPOSE 8764 -CMD im2txtapi \ No newline at end of file +CMD im2txtapi diff --git a/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/im2txtapi.py b/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/im2txtapi.py index 75f5e8775..97f1f2afd 100644 --- a/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/im2txtapi.py +++ b/tika-parsers/src/main/resources/org/apache/tika/parser/captioning/tf/im2txtapi.py @@ -34,16 +34,19 @@ from __future__ import division from __future__ import print_function +import json +import logging import math import requests -import xml.etree.ElementTree as ET -import logging import sys -from time import time -from PIL import Image + +from flask import Flask, request, Response, jsonify from io import BytesIO -import flask +from PIL import Image +from time import time + import tensorflow as tf +import xml.etree.ElementTree as ET import model_wrapper import vocabulary @@ -94,7 +97,7 @@ tf.logging.set_verbosity(tf.logging.INFO) -class 
Initializer(flask.Flask): +class Initializer(Flask): """ Class to initialize the REST API, this class loads the model from the given checkpoint path in model_info.xml and prepares a caption_generator object @@ -123,9 +126,6 @@ def current_time(): return int(1000 * time()) -from flask import request, Response, jsonify -import json - app = Initializer(__name__) @@ -158,17 +158,17 @@ def index(): <li> <code>/inception/v3/ping </code> - <br/> <b> Description : </b> checks availability of the service. returns "pong" with status 200 when it is available </li> - <li> <code>/inception/v3/captions</code> - <br/> + <li> <code>/inception/v3/caption/image</code> - <br/> <table> <tr><th align="left"> Description </th><td> This is a service that can caption images</td></tr> <tr><th align="left"> How to supply Image Content </th></tr> <tr><th align="left"> With HTTP GET : </th> <td> Include a query parameter <code>url </code> which is an http url of JPEG image <br/> - Example: <code> curl "localhost:8764/inception/v3/captions?url=http://xyz.com/example.jpg"</code> + Example: <code> curl "localhost:8764/inception/v3/caption/image?url=http://xyz.com/example.jpg"</code> </td></tr> <tr><th align="left"> With HTTP POST :</th><td> POST JPEG image content as binary data in request body. 
<br/> - Example: <code> curl -X POST "localhost:8764/inception/v3/captions" --data-binary @example.jpg </code> + Example: <code> curl -X POST "localhost:8764/inception/v3/caption/image" --data-binary @example.jpg </code> </td></tr> </table> </li> @@ -184,8 +184,8 @@ def ping_pong(): return "pong" -@app.route("/inception/v3/captions", methods=["GET", "POST"]) -def gen_captions(): +@app.route("/inception/v3/caption/image", methods=["GET", "POST"]) +def caption_image(): """API to caption images""" image_format = "not jpeg" @@ -201,7 +201,7 @@ def gen_captions(): url = request.args.get("url") c_type, image_data = get_remote_file(url) if not image_data: - return flask.Response(status=400, response=jsonify(error="Could not HTTP GET %s" % url)) + return Response(status=400, response=jsonify(error="Could not HTTP GET %s" % url)) if 'image/jpeg' in c_type: image_format = "jpeg" @@ -217,7 +217,7 @@ def gen_captions(): rgb_image = image.convert("RGB") # convert the RGB image to jpeg image_bytes = BytesIO() - rgb_image.save(image_bytes, format="jpeg") + rgb_image.save(image_bytes, format="jpeg", quality=95) jpg_image = image_bytes.getvalue() image_bytes.close() diff --git a/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/BaseTFDockerfile b/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/BaseTFDockerfile new file mode 100644 index 000000000..b2cc4114e --- /dev/null +++ b/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/BaseTFDockerfile @@ -0,0 +1,50 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +FROM ubuntu:16.04 +MAINTAINER Apache Tika Team + +RUN apt-get update && apt-get install -y --no-install-recommends \ + apt-utils \ + curl \ + git \ + python \ + python2.7-dev \ + software-properties-common \ + unzip \ + wget \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Install pip +RUN curl -O https://bootstrap.pypa.io/get-pip.py && \ + python get-pip.py && \ + rm get-pip.py + +# Install basic python requirements +RUN pip --no-cache-dir install \ + flask \ + numpy \ + pillow \ + requests + +# Install tensorflow:1.3.0 for python 2.7 +ENV TF_BINARY_URL https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.4.0-cp27-none-linux_x86_64.whl +RUN pip --no-cache-dir install $TF_BINARY_URL + +CMD ["python"] \ No newline at end of file diff --git a/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/InceptionRestDockerfile b/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/InceptionRestDockerfile index 1c5ad4d7f..355c262be 100644 --- a/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/InceptionRestDockerfile +++ b/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/InceptionRestDockerfile @@ -14,37 +14,40 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -FROM ubuntu -MAINTAINER Thamme Gowda <thammego...@apache.org> and Kranthi Kiran GV <kkran...@student.nitw.ac.in> +FROM uscdatascience/tf-tika-base +MAINTAINER Apache Tika Team -# Install missing part of ubuntu core + python stuff -RUN apt-get update && \ - apt-get install -y python-pip python-dev wget libtcmalloc-minimal4 git unzip +# Download the pretrained inception v4 checkpoint & other meta files +WORKDIR /usr/share/apache-tika/models/dl/image-video/recognition/ -# Install tensorflow and other dependencies -RUN \ - pip install --upgrade https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.0.1-cp27-none-linux_x86_64.whl && \ - pip install flask requests pillow +RUN curl -O http://download.tensorflow.org/models/inception_v4_2016_09_09.tar.gz && \ + tar -xzvf inception_v4_2016_09_09.tar.gz && rm -rf inception_v4_2016_09_09.tar.gz && \ + curl -O https://raw.githubusercontent.com/tensorflow/models/master/research/inception/inception/data/imagenet_lsvrc_2015_synsets.txt && \ + curl -O https://raw.githubusercontent.com/tensorflow/models/master/research/inception/inception/data/imagenet_metadata.txt -# Get the TF-slim dependencies -# Downloading from a specific commit for future compatibility -RUN wget https://github.com/tensorflow/models/archive/c15fada28113eca32dc98d6e3bec4755d0d5b4c2.zip && \ - unzip c15fada28113eca32dc98d6e3bec4755d0d5b4c2.zip +# Download server related source files +WORKDIR /usr/share/apache-tika/src/dl/image-video/recognition/ -RUN \ - wget https://raw.githubusercontent.com/ThejanW/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/inceptionapi.py -O /usr/bin/inceptionapi.py && \ - chmod +x /usr/bin/inceptionapi.py +RUN curl -O https://raw.githubusercontent.com/ThejanW/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/inceptionapi.py && \ + curl -O 
https://raw.githubusercontent.com/ThejanW/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/inception_v4.py && \ + chmod +x inceptionapi.py -# clean up cache, so we can publish smaller image to hub -RUN apt-get clean -ENV PYTHONPATH="$PYTHONPATH:/models-c15fada28113eca32dc98d6e3bec4755d0d5b4c2/slim" -ENV LD_PRELOAD="/usr/lib/libtcmalloc_minimal.so.4" +WORKDIR / -ENV PYTHONPATH="$PYTHONPATH:$PATH" -RUN python -c "import inceptionapi" +# Remove unnecessary packages comes with base +RUN apt-get -y remove \ + apt-utils \ + curl \ + git \ + software-properties-common \ + unzip \ + wget -# expose API port, this is the default port +# Add symbolic link to inceptionapi.py +RUN ln -s /usr/share/apache-tika/src/dl/image-video/recognition/inceptionapi.py /usr/bin/inceptionapi + +# Expose API port, this is the default port EXPOSE 8764 -CMD inceptionapi.py +CMD inceptionapi \ No newline at end of file diff --git a/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/InceptionVideoRestDockerfile b/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/InceptionVideoRestDockerfile index 2d1cac2be..367045141 100644 --- a/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/InceptionVideoRestDockerfile +++ b/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/InceptionVideoRestDockerfile @@ -15,37 +15,29 @@ # limitations under the License. 
# -FROM ubuntu:16.04 -MAINTAINER Thamme Gowda <thammego...@apache.org> and Kranthi Kiran GV <kkran...@student.nitw.ac.in> and \ - Madhav Sharan <msha...@usc.edu> and Chris Mattmann <mattm...@apache.org> - -# Install missing part of ubuntu core + python + building dependencies -RUN apt-get update && \ - apt-get install -y wget git unzip curl \ - libtcmalloc-minimal4 software-properties-common apt-utils \ - build-essential cmake pkg-config \ - libjpeg8-dev libtiff5-dev libjasper-dev libpng12-dev \ - libavcodec-dev libavformat-dev libswscale-dev libv4l-dev \ - libxvidcore-dev libx264-dev \ - libgtk2.0-dev \ - libatlas-base-dev gfortran \ - python2.7-dev \ - python-pip - -# Install ffmpeg -RUN apt-get install -y ffmpeg - -RUN \ - pip install --upgrade pip && \ - pip install numpy +FROM uscdatascience/tf-tika-base +MAINTAINER Apache Tika Team + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + cmake \ + libavcodec-dev \ + libavformat-dev \ + libswscale-dev \ + libv4l-dev \ + libavutil-dev \ + pkg-config \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* # Download OpenCV + OpenCV_Contrib WORKDIR /usr/local/src RUN \ - wget -O opencv.zip https://github.com/Itseez/opencv/archive/3.2.0.zip && \ + wget -O opencv.zip https://github.com/opencv/opencv/archive/3.2.0.zip && \ unzip opencv.zip && \ - wget -O opencv_contrib.zip https://github.com/Itseez/opencv_contrib/archive/3.2.0.zip && \ + wget -O opencv_contrib.zip https://github.com/opencv/opencv_contrib/archive/3.2.0.zip && \ unzip opencv_contrib.zip # Build + Install OpenCV @@ -53,42 +45,64 @@ RUN mkdir -p opencv-3.2.0/build WORKDIR /usr/local/src/opencv-3.2.0/build RUN cmake -D CMAKE_BUILD_TYPE=RELEASE \ -D CMAKE_INSTALL_PREFIX=/usr/local \ - -D INSTALL_PYTHON_EXAMPLES=ON \ - -D INSTALL_C_EXAMPLES=OFF \ -D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib-3.2.0/modules \ - -D BUILD_EXAMPLES=ON .. 
-RUN make -j4 -RUN make install + -D BUILD_PYTHON_SUPPORT=ON \ + -D BUILD_EXAMPLES=OFF \ + -D PYTHON_DEFAULT_EXECUTABLE=/usr/bin/python3 \ + -D BUILD_opencv_python3=OFF \ + -D BUILD_opencv_python2=ON \ + -D WITH_IPP=OFF \ + -D WITH_FFMPEG=ON \ + -D WITH_V4L=ON .. -WORKDIR / +RUN make -j$(nproc) && make install -# Install tensorflow and other dependencies -RUN \ - pip install --upgrade https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.0.1-cp27-none-linux_x86_64.whl --ignore-installed && \ - pip install flask requests pillow +# Download the pretrained inception v4 checkpoint & other meta files +WORKDIR /usr/share/apache-tika/models/dl/image-video/recognition/ -# Get the TF-slim dependencies -# Downloading from a specific commit for future compatibility -RUN wget https://github.com/tensorflow/models/archive/c15fada28113eca32dc98d6e3bec4755d0d5b4c2.zip +RUN curl -O http://download.tensorflow.org/models/inception_v4_2016_09_09.tar.gz && \ + tar -xzvf inception_v4_2016_09_09.tar.gz && rm -rf inception_v4_2016_09_09.tar.gz && \ + curl -O https://raw.githubusercontent.com/tensorflow/models/master/research/inception/inception/data/imagenet_lsvrc_2015_synsets.txt && \ + curl -O https://raw.githubusercontent.com/tensorflow/models/master/research/inception/inception/data/imagenet_metadata.txt -RUN unzip c15fada28113eca32dc98d6e3bec4755d0d5b4c2.zip +# Download server related source files +WORKDIR /usr/share/apache-tika/src/dl/image-video/recognition/ -RUN \ - wget https://raw.githubusercontent.com/apache/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/inceptionapi.py -O /usr/bin/inceptionapi.py && \ - wget https://raw.githubusercontent.com/apache/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/video_util.py -O /usr/bin/video_util.py && \ - chmod +x /usr/bin/inceptionapi.py && \ - chmod +x /usr/bin/video_util.py +RUN curl -O 
https://raw.githubusercontent.com/ThejanW/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/inceptionapi.py && \ + curl -O https://raw.githubusercontent.com/ThejanW/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/inception_v4.py && \ + curl -O https://raw.githubusercontent.com/ThejanW/tika/master/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/video_util.py && \ + chmod +x inceptionapi.py + +WORKDIR / -# clean up cache + delete OpenCV source files, so we can publish smaller image to hub -RUN apt-get clean +# Remove unnecessary packages comes with base +RUN apt-get -y remove \ + apt-utils \ + curl \ + git \ + software-properties-common \ + unzip \ + wget + +# Remove unnecessary packages +RUN apt-get -y remove \ + apt-utils \ + build-essential \ + cmake \ + libavcodec-dev \ + libavformat-dev \ + libswscale-dev \ + libv4l-dev \ + libavutil-dev \ + pkg-config + +# Delete OpenCV source files RUN rm -rf /usr/local/src -ENV PYTHONPATH="$PYTHONPATH:/models-c15fada28113eca32dc98d6e3bec4755d0d5b4c2/slim" -ENV LD_PRELOAD="/usr/lib/libtcmalloc_minimal.so.4" -ENV PYTHONPATH="$PYTHONPATH:$PATH" -RUN python -c "import inceptionapi" +# Add symbolic link to inceptionapi.py +RUN ln -s /usr/share/apache-tika/src/dl/image-video/recognition/inceptionapi.py /usr/bin/inceptionapi -# expose API port, this is the default port +# Expose API port, this is the default port EXPOSE 8764 -CMD inceptionapi.py +CMD inceptionapi \ No newline at end of file diff --git a/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/classify_image.py b/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/classify_image.py deleted file mode 100644 index 26a4f2353..000000000 --- a/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/classify_image.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Simple image classification with Inception. - -Run image classification with Inception trained on ImageNet 2012 Challenge data -set. - -This program creates a graph from a saved GraphDef protocol buffer, -and runs inference on an input JPEG image. It outputs human readable -strings of the top 5 predictions along with their probabilities. - -Change the --image_file argument to any jpg image to compute a -classification of that image. - -Please see the tutorial and website for a detailed description of how -to use this script to perform image recognition. - -https://tensorflow.org/tutorials/image_recognition/ - -Requirements: - tensorflow - tensorflow models (for TF-slim) - -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os.path -import re -import sys -import tarfile - -import numpy as np -from six.moves import urllib -import tensorflow as tf - -from datasets import imagenet, dataset_utils -from nets import inception -from preprocessing import inception_preprocessing - -slim = tf.contrib.slim - -FLAGS = tf.app.flags.FLAGS - -# inception_v4.ckpt -# Inception V4 checkpoint file. -# imagenet_metadata.txt -# Map from synset ID to a human readable string. -# imagenet_lsvrc_2015_synsets.txt -# Text representation of a protocol buffer mapping a label to synset ID. 
-tf.app.flags.DEFINE_string( - 'model_dir', '/tmp/imagenet', - """Path to inception_v4.ckpt, """ - """imagenet_lsvrc_2015_synsets.txt, and """ - """imagenet_metadata.txt.""") -tf.app.flags.DEFINE_string('image_file', '', - """Absolute path to image file.""") -tf.app.flags.DEFINE_integer('num_top_predictions', 5, - """Display this many predictions.""") - -# pylint: disable=line-too-long -DATA_URL = 'http://download.tensorflow.org/models/inception_v4_2016_09_09.tar.gz' -# pylint: enable=line-too-long - - -def create_readable_names_for_imagenet_labels(): - """Create a dict mapping label id to human readable string. - - Returns: - labels_to_names: dictionary where keys are integers from to 1000 - and values are human-readable names. - - We retrieve a synset file, which contains a list of valid synset labels used - by ILSVRC competition. There is one synset one per line, eg. - # n01440764 - # n01443537 - We also retrieve a synset_to_human_file, which contains a mapping from synsets - to human-readable names for every synset in Imagenet. These are stored in a - tsv format, as follows: - # n02119247 black fox - # n02119359 silver fox - We assign each synset (in alphabetical order) an integer, starting from 1 - (since 0 is reserved for the background class). 
- - Code is based on - https://github.com/tensorflow/models/blob/master/inception/inception/data/build_imagenet_data.py#L463 - """ - - # pylint: disable=line-too-long - - dest_directory = FLAGS.model_dir - - synset_list = [s.strip() for s in open(os.path.join( - dest_directory, 'imagenet_lsvrc_2015_synsets.txt')).readlines()] - num_synsets_in_ilsvrc = len(synset_list) - assert num_synsets_in_ilsvrc == 1000 - - synset_to_human_list = open(os.path.join( - dest_directory, 'imagenet_metadata.txt')).readlines() - num_synsets_in_all_imagenet = len(synset_to_human_list) - assert num_synsets_in_all_imagenet == 21842 - - synset_to_human = {} - for s in synset_to_human_list: - parts = s.strip().split('\t') - assert len(parts) == 2 - synset = parts[0] - human = parts[1] - synset_to_human[synset] = human - - label_index = 1 - labels_to_names = {0: 'background'} - for synset in synset_list: - name = synset_to_human[synset] - labels_to_names[label_index] = name - label_index += 1 - - return labels_to_names - - -def run_inference_on_image(image): - """Runs inference on an image. - - Args: - image: Image file name. - - Returns: - Nothing - """ - dest_directory = FLAGS.model_dir - - image_size = inception.inception_v4.default_image_size - - if not tf.gfile.Exists(image): - tf.logging.fatal('File does not exist %s', image) - image_string = tf.gfile.FastGFile(image, 'rb').read() - - with tf.Graph().as_default(): - image = tf.image.decode_jpeg(image_string, channels=3) - processed_image = inception_preprocessing.preprocess_image( - image, image_size, image_size, is_training=False) - processed_images = tf.expand_dims(processed_image, 0) - - # Create the model, use the default arg scope to configure the batch - # norm parameters. 
- with slim.arg_scope(inception.inception_v4_arg_scope()): - logits, _ = inception.inception_v4( - processed_images, num_classes=1001, is_training=False) - probabilities = tf.nn.softmax(logits) - - init_fn = slim.assign_from_checkpoint_fn( - os.path.join(dest_directory, 'inception_v4.ckpt'), - slim.get_model_variables('InceptionV4')) - - with tf.Session() as sess: - init_fn(sess) - probabilities = sess.run(probabilities) - probabilities = probabilities[0, 0:] - sorted_inds = [i[0] for i in sorted( - enumerate(-probabilities), key=lambda x:x[1])] - - names = create_readable_names_for_imagenet_labels() - top_k = FLAGS.num_top_predictions - for i in range(top_k): - index = sorted_inds[i] - print('%s (score = %.5f)' % (names[index], probabilities[index])) - - -def util_download(url, dest_directory): - """Downloads the file. - - Args: - url: URL to download the file from. - dest_directory: Destination directory - Returns: - Nothing - """ - filename = url.split('/')[-1] - filepath = os.path.join(dest_directory, filename) - - def _progress(count, block_size, total_size): - sys.stdout.write('\r>> Downloading %s %.1f%%' % ( - filename, float(count * block_size) / float(total_size) * 100.0)) - sys.stdout.flush() - filepath, _ = urllib.request.urlretrieve(url, filepath, _progress) - print() - statinfo = os.stat(filepath) - print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') - - -def util_download_tar(url, dest_directory): - """Downloads a file and extracts it. - - Args: - url: URL to download the file from. 
- dest_directory: Destination directory - Returns: - Nothing - """ - filename = url.split('/')[-1] - filepath = os.path.join(dest_directory, filename) - - def _progress(count, block_size, total_size): - sys.stdout.write('\r>> Downloading %s %.1f%%' % ( - filename, float(count * block_size) / float(total_size) * 100.0)) - sys.stdout.flush() - filepath, _ = urllib.request.urlretrieve(url, filepath, _progress) - print() - statinfo = os.stat(filepath) - print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') - tarfile.open(filepath, 'r:gz').extractall(dest_directory) - - -def maybe_download_and_extract(): - """Download and extract model tar file.""" - dest_directory = FLAGS.model_dir - if not tf.gfile.Exists(dest_directory): - tf.gfile.MakeDirs(dest_directory) - if not tf.gfile.Exists(os.path.join(dest_directory, 'inception_v4.ckpt')): - util_download_tar(DATA_URL, dest_directory) - # pylint: disable=line-too-long - if not tf.gfile.Exists(os.path.join(dest_directory, 'imagenet_lsvrc_2015_synsets.txt')): - util_download( - 'https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data/imagenet_lsvrc_2015_synsets.txt', dest_directory) - if not tf.gfile.Exists(os.path.join(dest_directory, 'imagenet_metadata.txt')): - util_download( - 'https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data/imagenet_metadata.txt', dest_directory) - # pylint: enable=line-too-long - - -def main(_): - maybe_download_and_extract() - image = FLAGS.image_file - run_inference_on_image(image) - - -if __name__ == '__main__': - tf.app.run() diff --git a/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/inception_v4.py b/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/inception_v4.py new file mode 100644 index 000000000..b6644160f --- /dev/null +++ b/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/inception_v4.py @@ -0,0 +1,362 @@ +#!/usr/bin/env python +# Licensed to the 
Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Contains the definition of the Inception V4 architecture. + +As described in http://arxiv.org/abs/1602.07261. + + Inception-v4, Inception-ResNet and the Impact of Residual Connections + on Learning + Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +slim = tf.contrib.slim + + +def block_inception_a(inputs, scope=None, reuse=None): + """Builds Inception-A block for Inception v4 network.""" + # By default use stride=1 and SAME padding + with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], + stride=1, padding='SAME'): + with tf.variable_scope(scope, 'BlockInceptionA', [inputs], reuse=reuse): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(inputs, 96, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 96, [3, 3], 
scope='Conv2d_0b_3x3') + branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 96, [1, 1], scope='Conv2d_0b_1x1') + return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) + + +def block_reduction_a(inputs, scope=None, reuse=None): + """Builds Reduction-A block for Inception v4 network.""" + # By default use stride=1 and SAME padding + with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], + stride=1, padding='SAME'): + with tf.variable_scope(scope, 'BlockReductionA', [inputs], reuse=reuse): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(inputs, 384, [3, 3], stride=2, padding='VALID', + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3') + branch_1 = slim.conv2d(branch_1, 256, [3, 3], stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID', + scope='MaxPool_1a_3x3') + return tf.concat(axis=3, values=[branch_0, branch_1, branch_2]) + + +def block_inception_b(inputs, scope=None, reuse=None): + """Builds Inception-B block for Inception v4 network.""" + # By default use stride=1 and SAME padding + with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], + stride=1, padding='SAME'): + with tf.variable_scope(scope, 'BlockInceptionB', [inputs], reuse=reuse): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 224, [1, 7], scope='Conv2d_0b_1x7') + branch_1 = slim.conv2d(branch_1, 256, [7, 1], scope='Conv2d_0c_7x1') + 
with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 192, [7, 1], scope='Conv2d_0b_7x1') + branch_2 = slim.conv2d(branch_2, 224, [1, 7], scope='Conv2d_0c_1x7') + branch_2 = slim.conv2d(branch_2, 224, [7, 1], scope='Conv2d_0d_7x1') + branch_2 = slim.conv2d(branch_2, 256, [1, 7], scope='Conv2d_0e_1x7') + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1') + return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) + + +def block_reduction_b(inputs, scope=None, reuse=None): + """Builds Reduction-B block for Inception v4 network.""" + # By default use stride=1 and SAME padding + with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], + stride=1, padding='SAME'): + with tf.variable_scope(scope, 'BlockReductionB', [inputs], reuse=reuse): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') + branch_0 = slim.conv2d(branch_0, 192, [3, 3], stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 256, [1, 7], scope='Conv2d_0b_1x7') + branch_1 = slim.conv2d(branch_1, 320, [7, 1], scope='Conv2d_0c_7x1') + branch_1 = slim.conv2d(branch_1, 320, [3, 3], stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_2'): + branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID', + scope='MaxPool_1a_3x3') + return tf.concat(axis=3, values=[branch_0, branch_1, branch_2]) + + +def block_inception_c(inputs, scope=None, reuse=None): + """Builds Inception-C block for Inception v4 network.""" + # By default use stride=1 and SAME padding + with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], + stride=1, 
padding='SAME'): + with tf.variable_scope(scope, 'BlockInceptionC', [inputs], reuse=reuse): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = tf.concat(axis=3, values=[ + slim.conv2d(branch_1, 256, [1, 3], scope='Conv2d_0b_1x3'), + slim.conv2d(branch_1, 256, [3, 1], scope='Conv2d_0c_3x1')]) + with tf.variable_scope('Branch_2'): + branch_2 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1') + branch_2 = slim.conv2d(branch_2, 448, [3, 1], scope='Conv2d_0b_3x1') + branch_2 = slim.conv2d(branch_2, 512, [1, 3], scope='Conv2d_0c_1x3') + branch_2 = tf.concat(axis=3, values=[ + slim.conv2d(branch_2, 256, [1, 3], scope='Conv2d_0d_1x3'), + slim.conv2d(branch_2, 256, [3, 1], scope='Conv2d_0e_3x1')]) + with tf.variable_scope('Branch_3'): + branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3') + branch_3 = slim.conv2d(branch_3, 256, [1, 1], scope='Conv2d_0b_1x1') + return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) + + +def inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None): + """Creates the Inception V4 network up to the given final endpoint. + + Args: + inputs: a 4-D tensor of size [batch_size, height, width, 3]. + final_endpoint: specifies the endpoint to construct the network up to. + It can be one of [ 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', + 'Mixed_3a', 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', + 'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e', + 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c', + 'Mixed_7d'] + scope: Optional variable_scope. + + Returns: + logits: the logits outputs of the model. + end_points: the set of end_points from the inception model. 
+ + Raises: + ValueError: if final_endpoint is not set to one of the predefined values, + """ + end_points = {} + + def add_and_check_final(name, net): + end_points[name] = net + return name == final_endpoint + + with tf.variable_scope(scope, 'InceptionV4', [inputs]): + with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], + stride=1, padding='SAME'): + # 299 x 299 x 3 + net = slim.conv2d(inputs, 32, [3, 3], stride=2, + padding='VALID', scope='Conv2d_1a_3x3') + if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points + # 149 x 149 x 32 + net = slim.conv2d(net, 32, [3, 3], padding='VALID', + scope='Conv2d_2a_3x3') + if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points + # 147 x 147 x 32 + net = slim.conv2d(net, 64, [3, 3], scope='Conv2d_2b_3x3') + if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points + # 147 x 147 x 64 + with tf.variable_scope('Mixed_3a'): + with tf.variable_scope('Branch_0'): + branch_0 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', + scope='MaxPool_0a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, 96, [3, 3], stride=2, padding='VALID', + scope='Conv2d_0a_3x3') + net = tf.concat(axis=3, values=[branch_0, branch_1]) + if add_and_check_final('Mixed_3a', net): return net, end_points + + # 73 x 73 x 160 + with tf.variable_scope('Mixed_4a'): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1') + branch_0 = slim.conv2d(branch_0, 96, [3, 3], padding='VALID', + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1') + branch_1 = slim.conv2d(branch_1, 64, [1, 7], scope='Conv2d_0b_1x7') + branch_1 = slim.conv2d(branch_1, 64, [7, 1], scope='Conv2d_0c_7x1') + branch_1 = slim.conv2d(branch_1, 96, [3, 3], padding='VALID', + scope='Conv2d_1a_3x3') + net = tf.concat(axis=3, values=[branch_0, branch_1]) + if add_and_check_final('Mixed_4a', net): return 
net, end_points + + # 71 x 71 x 192 + with tf.variable_scope('Mixed_5a'): + with tf.variable_scope('Branch_0'): + branch_0 = slim.conv2d(net, 192, [3, 3], stride=2, padding='VALID', + scope='Conv2d_1a_3x3') + with tf.variable_scope('Branch_1'): + branch_1 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', + scope='MaxPool_1a_3x3') + net = tf.concat(axis=3, values=[branch_0, branch_1]) + if add_and_check_final('Mixed_5a', net): return net, end_points + + # 35 x 35 x 384 + # 4 x Inception-A blocks + for idx in range(4): + block_scope = 'Mixed_5' + chr(ord('b') + idx) + net = block_inception_a(net, block_scope) + if add_and_check_final(block_scope, net): return net, end_points + + # 35 x 35 x 384 + # Reduction-A block + net = block_reduction_a(net, 'Mixed_6a') + if add_and_check_final('Mixed_6a', net): return net, end_points + + # 17 x 17 x 1024 + # 7 x Inception-B blocks + for idx in range(7): + block_scope = 'Mixed_6' + chr(ord('b') + idx) + net = block_inception_b(net, block_scope) + if add_and_check_final(block_scope, net): return net, end_points + + # 17 x 17 x 1024 + # Reduction-B block + net = block_reduction_b(net, 'Mixed_7a') + if add_and_check_final('Mixed_7a', net): return net, end_points + + # 8 x 8 x 1536 + # 3 x Inception-C blocks + for idx in range(3): + block_scope = 'Mixed_7' + chr(ord('b') + idx) + net = block_inception_c(net, block_scope) + if add_and_check_final(block_scope, net): return net, end_points + raise ValueError('Unknown final endpoint %s' % final_endpoint) + + +def inception_v4(inputs, num_classes=1001, is_training=True, + dropout_keep_prob=0.8, + reuse=None, + scope='InceptionV4', + create_aux_logits=True): + """Creates the Inception V4 model. + + Args: + inputs: a 4-D tensor of size [batch_size, height, width, 3]. + num_classes: number of predicted classes. + is_training: whether is training or not. + dropout_keep_prob: float, the fraction to keep before final layer. 
+ reuse: whether or not the network and its variables should be reused. To be + able to reuse 'scope' must be given. + scope: Optional variable_scope. + create_aux_logits: Whether to include the auxiliary logits. + + Returns: + logits: the logits outputs of the model. + end_points: the set of end_points from the inception model. + """ + end_points = {} + with tf.variable_scope(scope, 'InceptionV4', [inputs], reuse=reuse) as scope: + with slim.arg_scope([slim.batch_norm, slim.dropout], + is_training=is_training): + net, end_points = inception_v4_base(inputs, scope=scope) + + with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], + stride=1, padding='SAME'): + # Auxiliary Head logits + if create_aux_logits: + with tf.variable_scope('AuxLogits'): + # 17 x 17 x 1024 + aux_logits = end_points['Mixed_6h'] + aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3, + padding='VALID', + scope='AvgPool_1a_5x5') + aux_logits = slim.conv2d(aux_logits, 128, [1, 1], + scope='Conv2d_1b_1x1') + aux_logits = slim.conv2d(aux_logits, 768, + aux_logits.get_shape()[1:3], + padding='VALID', scope='Conv2d_2a') + aux_logits = slim.flatten(aux_logits) + aux_logits = slim.fully_connected(aux_logits, num_classes, + activation_fn=None, + scope='Aux_logits') + end_points['AuxLogits'] = aux_logits + + # Final pooling and prediction + with tf.variable_scope('Logits'): + # 8 x 8 x 1536 + net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', + scope='AvgPool_1a') + # 1 x 1 x 1536 + net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b') + net = slim.flatten(net, scope='PreLogitsFlatten') + end_points['PreLogitsFlatten'] = net + # 1536 + logits = slim.fully_connected(net, num_classes, activation_fn=None, + scope='Logits') + end_points['Logits'] = logits + end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions') + return logits, end_points + + +def inception_v4_arg_scope(weight_decay=0.00004, + use_batch_norm=True, + batch_norm_decay=0.9997, + 
batch_norm_epsilon=0.001): + """Defines the default arg scope for inception models. + Args: + weight_decay: The weight decay to use for regularizing the model. + use_batch_norm: "If `True`, batch_norm is applied after each convolution. + batch_norm_decay: Decay for batch norm moving average. + batch_norm_epsilon: Small float added to variance to avoid dividing by zero + in batch norm. + Returns: + An `arg_scope` to use for the inception models. + """ + batch_norm_params = { + # Decay for the moving averages. + 'decay': batch_norm_decay, + # epsilon to prevent 0s in variance. + 'epsilon': batch_norm_epsilon, + # collection containing update_ops. + 'updates_collections': tf.GraphKeys.UPDATE_OPS, + } + if use_batch_norm: + normalizer_fn = slim.batch_norm + normalizer_params = batch_norm_params + else: + normalizer_fn = None + normalizer_params = {} + # Set weight_decay for weights in Conv and FC layers. + with slim.arg_scope([slim.conv2d, slim.fully_connected], + weights_regularizer=slim.l2_regularizer(weight_decay)): + with slim.arg_scope( + [slim.conv2d], + weights_initializer=slim.variance_scaling_initializer(), + activation_fn=tf.nn.relu, + normalizer_fn=normalizer_fn, + normalizer_params=normalizer_params) as sc: + return sc + + +default_image_size = 299 diff --git a/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/inceptionapi.py b/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/inceptionapi.py index a72900b49..09d830c0d 100755 --- a/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/inceptionapi.py +++ b/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/inceptionapi.py @@ -15,111 +15,129 @@ # under the License. """ -Image classification with Inception. + Image classification with Inception. -This script exposes the tensorflow's inception classification service over REST API. + This script exposes the tensorflow's inception classification service over REST API. 
-For more details, visit: - https://tensorflow.org/tutorials/image_recognition/ + For more details, visit: + https://tensorflow.org/tutorials/image_recognition/ -Requirements : - Flask - tensorflow - numpy - requests + Requirements : + Flask + tensorflow + numpy + requests + pillow """ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os.path -import re -import sys -import tarfile +import os +import tempfile +import json +import logging +import requests + +from flask import Flask, request, Response, jsonify +from io import BytesIO +from logging.handlers import RotatingFileHandler +from PIL import Image +from time import time + +import tensorflow as tf + +from inception_v4 import default_image_size, inception_v4_arg_scope, inception_v4 try: - #This import is placed inside here to ensure that video_util and OpenCV is not required for image recognition APIs + # This import is placed inside here to ensure that video_util and OpenCV is not required for image recognition APIs from video_util import get_center_frame, get_frames_interval, get_n_frames except: print("Can't import video libraries, No video functionality is available") - -import numpy as np -from six.moves import urllib -import tensorflow as tf -from datasets import imagenet, dataset_utils -from nets import inception -from preprocessing import inception_preprocessing +json.encoder.FLOAT_REPR = lambda o: format(o, '.2f') # JSON serialization of floats slim = tf.contrib.slim - -import requests -import json -json.encoder.FLOAT_REPR = lambda o: format( - o, '.2f') # JSON serialization of floats -from time import time -from PIL import Image -from io import BytesIO -import tempfile -import flask - - FLAGS = tf.app.flags.FLAGS -# inception_v4.ckpt -# Inception V4 checkpoint file. -# imagenet_metadata.txt -# Map from synset ID to a human readable string. 
-# imagenet_lsvrc_2015_synsets.txt -# Text representation of a protocol buffer mapping a label to synset ID. -tf.app.flags.DEFINE_string( - 'model_dir', '/tmp/imagenet', - """Path to inception_v4.ckpt, """ - """imagenet_lsvrc_2015_synsets.txt, and """ - """imagenet_metadata.txt.""") -tf.app.flags.DEFINE_integer('port', '8764', """Server PORT, default:8764""") -tf.app.flags.DEFINE_string('log', 'inception.log', +tf.app.flags.DEFINE_string('model_dir', + '/usr/share/apache-tika/models/dl/image-video/recognition/', + """Path to inception_v4.ckpt & meta files""") +tf.app.flags.DEFINE_integer('port', + '8764', + """Server PORT, default:8764""") +tf.app.flags.DEFINE_string('log', + 'inception.log', """Log file name, default: inception.log""") -# pylint: disable=line-too-long -DATA_URL = 'http://download.tensorflow.org/models/inception_v4_2016_09_09.tar.gz' -# pylint: enable=line-too-long - - -def create_readable_names_for_imagenet_labels(): - """Create a dict mapping label id to human readable string. +def preprocess_image(image, height, width, central_fraction=0.875, scope=None): + """Prepare one image for evaluation. + If height and width are specified it would output an image with that size by + applying resize_bilinear. + If central_fraction is specified it would crop the central fraction of the + input image. + Args: + image: 3-D Tensor of image. If dtype is tf.float32 then the range should be + [0, 1], otherwise it would converted to tf.float32 assuming that the range + is [0, MAX], where MAX is largest positive representable number for + int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). + height: integer + width: integer + central_fraction: Optional Float, fraction of the image to crop. + scope: Optional scope for name_scope. Returns: - labels_to_names: dictionary where keys are integers from to 1000 - and values are human-readable names. 
- - We retrieve a synset file, which contains a list of valid synset labels used - by ILSVRC competition. There is one synset one per line, eg. - # n01440764 - # n01443537 - We also retrieve a synset_to_human_file, which contains a mapping from synsets - to human-readable names for every synset in Imagenet. These are stored in a - tsv format, as follows: - # n02119247 black fox - # n02119359 silver fox - We assign each synset (in alphabetical order) an integer, starting from 1 - (since 0 is reserved for the background class). - - Code is based on - https://github.com/tensorflow/models/blob/master/inception/inception/data/build_imagenet_data.py#L463 + 3-D float Tensor of prepared image. """ + with tf.name_scope(scope, 'eval_image', [image, height, width]): + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image with an area containing 87.5% of + # the original image. + if central_fraction: + image = tf.image.central_crop(image, central_fraction=central_fraction) + + if height and width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize_bilinear(image, [height, width], + align_corners=False) + image = tf.squeeze(image, [0]) + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + return image - # pylint: disable=line-too-long + +def create_readable_names_for_imagenet_labels(): + """ + Create a dict mapping label id to human readable string. + Returns: + labels_to_names: dictionary where keys are integers from to 1000 + and values are human-readable names. + + We retrieve a synset file, which contains a list of valid synset labels used + by ILSVRC competition. There is one synset one per line, eg. + # n01440764 + # n01443537 + We also retrieve a synset_to_human_file, which contains a mapping from synsets + to human-readable names for every synset in Imagenet. 
These are stored in a + tsv format, as follows: + # n02119247 black fox + # n02119359 silver fox + We assign each synset (in alphabetical order) an integer, starting from 1 + (since 0 is reserved for the background class). + + Code is based on + https://github.com/tensorflow/models/blob/master/inception/inception/data/build_imagenet_data.py + """ dest_directory = FLAGS.model_dir - synset_list = [s.strip() for s in open(os.path.join( - dest_directory, 'imagenet_lsvrc_2015_synsets.txt')).readlines()] + synset_list = [s.strip() for s in open(os.path.join(dest_directory, 'imagenet_lsvrc_2015_synsets.txt')).readlines()] num_synsets_in_ilsvrc = len(synset_list) assert num_synsets_in_ilsvrc == 1000 - synset_to_human_list = open(os.path.join( - dest_directory, 'imagenet_metadata.txt')).readlines() + synset_to_human_list = open(os.path.join(dest_directory, 'imagenet_metadata.txt')).readlines() num_synsets_in_all_imagenet = len(synset_to_human_list) assert num_synsets_in_all_imagenet == 21842 @@ -141,105 +159,48 @@ def create_readable_names_for_imagenet_labels(): return labels_to_names -def util_download(url, dest_directory): - """Downloads the file. - - Args: - url: URL to download the file from. - dest_directory: Destination directory - Returns: - Nothing +def get_remote_file(url, success=200, timeout=10): """ - filename = url.split('/')[-1] - filepath = os.path.join(dest_directory, filename) - - def _progress(count, block_size, total_size): - sys.stdout.write('\r>> Downloading %s %.1f%%' % ( - filename, float(count * block_size) / float(total_size) * 100.0)) - sys.stdout.flush() - filepath, _ = urllib.request.urlretrieve(url, filepath, _progress) - print() - statinfo = os.stat(filepath) - print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') - - -def util_download_tar(url, dest_directory): - """Downloads a file and extracts it. - - Args: - url: URL to download the file from. 
- dest_directory: Destination directory - Returns: - Nothing + Given HTTP URL, this api gets the content of it + returns (Content-Type, image_content) """ - filename = url.split('/')[-1] - filepath = os.path.join(dest_directory, filename) - - def _progress(count, block_size, total_size): - sys.stdout.write('\r>> Downloading %s %.1f%%' % ( - filename, float(count * block_size) / float(total_size) * 100.0)) - sys.stdout.flush() - filepath, _ = urllib.request.urlretrieve(url, filepath, _progress) - print() - statinfo = os.stat(filepath) - print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') - tarfile.open(filepath, 'r:gz').extractall(dest_directory) - - -def maybe_download_and_extract(): - """Download and extract model tar file.""" - dest_directory = FLAGS.model_dir - if not tf.gfile.Exists(dest_directory): - tf.gfile.MakeDirs(dest_directory) - if not tf.gfile.Exists(os.path.join(dest_directory, 'inception_v4.ckpt')): - util_download_tar(DATA_URL, dest_directory) - # pylint: disable=line-too-long - if not tf.gfile.Exists(os.path.join(dest_directory, 'imagenet_lsvrc_2015_synsets.txt')): - util_download( - 'https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data/imagenet_lsvrc_2015_synsets.txt', dest_directory) - if not tf.gfile.Exists(os.path.join(dest_directory, 'imagenet_metadata.txt')): - util_download( - 'https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data/imagenet_metadata.txt', dest_directory) - # pylint: enable=line-too-long + try: + app.logger.info("GET: %s" % url) + auth = None + res = requests.get(url, stream=True, timeout=timeout, auth=auth) + if res.status_code == success: + return res.headers.get('Content-Type', 'application/octet-stream'), res.raw.data + except: + pass + return None, None def current_time(): - """ - Returns current time in milli seconds - """ + """Returns current time in milli seconds""" + return int(1000 * time()) -class Classifier(flask.Flask): - ''' - 
Classifier Service class - ''' +class Classifier(Flask): + """Classifier Service class""" def __init__(self, name): super(Classifier, self).__init__(name) - maybe_download_and_extract() - import logging - from logging.handlers import RotatingFileHandler - file_handler = RotatingFileHandler( - FLAGS.log, maxBytes=1024 * 1024 * 100, backupCount=20) + file_handler = RotatingFileHandler(FLAGS.log, maxBytes=1024 * 1024 * 100, backupCount=20) file_handler.setLevel(logging.INFO) - formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s") + formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") file_handler.setFormatter(formatter) self.logger.addHandler(file_handler) - self.names = imagenet.create_readable_names_for_imagenet_labels() - self.image_size = inception.inception_v4.default_image_size + self.names = create_readable_names_for_imagenet_labels() + self.image_size = default_image_size self.image_str_placeholder = tf.placeholder(tf.string) image = tf.image.decode_jpeg(self.image_str_placeholder, channels=3) - processed_image = inception_preprocessing.preprocess_image( - image, self.image_size, self.image_size, is_training=False) + processed_image = preprocess_image(image, self.image_size, self.image_size) processed_images = tf.expand_dims(processed_image, 0) - # Create the model, use the default arg scope to configure the - # batch norm parameters. - with slim.arg_scope(inception.inception_v4_arg_scope()): - logits, _ = inception.inception_v4( - processed_images, num_classes=1001, is_training=False) + # create the model, use the default arg scope to configure the batch norm parameters. 
+ with slim.arg_scope(inception_v4_arg_scope()): + logits, _ = inception_v4(processed_images, num_classes=1001, is_training=False) self.probabilities = tf.nn.softmax(logits) dest_directory = FLAGS.model_dir @@ -250,49 +211,36 @@ def __init__(self, name): self.sess = tf.Session() init_fn(self.sess) - def classify(self, image_string, topk): - eval_probabilities = self.sess.run(self.probabilities, feed_dict={ - self.image_str_placeholder: image_string}) + def classify(self, image_string, topn, min_confidence): + eval_probabilities = self.sess.run(self.probabilities, feed_dict={self.image_str_placeholder: image_string}) eval_probabilities = eval_probabilities[0, 0:] - sorted_inds = [i[0] for i in sorted( - enumerate(-eval_probabilities), key=lambda x:x[1])] + sorted_inds = [i[0] for i in sorted(enumerate(-eval_probabilities), key=lambda x: x[1])] + + if topn is None: + topn = len(sorted_inds) - if topk == None: - topk = len(sorted_inds) - res = [] - for i in range(topk): + for i in range(topn): index = sorted_inds[i] score = float(eval_probabilities[index]) - res.append((index, self.names[index], score)) + if min_confidence is None: + res.append((index, self.names[index], score)) + else: + if score >= min_confidence: + res.append((index, self.names[index], score)) + else: + # the scores are in sorted order, so we can break the loop whenever we get a low score object + break return res -from flask import Flask, request, abort, g, Response, jsonify app = Classifier(__name__) -def get_remotefile(url, success=200, timeout=10): - """ - Given HTTP URL, this api gets the content of it - returns (Content-Type, image_content) - """ - try: - app.logger.info("GET: %s" % url) - auth = None - res = requests.get(url, stream=True, timeout=timeout, auth=auth) - if res.status_code == success: - return res.headers.get('Content-Type', 'application/octet-stream'), res.raw.data - except: - pass - return None, None - - @app.route("/") def index(): - """ - The index page which provide 
information about other API end points - """ + """The index page which provide information about other API end points""" + return """ <div> <h1> Inception REST API </h1> @@ -302,21 +250,22 @@ def index(): <li> <code>/inception/v4/ping </code> - <br/> <b> Description : </b> checks availability of the service. returns "pong" with status 200 when it is available </li> - <li> <code>/inception/v4/classify</code> - <br/> + <li> <code>/inception/v4/classify/image</code> - <br/> <table> <tr><th align="left"> Description </th><td> This is a classifier service that can classify images</td></tr> <tr><td></td> <td>Query Params : <br/> - <code>topk </code>: type = int : top classes to get; default : 10 <br/> + <code>topn </code>: type = int : top classes to get; default : 5 <br/> + <code>min_confidence </code>: type = float : minimum confidence that a label should have to exist in topn; default : 0.015 <br/> <code>human </code>: type = boolean : human readable class names; default : true <br/> </td></tr> <tr><th align="left"> How to supply Image Content </th></tr> <tr><th align="left"> With HTTP GET : </th> <td> Include a query parameter <code>url </code> which is an http url of JPEG image <br/> - Example: <code> curl "localhost:8764/inception/v4/classify?url=http://xyz.com/example.jpg"</code> + Example: <code> curl "localhost:8764/inception/v4/classify/image?url=http://xyz.com/example.jpg"</code> </td></tr> <tr><th align="left"> With HTTP POST :</th><td> POST JPEG image content as binary data in request body. 
<br/> - Example: <code> curl -X POST "localhost:8764/inception/v4/classify?topk=10&human=false" --data-binary @example.jpg </code> + Example: <code> curl -X POST "localhost:8764/inception/v4/classify/image?topn=5&min_confidence=0.015&human=false" --data-binary @example.jpg </code> </td></tr> </table> </li> @@ -324,7 +273,8 @@ def index(): <table> <tr><th align="left"> Description </th><td> This is a classifier service that can classify videos</td></tr> <tr><td></td> <td>Query Params : <br/> - <code>topk </code>: type = int : top classes to get; default : 10 <br/> + <code>topn </code>: type = int : top classes to get; default : 5 <br/> + <code>min_confidence </code>: type = float : minimum confidence that a label should have to exist in topn; default : 0.015 <br/> <code>human </code>: type = boolean : human readable class names; default : true <br/> <code>mode </code>: options = <code>{"center", "interval", "fixed"}</code> : Modes of frame extraction; default : center <br/>   <code>"center"</code> - Just one frame in center. <br/> @@ -332,7 +282,7 @@ def index():   <code>"fixed"</code> - Extract fixed number of frames.<br/> <code>frame-interval </code>: type = int : Interval for frame extraction to be used with INTERVAL mode. If frame_interval=10 then every 10th frame will be extracted; default : 10 <br/> <code>num-frame </code>: type = int : Number of frames to be extracted from video while using FIXED model. If num_frame=10 then 10 frames equally distant from each other will be extracted; default : 10 <br/> - + </td></tr> <tr><th align="left"> How to supply Video Content </th></tr> <tr><th align="left"> With HTTP GET : </th> <td> @@ -342,7 +292,7 @@ def index(): <tr><th align="left"> With HTTP POST :</th><td> POST video content as binary data in request body. If video can be decoded by OpenCV it should be fine. 
It's tested on mp4 and avi on mac <br/> Include a query parameter <code>ext </code>this extension is needed to tell OpenCV which decoder to use, default is ".mp4" </br> - Example: <code> curl -X POST "localhost:8764/inception/v4/classify/video?topk=10&human=false" --data-binary @example.mp4 </code> + Example: <code> curl -X POST "localhost:8764/inception/v4/classify/video?topn=5&min_confidence=0.015&human=false" --data-binary @example.mp4 </code> </td></tr> </table> </li> @@ -353,28 +303,28 @@ def index(): @app.route("/inception/v4/ping", methods=["GET"]) def ping_pong(): - """API to do health check. If this says status code 200, then healthy - """ + """API to do health check. If this says status code 200, then healthy""" + return "pong" -@app.route("/inception/v4/classify", methods=["GET", "POST"]) +@app.route("/inception/v4/classify/image", methods=["GET", "POST"]) def classify_image(): - """ - API to classify images - """ + """API to classify images""" + image_format = "not jpeg" st = current_time() - topk = int(request.args.get("topk", "10")) + topn = int(request.args.get("topn", "5")) + min_confidence = float(request.args.get("min_confidence", "0.015")) human = request.args.get("human", "true").lower() in ("true", "1", "yes") if request.method == 'POST': image_data = request.get_data() else: url = request.args.get("url") - c_type, image_data = get_remotefile(url) + c_type, image_data = get_remote_file(url) if not image_data: - return flask.Response(status=400, response=jsonify(error="Couldnot HTTP GET %s" % url)) + return Response(status=400, response=jsonify(error="Could not HTTP GET %s" % url)) if 'image/jpeg' in c_type: image_format = "jpeg" @@ -397,11 +347,14 @@ def classify_image(): read_time = current_time() - st st = current_time() # reset start time try: - classes = app.classify(image_string=jpg_image, topk=topk) + classes = app.classify(image_string=jpg_image, topn=topn, min_confidence=min_confidence) except Exception as e: app.logger.error(e) return 
Response(status=400, response=str(e)) classids, classnames, confidence = zip(*classes) + + print(classnames, confidence) + classifier_time = current_time() - st app.logger.info("Classifier time : %d" % classifier_time) res = { @@ -417,104 +370,100 @@ def classify_image(): res['classnames'] = classnames return Response(response=json.dumps(res), status=200, mimetype="application/json") -CENTER = "center" -INTERVAL = "interval" -FIXED = "fixed" - -ALLOWED_MODE = set([CENTER ,INTERVAL , FIXED]) @app.route("/inception/v4/classify/video", methods=["GET", "POST"]) def classify_video(): """ - API to classify videos - Request args - - url - PATH of file - topk - number of labels - human - human readable or not - mode - Modes of frame extraction {"center", "interval", "fixed"} - "center" - Just one frame in center. <Default option> - "interval" - Extracts frames after fixed interval. - "fixed" - Extract fixed number of frames. - frame-interval - Interval for frame extraction to be used with INTERVAL mode. If frame_interval=10 then every 10th frame will be extracted. - num-frame - Number of frames to be extracted from video while using FIXED model. If num_frame=10 then 10 frames equally distant from each other will be extracted - - ext - If video is sent in binary format, then ext is needed to tell OpenCV which decoder to use. eg ".mp4" + API to classify videos + Request args - + url - PATH of file + topn - number of top scoring labels + min_confidence - minimum confidence that a label should have to exist in topn + human - human readable or not + mode - Modes of frame extraction {"center", "interval", "fixed"} + "center" - Just one frame in center. <Default option> + "interval" - Extracts frames after fixed interval. + "fixed" - Extract fixed number of frames. + frame-interval - Interval for frame extraction to be used with INTERVAL mode. If frame_interval=10 then every 10th frame will be extracted. 
+ num-frame - Number of frames to be extracted from video while using FIXED model. If num_frame=10 then 10 frames equally distant from each other will be extracted + + ext - If video is sent in binary format, then ext is needed to tell OpenCV which decoder to use. eg ".mp4" """ st = current_time() - topk = int(request.args.get("topk", "10")) + topn = int(request.args.get("topn", "5")) + min_confidence = float(request.args.get("min_confidence", "0.015")) human = request.args.get("human", "true").lower() in ("true", "1", "yes") - - mode = request.args.get("mode", CENTER).lower() - if mode not in ALLOWED_MODE: + + mode = request.args.get("mode", "center").lower() + if mode not in {"center", "interval", "fixed"}: ''' Throw invalid request error ''' - return flask.Response(status=400, response=jsonify(error="not a valid mode. Available mode %s" % str(ALLOWED_MODE))) - + return Response(status=400, response=jsonify(error="not a valid mode. Available mode %s" % str(ALLOWED_MODE))) + frame_interval = int(request.args.get("frame-interval", "10")) num_frame = int(request.args.get("num-frame", "10")) - + if request.method == 'POST': video_data = request.get_data() ext = request.args.get("ext", ".mp4").lower() - + temp_file = tempfile.NamedTemporaryFile(suffix=ext) temp_file.file.write(video_data) temp_file.file.close() - + url = temp_file.name - else: url = request.args.get("url") - + read_time = current_time() - st - st = current_time() # reset start time - - if mode == CENTER: + st = current_time() # reset start time + + if mode == "center": image_data_arr = [get_center_frame(url)] - elif mode == INTERVAL: + elif mode == "interval": image_data_arr = get_frames_interval(url, frame_interval) else: image_data_arr = get_n_frames(url, num_frame) - + classes = [] for image_data in image_data_arr: try: - _classes = app.classify(image_data , topk=None) + _classes = app.classify(image_data, topn=None, min_confidence=None) except Exception as e: app.logger.error(e) return 
Response(status=400, response=str(e)) - + _classes.sort() if len(classes) == 0: classes = _classes else: - for idx,_c in enumerate(_classes): + for idx, _c in enumerate(_classes): c = list(classes[idx]) c[2] += _c[2] classes[idx] = tuple(c) - - - # avg out confidence score - for idx,c in enumerate(classes): + + top_classes = [] + for c in classes: c = list(c) - c[2] = c[2]/len(image_data_arr) - - classes[idx] = tuple(c) - - classes = sorted(classes, key=lambda tup: tup[2])[-topk:][::-1] + # avg out confidence score + avg_score = c[2] / len(image_data_arr) + c[2] = avg_score + if avg_score >= min_confidence: + top_classes.append(tuple(c)) + + top_classes = sorted(top_classes, key=lambda tup: tup[2])[-topn:][::-1] + + classids, classnames, confidence = zip(*top_classes) - classids, classnames, confidence = zip(*classes) - - classifier_time = current_time() - st app.logger.info("Classifier time : %d" % classifier_time) res = { - 'classids' : classids, + 'classids': classids, 'confidence': confidence, 'time': { - 'read' : read_time, + 'read': read_time, 'classification': classifier_time, 'units': 'ms' } @@ -523,10 +472,12 @@ def classify_video(): res['classnames'] = classnames return Response(response=json.dumps(res), status=200, mimetype="application/json") + def main(_): if not app.debug: print("Serving on port %d" % FLAGS.port) app.run(host="0.0.0.0", port=FLAGS.port) + if __name__ == '__main__': tf.app.run() diff --git a/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/video_util.py b/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/video_util.py index 453f61f0f..a4c208bf9 100644 --- a/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/video_util.py +++ b/tika-parsers/src/main/resources/org/apache/tika/parser/recognition/tf/video_util.py @@ -1,40 +1,39 @@ #!/usr/bin/env python -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
import cv2 -import os -import sys import ntpath import numpy as np -print "cv2.__version__", cv2.__version__ +print("cv2.__version__", cv2.__version__) CV_FRAME_COUNT = None -if hasattr(cv2,"cv"): +if hasattr(cv2, "cv"): CV_FRAME_COUNT = cv2.cv.CV_CAP_PROP_FRAME_COUNT else: CV_FRAME_COUNT = cv2.CAP_PROP_FRAME_COUNT + def _get_image_from_array(image_array): - #JPG to support tensorflow - byte_arr = cv2.imencode(".jpg", image_array )[1] + # JPG to support tensorflow + byte_arr = cv2.imencode(".jpg", image_array)[1] return "".join(map(chr, byte_arr)) - + + def _path_leaf(path): """ Returns file name from path. Path should not end with slash(/) @@ -42,72 +41,73 @@ def _path_leaf(path): head, tail = ntpath.split(path) return tail or ntpath.basename(head) + def get_center_frame(video_path): """ Traverse till half of video and saves center snapshot @param video_path: Path to video file on system """ cap = cv2.VideoCapture(video_path) - + length = int(cap.get(CV_FRAME_COUNT)) - - success,image = cap.read() + + success, image = cap.read() count = 0 - - while(success and count < length/2): - success,image = cap.read() - count += 1 - + + while success and count < length / 2: + success, image = cap.read() + count += 1 + return _get_image_from_array(image) - + + def get_frames_interval(video_path, frame_interval): """ - Selects one frames after every frame_interval + Selects one frames after every frame_interval @param video_path: Path to video file on system @param frame_interval: Interval after which frame should be picked. 
If frame_interval=10 then every 10th frame will be extracted """ cap = cv2.VideoCapture(video_path) - + length = int(cap.get(CV_FRAME_COUNT)) - - success,image = cap.read() + + success, image = cap.read() count = 0 - + image_arr = [] - while(success and count < length): - success,image = cap.read() + while success and count < length: + success, image = cap.read() if count % frame_interval == 0: - image = _get_image_from_array(image ) + image = _get_image_from_array(image) image_arr.append(image) - + count += 1 - + return image_arr - + + def get_n_frames(video_path, num_frame): """ - Get N frames equidistant to each other in a video + Get N frames equidistant to each other in a video @param video_path: Path to video file on system @param num_frame: Number of frames to be extracted from video. If num_frame=10 then 10 frames equally distant from each other will be extracted """ cap = cv2.VideoCapture(video_path) - + length = int(cap.get(CV_FRAME_COUNT)) - - op_frame_idx = set(np.linspace(0, length - 2, num_frame, dtype=int)) - + + op_frame_idx = set(np.linspace(0, length - 2, num_frame, dtype=int)) + success, image = cap.read() count = 0 - + image_arr = [] - while(success and count < length): + while success and count < length: success, image = cap.read() if success and count in op_frame_idx: - - image = _get_image_from_array(image ) + image = _get_image_from_array(image) image_arr.append(image) - count += 1 - - return image_arr + count += 1 + return image_arr diff --git a/tika-parsers/src/test/resources/org/apache/tika/parser/recognition/tika-config-tflow-rest.xml b/tika-parsers/src/test/resources/org/apache/tika/parser/recognition/tika-config-tflow-rest.xml index b0e5099b1..69a65d076 100644 --- a/tika-parsers/src/test/resources/org/apache/tika/parser/recognition/tika-config-tflow-rest.xml +++ b/tika-parsers/src/test/resources/org/apache/tika/parser/recognition/tika-config-tflow-rest.xml @@ -23,6 +23,7 @@ <mime>image/png</mime> <mime>image/gif</mime> <params> + 
<param name="apiBaseUri" type="uri">http://localhost:8764/inception/v4</param> <param name="topN" type="int">2</param> <param name="minConfidence" type="double">0.015</param> <param name="class" type="string">org.apache.tika.parser.recognition.tf.TensorflowRESTRecogniser</param> diff --git a/tika-parsers/src/test/resources/org/apache/tika/parser/recognition/tika-config-tflow-video-rest.xml b/tika-parsers/src/test/resources/org/apache/tika/parser/recognition/tika-config-tflow-video-rest.xml index c6c50ed83..eb634a391 100644 --- a/tika-parsers/src/test/resources/org/apache/tika/parser/recognition/tika-config-tflow-video-rest.xml +++ b/tika-parsers/src/test/resources/org/apache/tika/parser/recognition/tika-config-tflow-video-rest.xml @@ -21,11 +21,11 @@ <parser class="org.apache.tika.parser.recognition.ObjectRecognitionParser"> <mime>video/mp4</mime> <params> - <param name="topN" type="int">4</param> + <param name="apiBaseUri" type="uri">http://localhost:8764/inception/v4</param> + <param name="topN" type="int">2</param> <param name="minConfidence" type="double">0.015</param> + <param name="mode" type="string">fixed</param> <param name="class" type="string">org.apache.tika.parser.recognition.tf.TensorflowRESTVideoRecogniser</param> - <param name="healthUri" type="uri">http://localhost:8764/inception/v4/ping</param> - <param name="apiUri" type="uri">http://localhost:8764/inception/v4/classify/video?mode=fixed</param> </params> </parser> </parsers> ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Standardizing current Object Recognition REST parsers > ----------------------------------------------------- > > Key: TIKA-2400 > URL: https://issues.apache.org/jira/browse/TIKA-2400 > Project: Tika > Issue Type: Sub-task > Components: parser > Reporter: Thejan Wijesinghe > Priority: Minor > Fix For: 1.17 > > > # This involves adding apiBaseUris and refactoring current Object Recognition > REST parsers. > # Refactoring dockerfiles related to those parsers. > # Moving the logic related to checking minimum confidence into servers. -- This message was sent by Atlassian JIRA (v6.4.14#64029)