This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 5f2e30215 TIKA-4499 - remove tika-dl module (#2347)
5f2e30215 is described below

commit 5f2e30215e6f60174ca7fcd814a55adc51e2452a
Author: Tim Allison <[email protected]>
AuthorDate: Fri Oct 3 12:39:55 2025 -0400

    TIKA-4499 - remove tika-dl module (#2347)
---
 CHANGES.txt                                        |   2 +
 tika-bom/pom.xml                                   |   5 -
 tika-parsers/tika-parsers-ml/pom.xml               |   1 -
 tika-parsers/tika-parsers-ml/tika-dl/pom.xml       | 195 ------------
 .../tika/dl/imagerec/DL4JInceptionV3Net.java       | 340 ---------------------
 .../org/apache/tika/dl/imagerec/DL4JVGG16Net.java  | 158 ----------
 .../tika/dl/imagerec/DL4JInceptionV3NetTest.java   |  63 ----
 .../apache/tika/dl/imagerec/DL4JVGG16NetTest.java  |  64 ----
 .../resources/org/apache/tika/dl/imagerec/cat.jpg  | Bin 7686 -> 0 bytes
 .../tika/dl/imagerec/dl4j-inception3-config.xml    |  35 ---
 .../apache/tika/dl/imagerec/dl4j-vgg16-config.xml  |  32 --
 .../resources/org/apache/tika/dl/imagerec/lion.jpg | Bin 44441 -> 0 bytes
 12 files changed, 2 insertions(+), 893 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 9e19666ad..bf320ab3a 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -13,6 +13,8 @@ Release 4.0.0-BETA1 - ???
 
    * Removed the dotnet module (TIKA-4332).
 
+   * Remove the tika-dl module (TIKA-4499).
+
   OTHER CHANGES
 
    * Fix concurrency bug in TikaToXMP (TIKA-4393)
diff --git a/tika-bom/pom.xml b/tika-bom/pom.xml
index ff9380841..d57fad573 100644
--- a/tika-bom/pom.xml
+++ b/tika-bom/pom.xml
@@ -275,11 +275,6 @@
         <artifactId>tika-age-recogniser</artifactId>
         <version>4.0.0-SNAPSHOT</version>
       </dependency>
-      <dependency>
-        <groupId>org.apache.tika</groupId>
-        <artifactId>tika-dl</artifactId>
-        <version>4.0.0-SNAPSHOT</version>
-      </dependency>
       <dependency>
         <groupId>org.apache.tika</groupId>
         <artifactId>tika-parser-advancedmedia-module</artifactId>
diff --git a/tika-parsers/tika-parsers-ml/pom.xml b/tika-parsers/tika-parsers-ml/pom.xml
index dbdb41248..0f9a5f048 100644
--- a/tika-parsers/tika-parsers-ml/pom.xml
+++ b/tika-parsers/tika-parsers-ml/pom.xml
@@ -102,7 +102,6 @@
         <module>tika-age-recogniser</module>
         <module>tika-parser-advancedmedia-module</module>
         <module>tika-parser-advancedmedia-package</module>
-        <module>tika-dl</module>
       </modules>
     </profile>
   </profiles>
diff --git a/tika-parsers/tika-parsers-ml/tika-dl/pom.xml b/tika-parsers/tika-parsers-ml/tika-dl/pom.xml
deleted file mode 100644
index ca2b189ea..000000000
--- a/tika-parsers/tika-parsers-ml/tika-dl/pom.xml
+++ /dev/null
@@ -1,195 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-  <parent>
-    <groupId>org.apache.tika</groupId>
-    <artifactId>tika-parsers-ml</artifactId>
-    <version>4.0.0-SNAPSHOT</version>
-  </parent>
-
-  <artifactId>tika-dl</artifactId>
-  <packaging>jar</packaging>
-
-  <name>Apache Tika Deep Learning (powered by DL4J)</name>
-  <url>http://maven.apache.org</url>
-
-
-  <dependencyManagement>
-      <dependencies>
-          <dependency>
-              <groupId>org.bytedeco</groupId>
-              <artifactId>openblas-platform</artifactId>
-              <version>0.3.30-1.5.12</version>
-          </dependency>
-          <dependency>
-              <groupId>org.bytedeco</groupId>
-              <artifactId>openblas</artifactId>
-              <version>0.3.30-1.5.12</version>
-          </dependency>
-          <!-- can't update above 4.5.5-1.5.7, UnsatisfiedLinkError: no jniopencv_core in java.library.path -->
-          <dependency>
-              <groupId>org.bytedeco</groupId>
-              <artifactId>opencv</artifactId>
-              <version>4.11.0-1.5.12</version>
-          </dependency>
-          <dependency>
-              <groupId>it.unimi.dsi</groupId>
-              <artifactId>fastutil</artifactId>
-              <version>${fastutil.version}</version>
-          </dependency>
-      </dependencies>
-  </dependencyManagement>
-
-  <dependencies>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parser-advancedmedia-module</artifactId>
-      <version>${project.version}</version>
-      <scope>provided</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.datavec</groupId>
-      <artifactId>datavec-data-image</artifactId>
-      <version>${dl4j.version}</version>
-      <scope>provided</scope>
-      <exclusions>
-        <exclusion>
-          <groupId>org.bytedeco</groupId>
-          <artifactId>ffmpeg</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-    <!-- TODO restore after ffmpeg has been updated -->
-    <!--
-    <dependency>
-      <groupId>org.bytedeco</groupId>
-      <artifactId>ffmpeg</artifactId>
-      <version>7.1-1.5.11</version>
-      <scope>provided</scope>
-    </dependency>
-    -->
-    <dependency>
-      <groupId>org.deeplearning4j</groupId>
-      <artifactId>deeplearning4j-zoo</artifactId>
-      <version>${dl4j.version}</version>
-      <scope>provided</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.deeplearning4j</groupId>
-      <artifactId>deeplearning4j-modelimport</artifactId>
-      <version>${dl4j.version}</version>
-      <scope>provided</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.deeplearning4j</groupId>
-      <artifactId>deeplearning4j-nn</artifactId>
-      <version>${dl4j.version}</version>
-      <scope>provided</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.nd4j</groupId>
-      <artifactId>nd4j-api</artifactId>
-      <version>${dl4j.version}</version>
-      <scope>provided</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.nd4j</groupId>
-      <artifactId>nd4j-native-platform</artifactId>
-      <version>${dl4j.version}</version>
-      <scope>provided</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.logging.log4j</groupId>
-      <artifactId>log4j-slf4j2-impl</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>commons-net</groupId>
-      <artifactId>commons-net</artifactId>
-      <version>${commons.net.version}</version>
-      <scope>provided</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.bytedeco</groupId>
-      <artifactId>javacpp</artifactId>
-      <version>${javacpp.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.bytedeco</groupId>
-      <artifactId>javacpp-platform</artifactId>
-      <version>${javacpp.version}</version>
-    </dependency>
-  </dependencies>
-
-  <build>
-    <plugins>
-      <plugin>
-        <!--
-          To reduce the size of jar by excluding unnecessary native libs,
-          set `-Djavacpp.platform=<target>`
-          possible targets = {android-arm, linux-x86_64, macosx-x86_64, windows-x86_64}
-          More details here https://github.com/bytedeco/javacpp-presets/blob/master/README.md#downloads
-
-          By default, native libs for all major native platforms are included.
-         -->
-        <artifactId>maven-assembly-plugin</artifactId>
-        <configuration>
-          <descriptorRefs>
-            <descriptorRef>jar-with-dependencies</descriptorRef>
-          </descriptorRefs>
-        </configuration>
-        <executions>
-          <execution>
-            <id>make-assembly</id>
-            <phase>package</phase>
-            <goals>
-              <goal>single</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-jar-plugin</artifactId>
-        <configuration>
-          <archive>
-            <manifestEntries>
-              <Automatic-Module-Name>org.apache.tika.dl</Automatic-Module-Name>
-            </manifestEntries>
-          </archive>
-        </configuration>
-      </plugin>
-
-      <plugin>
-        <groupId>org.apache.rat</groupId>
-        <artifactId>apache-rat-plugin</artifactId>
-        <version>${rat.version}</version>
-        <configuration>
-        </configuration>
-      </plugin>
-
-    </plugins>
-  </build>
-
-  <scm>
-    <tag>3.0.0-rc1</tag>
-  </scm>
-</project>
diff --git a/tika-parsers/tika-parsers-ml/tika-dl/src/main/java/org/apache/tika/dl/imagerec/DL4JInceptionV3Net.java b/tika-parsers/tika-parsers-ml/tika-dl/src/main/java/org/apache/tika/dl/imagerec/DL4JInceptionV3Net.java
deleted file mode 100644
index 69b6b34dd..000000000
--- a/tika-parsers/tika-parsers-ml/tika-dl/src/main/java/org/apache/tika/dl/imagerec/DL4JInceptionV3Net.java
+++ /dev/null
@@ -1,340 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.dl.imagerec;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.net.URL;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.io.IOUtils;
-import org.datavec.image.loader.NativeImageLoader;
-import org.deeplearning4j.nn.graph.ComputationGraph;
-import org.deeplearning4j.nn.modelimport.keras.KerasModel;
-import 
org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException;
-import 
org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException;
-import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelBuilder;
-import org.json.simple.JSONArray;
-import org.json.simple.JSONObject;
-import org.json.simple.parser.JSONParser;
-import org.json.simple.parser.ParseException;
-import org.nd4j.linalg.api.ndarray.INDArray;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-import org.apache.tika.config.Field;
-import org.apache.tika.config.InitializableProblemHandler;
-import org.apache.tika.config.Param;
-import org.apache.tika.exception.TikaConfigException;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.recognition.ObjectRecogniser;
-import org.apache.tika.parser.recognition.RecognisedObject;
-
-/**
- * {@link DL4JInceptionV3Net} is an implementation of {@link ObjectRecogniser}.
- * This object recogniser is powered by <a href="http://deeplearning4j.org">Deeplearning4j</a>.
- * This implementation is pre configured to use <a href="https://arxiv.org/abs/1512.00567">
- *     Google's InceptionV3 model </a> pre trained on
- * ImageNet corpus. The models references in default settings are originally trained and exported
- * from <a href="http://keras.io">Keras </a> and imported using DL4J's importer tools.
- * <p>
- * Although this implementation is made to work out of the box without user 
attention,
- * for advances users who are interested in tweaking the settings, the 
following fields are
- * configurable:
- * <ul>
- * <li>{@link #modelWeightsPath}</li>
- * <li>{@link #labelFile}</li>
- * <li>{@link #labelLang}</li>
- * <li>{@link #cacheDir}</li>
- * <li>{@link #imgWidth}</li>
- * <li>{@link #imgHeight}</li>
- * <li>{@link #imgChannels}</li>
- * <li>{@link #minConfidence}</li>
- * </ul>
- * </p>
- *
- * @see ObjectRecogniser
- * @see org.apache.tika.parser.recognition.ObjectRecognitionParser
- * @see org.apache.tika.parser.recognition.tf.TensorflowImageRecParser
- * @see org.apache.tika.parser.recognition.tf.TensorflowRESTRecogniser
- * @since Tika 1.15
- */
-public class DL4JInceptionV3Net implements ObjectRecogniser {
-
-    private static final Set<MediaType> MEDIA_TYPES =
-            Collections.singleton(MediaType.image("jpeg"));
-    private static final Logger LOG = 
LoggerFactory.getLogger(DL4JInceptionV3Net.class);
-    private static final String DEF_WEIGHTS_URL =
-            "https://github.com/USCDataScience/tika-dockers/releases/download/v0.2/inception_v3_keras_2.h5";
-    private static final String DEF_LABEL_MAPPING_URL =
-            "https://github.com/USCDataScience/tika-dockers/releases/download/v0.2/imagenet_class_index.json";
-    private static final String BASE_DIR =
-            System.getProperty("user.home") + File.separator + ".tika-dl" + 
File.separator +
-                    "models" + File.separator + "keras";
-    private static final String MODEL_DIR = BASE_DIR + File.separator + 
"inception-v3";
-
-    /**
-     * Cache dir to be used for downloading the weights file.
-     * This is used to download the model.
-     */
-    @Field
-    private File cacheDir = new File(MODEL_DIR);
-
-    /**
-     * Path to a HDF5 file that contains weights of the Keras network
-     * that was obtained by training the network on a labelled dataset.
-     * <br/>
-     * Note: when the value is set to &lt;download&gt;, the default model will 
be
-     * downloaded from {@value #DEF_WEIGHTS_URL}
-     */
-    @Field
-    private String modelWeightsPath = DEF_WEIGHTS_URL;
-
-    /***
-     * Path to file that tells how to map node index to human readable label 
names
-     * <br/>
-     * The default is retrieved from {@value DEF_LABEL_MAPPING_URL}
-     */
-    @Field
-    private String labelFile = DEF_LABEL_MAPPING_URL;
-
-    /**
-     * Language name of the labels.
-     * <br/>
-     * Default is 'en'
-     */
-    @Field
-    private String labelLang = "en";
-
-    @Field
-    private int imgHeight = 299;
-
-    @Field
-    private int imgWidth = 299;
-
-    @Field
-    private int imgChannels = 3;
-    /***
-     * Ignores the labels that are below this confidence score
-     */
-    @Field
-    private double minConfidence = 0.005;
-
-    private ComputationGraph graph;
-    private NativeImageLoader imageLoader;
-    private Map<Integer, String> labelMap;
-
-    private static synchronized File cachedDownload(File cacheDir, URI uri) 
throws IOException {
-
-        if ("file".equals(uri.getScheme()) || uri.getScheme() == null) {
-            return new File(uri);
-        }
-        if (!cacheDir.exists()) {
-            cacheDir.mkdirs();
-        }
-        String[] parts = uri.toASCIIString().split("/");
-        File cacheFile = new File(cacheDir, parts[parts.length - 1]);
-        File successFlag = new File(cacheFile.getAbsolutePath() + ".success");
-
-        if (cacheFile.exists() && successFlag.exists()) {
-            LOG.info("Cache exist at {}. Not downloading it", 
cacheFile.getAbsolutePath());
-        } else {
-            if (successFlag.exists()) {
-                successFlag.delete();
-            }
-            LOG.info("Cache doesn't exist. Going to make a copy");
-            LOG.info("This might take a while! GET {}", uri);
-            FileUtils.copyURLToFile(uri.toURL(), cacheFile, 5000, 60000);
-            //restore the success flag again
-            FileUtils.write(successFlag, "CopiedAt:" + 
System.currentTimeMillis(),
-                    StandardCharsets.UTF_8);
-        }
-        return cacheFile;
-    }
-
-    @Override
-    public Set<MediaType> getSupportedMimes() {
-        return MEDIA_TYPES;
-    }
-
-    /***
-     *
-     * @param path path to resolve the file
-     * @return File or null
-     */
-    private File retrieveFile(String path) {
-        File file = new File(path);
-        if (!file.exists()) {
-            LOG.warn("File {} not found in local file system. Asking the 
classloader", path);
-            URL url = getClass().getClassLoader().getResource(path);
-            if (url == null) {
-                LOG.debug("Classloader does not know the file {}", path);
-                file = null;
-            } else {
-                LOG.debug("Classloader knows the file {}", path);
-                try {
-                    file = cachedDownload(cacheDir, url.toURI());
-                } catch (URISyntaxException | IOException e) {
-                    LOG.warn(e.getMessage(), e);
-                }
-            }
-        }
-        return file;
-    }
-
-    private InputStream retrieveResource(String path) throws 
FileNotFoundException {
-        File file = new File(path);
-        if (file.exists()) {
-            return new FileInputStream(file);
-        }
-        LOG.warn("File {} not found in local file system. Asking the 
classloader", path);
-        return getClass().getClassLoader().getResourceAsStream(path);
-    }
-
-    private String mayBeDownloadFile(String path) throws TikaConfigException {
-        String resolvedFilePath;
-        if (path.startsWith("http://") || path.startsWith("https://")) {
-            LOG.debug("Config instructed to download the file, doing so.");
-            try {
-                resolvedFilePath = cachedDownload(cacheDir, 
URI.create(path)).getAbsolutePath();
-            } catch (IOException e) {
-                throw new TikaConfigException(e.getMessage(), e);
-            }
-        } else {
-            File file = retrieveFile(path);
-            if (!file.exists()) {
-                LOG.error("File does not exist at :: {}", path);
-            }
-            resolvedFilePath = file.getAbsolutePath();
-        }
-        return resolvedFilePath;
-    }
-
-    @Override
-    public void initialize(Map<String, Param> params) throws 
TikaConfigException {
-
-        //STEP 1: resolve weights file, download if necessary
-        modelWeightsPath = mayBeDownloadFile(modelWeightsPath);
-
-        //STEP 2: Load labels map
-        try (InputStream stream = 
retrieveResource(mayBeDownloadFile(labelFile))) {
-            this.labelMap = loadClassIndex(stream);
-        } catch (IOException | ParseException e) {
-            LOG.error("Could not load labels map", e);
-            return;
-        }
-
-        //STEP 3: initialize the graph
-        try {
-            this.imageLoader = new NativeImageLoader(imgHeight, imgWidth, 
imgChannels);
-            LOG.info("Going to load Inception network...");
-            long st = System.currentTimeMillis();
-
-            try (KerasModelBuilder builder =
-                    new 
KerasModel().modelBuilder().modelHdf5Filename(modelWeightsPath)
-                            .enforceTrainingConfig(false)) {
-
-                builder.inputShape(new int[]{imgHeight, imgWidth, 3});
-                KerasModel model = builder.buildModel();
-                this.graph = model.getComputationGraph();
-                long time = System.currentTimeMillis() - st;
-                LOG.info("Loaded the Inception model. Time taken={}ms", time);
-            }
-        } catch (IOException | InvalidKerasConfigurationException |
-                UnsupportedKerasConfigurationException e) {
-            throw new TikaConfigException(e.getMessage(), e);
-        }
-    }
-
-    @Override
-    public void checkInitialization(InitializableProblemHandler problemHandler)
-            throws TikaConfigException {
-        //TODO: what do we want to check here?
-    }
-
-    @Override
-    public boolean isAvailable() {
-        return graph != null;
-    }
-
-    /**
-     * Pre process image to reduce to make it feedable to inception network
-     *
-     * @param input Input image
-     * @return processed image
-     */
-    public INDArray preProcessImage(INDArray input) {
-        // Transform to [-1.0, 1.0] range
-        return input.div(255.0).sub(0.5).mul(2.0);
-    }
-
-    /**
-     * Loads the class to
-     *
-     * @param stream label index stream
-     * @return Map of integer -&gt; label name
-     * @throws IOException    when the stream breaks unexpectedly
-     * @throws ParseException when the input doesn't contain a valid JSON map
-     */
-    public Map<Integer, String> loadClassIndex(InputStream stream)
-            throws IOException, ParseException {
-        String content = IOUtils.toString(stream, StandardCharsets.UTF_8);
-        JSONObject jIndex = (JSONObject) new JSONParser().parse(content);
-        Map<Integer, String> classMap = new HashMap<>();
-        for (Object key : jIndex.keySet()) {
-            JSONArray names = (JSONArray) jIndex.get(key);
-            classMap.put(Integer.parseInt(key.toString()), 
names.get(names.size() - 1).toString());
-        }
-        return classMap;
-    }
-
-    @Override
-    public List<RecognisedObject> recognise(InputStream stream, ContentHandler 
handler,
-                                            Metadata metadata, ParseContext 
context)
-            throws IOException, SAXException, TikaException {
-
-        INDArray image = preProcessImage(imageLoader.asImageMatrix(stream, 
false).getImage());
-        INDArray scores = graph.outputSingle(image);
-        List<RecognisedObject> result = new ArrayList<>();
-        for (int i = 0; i < scores.length(); i++) {
-            if (scores.getDouble(i) > minConfidence) {
-                String label = labelMap.get(i);
-                String id = i + "";
-                result.add(new RecognisedObject(label, labelLang, id, 
scores.getDouble(i)));
-                LOG.debug("Found Object {}", label);
-            }
-        }
-        return result;
-    }
-}
diff --git a/tika-parsers/tika-parsers-ml/tika-dl/src/main/java/org/apache/tika/dl/imagerec/DL4JVGG16Net.java b/tika-parsers/tika-parsers-ml/tika-dl/src/main/java/org/apache/tika/dl/imagerec/DL4JVGG16Net.java
deleted file mode 100644
index a6f6ca81a..000000000
--- a/tika-parsers/tika-parsers-ml/tika-dl/src/main/java/org/apache/tika/dl/imagerec/DL4JVGG16Net.java
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.dl.imagerec;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import org.datavec.image.loader.NativeImageLoader;
-import org.deeplearning4j.nn.graph.ComputationGraph;
-import org.deeplearning4j.util.ModelSerializer;
-import org.deeplearning4j.zoo.PretrainedType;
-import org.deeplearning4j.zoo.ZooModel;
-import org.deeplearning4j.zoo.model.VGG16;
-import org.deeplearning4j.zoo.util.imagenet.ImageNetLabels;
-import org.nd4j.linalg.api.ndarray.INDArray;
-import org.nd4j.linalg.dataset.api.preprocessor.DataNormalization;
-import org.nd4j.linalg.dataset.api.preprocessor.VGG16ImagePreProcessor;
-import org.nd4j.linalg.factory.Nd4j;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-import org.apache.tika.config.Field;
-import org.apache.tika.config.InitializableProblemHandler;
-import org.apache.tika.config.Param;
-import org.apache.tika.exception.TikaConfigException;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.recognition.ObjectRecogniser;
-import org.apache.tika.parser.recognition.RecognisedObject;
-
-public class DL4JVGG16Net implements ObjectRecogniser {
-
-    public static final Set<MediaType> SUPPORTED_MIMES =
-            Collections.singleton(MediaType.image("jpeg"));
-    private static final Logger LOG = 
LoggerFactory.getLogger(DL4JVGG16Net.class);
-    private static final String BASE_DIR =
-            System.getProperty("user.home") + File.separator + ".tika-dl" + 
File.separator +
-                    "models" + File.separator + "dl4j";
-    private static final String MODEL_DIR = BASE_DIR + File.separator + 
"vgg-16";
-
-    @Field
-    private File cacheDir = new File(MODEL_DIR + File.separator + "vgg16.zip");
-
-    @Field
-    private boolean serialize = true;
-
-    @Field
-    private int topN;
-
-    private NativeImageLoader imageLoader = new NativeImageLoader(224, 224, 3);
-    private DataNormalization preProcessor = new VGG16ImagePreProcessor();
-    private boolean available = false;
-    private ComputationGraph model;
-    private ImageNetLabels imageNetLabels;
-
-    public Set<MediaType> getSupportedMimes() {
-        return SUPPORTED_MIMES;
-    }
-
-    @Override
-    public boolean isAvailable() {
-        return available;
-    }
-
-    @Override
-    public void checkInitialization(InitializableProblemHandler problemHandler)
-            throws TikaConfigException {
-        //TODO: what do we want to check here?
-    }
-
-    @Override
-    public void initialize(Map<String, Param> params) throws 
TikaConfigException {
-        try {
-            if (serialize) {
-                if (cacheDir.exists()) {
-                    model = ModelSerializer.restoreComputationGraph(cacheDir);
-                    LOG.info("Preprocessed Model Loaded from {}", cacheDir);
-                } else {
-                    LOG.warn("Preprocessed Model doesn't exist at {}", 
cacheDir);
-                    cacheDir.getParentFile().mkdirs();
-                    ZooModel zooModel = VGG16.builder().build();
-                    model = (ComputationGraph) 
zooModel.initPretrained(PretrainedType.IMAGENET);
-                    LOG.info(
-                            "Saving the Loaded model for future use. Saved 
models" +
-                                    " are more optimised to consume less 
resources.");
-                    ModelSerializer.writeModel(model, cacheDir, true);
-                }
-            } else {
-                LOG.info("Weight graph model loaded via dl4j Helper 
functions");
-                ZooModel zooModel = VGG16.builder().build();
-                model = (ComputationGraph) 
zooModel.initPretrained(PretrainedType.IMAGENET);
-            }
-            imageNetLabels = new ImageNetLabels();
-            available = true;
-        } catch (Exception e) {
-            available = false;
-            LOG.warn(e.getMessage(), e);
-            throw new TikaConfigException(e.getMessage(), e);
-        }
-    }
-
-    @Override
-    public List<RecognisedObject> recognise(InputStream stream, ContentHandler 
handler,
-                                            Metadata metadata, ParseContext 
context)
-            throws IOException, SAXException, TikaException {
-        INDArray image = imageLoader.asMatrix(stream);
-        preProcessor.transform(image);
-        INDArray[] output = model.output(false, image);
-        return predict(output[0]);
-    }
-
-    private List<RecognisedObject> predict(INDArray predictions) {
-        List<RecognisedObject> objects = new ArrayList<>();
-        int[] topNPredictions = new int[topN];
-        float[] topNProb = new float[topN];
-        String[] outLabels = new String[topN];
-        //brute force collect top N
-        int i = 0;
-        for (int batch = 0; batch < predictions.size(0); batch++) {
-            INDArray currentBatch = predictions.getRow(batch).dup();
-            while (i < topN) {
-                topNPredictions[i] = Nd4j.argMax(currentBatch, 1).getInt(0);
-                topNProb[i] = currentBatch.getFloat(batch, topNPredictions[i]);
-                currentBatch.putScalar(0, topNPredictions[i], 0);
-                outLabels[i] = imageNetLabels.getLabel(topNPredictions[i]);
-                objects.add(new RecognisedObject(outLabels[i], "eng",
-                        outLabels[i], topNProb[i]));
-                i++;
-            }
-        }
-        return objects;
-    }
-}
diff --git a/tika-parsers/tika-parsers-ml/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JInceptionV3NetTest.java b/tika-parsers/tika-parsers-ml/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JInceptionV3NetTest.java
deleted file mode 100644
index 18508bd4e..000000000
--- a/tika-parsers/tika-parsers-ml/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JInceptionV3NetTest.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.dl.imagerec;
-
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assumptions.assumeFalse;
-import static org.junit.jupiter.api.Assumptions.assumeTrue;
-
-import java.io.InputStream;
-
-import org.apache.commons.lang3.SystemUtils;
-import org.junit.jupiter.api.Test;
-
-import org.apache.tika.Tika;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.metadata.Metadata;
-
-public class DL4JInceptionV3NetTest {
-
-    @Test
-    public void recognise() throws Exception {
-        assumeFalse(SystemUtils.OS_ARCH.equals("aarch64"), "doesn't yet work on aarch64");
-        TikaConfig config = null;
-        try (InputStream is = getClass().getResourceAsStream("dl4j-inception3-config.xml")) {
-            config = new TikaConfig(is);
-        } catch (Exception e) {
-            if (e.getMessage() != null && (e.getMessage().contains("Connection refused") ||
-                    e.getMessage().contains("connect timed out") || e.getMessage().contains("403 for URL"))) {
-                assumeTrue(false, "skipping test because of connection issue");
-            }
-            throw e;
-        }
-        assumeTrue(config != null, "something went wrong loading tika config");
-        Tika tika = new Tika(config);
-        Metadata md = new Metadata();
-        try (InputStream is = getClass().getResourceAsStream("cat.jpg")) {
-            tika.parse(is, md);
-        }
-        String[] objects = md.getValues("OBJECT");
-        boolean found = false;
-        for (String object : objects) {
-            if (object.contains("_cat")) {
-                found = true;
-                break;
-            }
-        }
-        assertTrue(found);
-    }
-}
diff --git a/tika-parsers/tika-parsers-ml/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JVGG16NetTest.java b/tika-parsers/tika-parsers-ml/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JVGG16NetTest.java
deleted file mode 100644
index f3666630f..000000000
--- a/tika-parsers/tika-parsers-ml/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JVGG16NetTest.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.dl.imagerec;
-
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assumptions.assumeFalse;
-import static org.junit.jupiter.api.Assumptions.assumeTrue;
-
-import java.io.InputStream;
-
-import org.apache.commons.lang3.SystemUtils;
-import org.junit.jupiter.api.Test;
-
-import org.apache.tika.Tika;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.metadata.Metadata;
-
-public class DL4JVGG16NetTest {
-
-    @Test
-    public void recognise() throws Exception {
-        assumeFalse(SystemUtils.OS_ARCH.equals("aarch64"), "doesn't yet work on aarch64");
-        TikaConfig config = null;
-        try (InputStream is = getClass().getResourceAsStream("dl4j-vgg16-config.xml")) {
-            config = new TikaConfig(is);
-        } catch (Exception e) {
-            if (e.getMessage() != null && (e.getMessage().contains("Connection refused") ||
-                    e.getMessage().contains("connect timed out") || e.getMessage().contains("403 for URL"))) {
-                assumeTrue(false, "skipping test because of connection issue");
-            }
-            throw e;
-        }
-
-        assumeTrue(false, "something went wrong loading tika config");
-        Tika tika = new Tika(config);
-        Metadata md = new Metadata();
-        try (InputStream is = getClass().getResourceAsStream("lion.jpg")) {
-            tika.parse(is, md);
-        }
-        String[] objects = md.getValues("OBJECT");
-        boolean found = false;
-        for (String object : objects) {
-            if (object.contains("lion")) {
-                found = true;
-                break;
-            }
-        }
-        assertTrue(found);
-    }
-}
diff --git a/tika-parsers/tika-parsers-ml/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/cat.jpg b/tika-parsers/tika-parsers-ml/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/cat.jpg
deleted file mode 100644
index 09d45d460..000000000
Binary files a/tika-parsers/tika-parsers-ml/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/cat.jpg and /dev/null differ
diff --git a/tika-parsers/tika-parsers-ml/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/dl4j-inception3-config.xml b/tika-parsers/tika-parsers-ml/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/dl4j-inception3-config.xml
deleted file mode 100644
index 27280638b..000000000
--- a/tika-parsers/tika-parsers-ml/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/dl4j-inception3-config.xml
+++ /dev/null
@@ -1,35 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-<properties>
-  <parsers>
-    <parser class="org.apache.tika.parser.recognition.ObjectRecognitionParser">
-      <mime>image/jpeg</mime>
-      <params>
-        <param name="modelWeightsPath" type="string">https://github.com/USCDataScience/tika-dockers/releases/download/v0.2/inception_v3_keras_2.h5</param>
-        <param name="labelFile" type="string">https://github.com/USCDataScience/tika-dockers/releases/download/v0.2/imagenet_class_index.json</param>
-        <param name="topN" type="int">10</param>
-        <param name="minConfidence" type="double">0.015</param>
-        <param name="class" type="string">org.apache.tika.dl.imagerec.DL4JInceptionV3Net</param>
-      </params>
-    </parser>
-  </parsers>
-</properties>
diff --git a/tika-parsers/tika-parsers-ml/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/dl4j-vgg16-config.xml b/tika-parsers/tika-parsers-ml/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/dl4j-vgg16-config.xml
deleted file mode 100644
index 940a4b63f..000000000
--- a/tika-parsers/tika-parsers-ml/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/dl4j-vgg16-config.xml
+++ /dev/null
@@ -1,32 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
-  ~ Licensed to the Apache Software Foundation (ASF) under one or more
-  ~ contributor license agreements.  See the NOTICE file distributed with
-  ~ this work for additional information regarding copyright ownership.
-  ~ The ASF licenses this file to You under the Apache License, Version 2.0
-  ~ (the "License"); you may not use this file except in compliance with
-  ~ the License.  You may obtain a copy of the License at
-  ~
-  ~    http://www.apache.org/licenses/LICENSE-2.0
-  ~
-  ~ Unless required by applicable law or agreed to in writing, software
-  ~ distributed under the License is distributed on an "AS IS" BASIS,
-  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  ~ See the License for the specific language governing permissions and
-  ~ limitations under the License.
-  -->
-<properties>
-    <parsers>
-        <parser class="org.apache.tika.parser.recognition.ObjectRecognitionParser">
-            <mime>image/jpeg</mime>
-            <params>
-                <param name="topN" type="int">3</param>
-                <param name="minConfidence" type="double">0.015</param>
-                <param name="class" type="string">org.apache.tika.dl.imagerec.DL4JVGG16Net</param>
-                <param name="modelType" type="string">VGG16</param>
-                <param name="serialize" type="bool">true</param>
-            </params>
-        </parser>
-    </parsers>
-</properties>
diff --git a/tika-parsers/tika-parsers-ml/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/lion.jpg b/tika-parsers/tika-parsers-ml/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/lion.jpg
deleted file mode 100644
index a25942645..000000000
Binary files a/tika-parsers/tika-parsers-ml/tika-dl/src/test/resources/org/apache/tika/dl/imagerec/lion.jpg and /dev/null differ


Reply via email to