This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 2b9ba8612 TIKA-3885: Add a tika-async-cli module
2b9ba8612 is described below

commit 2b9ba8612b20d2779863f08b908e89cc001b483f
Author: tballison <[email protected]>
AuthorDate: Wed Oct 19 10:49:52 2022 -0400

    TIKA-3885: Add a tika-async-cli module
---
 CHANGES.txt                                        |   2 +
 pom.xml                                            |   2 +-
 tika-app/pom.xml                                   |   5 +
 .../src/main/java/org/apache/tika/cli/TikaCLI.java |   4 +-
 tika-bom/pom.xml                                   |   6 +-
 .../apache/tika/pipes/async/AsyncProcessor.java    |  24 ----
 tika-pipes/pom.xml                                 |   1 +
 tika-pipes/tika-async-cli/pom.xml                  | 136 +++++++++++++++++++++
 .../org/apache/tika/async/cli/TikaAsyncCLI.java    |  60 +++++++++
 9 files changed, 212 insertions(+), 28 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 472ab79df..5370bc65e 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,7 @@
 Release 2.5.1 - ???
 
+   * Add a tika-async-cli module (TIKA-3885).
+
    * Fetch keys sent via headers to tika server are now URL decoded (TIKA-3864).
 
 
diff --git a/pom.xml b/pom.xml
index 935922d79..b75efbdaf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -44,8 +44,8 @@
     <module>tika-xmp</module>
     <module>tika-batch</module>
     <module>tika-langdetect</module>
-    <module>tika-app</module>
     <module>tika-pipes</module>
+    <module>tika-app</module>
     <module>tika-server</module>
     <module>tika-integration-tests</module>
     <module>tika-eval</module>
diff --git a/tika-app/pom.xml b/tika-app/pom.xml
index 94248b041..7cc33ca2e 100644
--- a/tika-app/pom.xml
+++ b/tika-app/pom.xml
@@ -73,6 +73,11 @@
       <artifactId>tika-emitter-fs</artifactId>
       <version>${project.version}</version>
     </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-async-cli</artifactId>
+      <version>${project.version}</version>
+    </dependency>
 
     <!-- logging -->
     <dependency>
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index f174b674a..ee7feacfc 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -66,6 +66,7 @@ import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
 
 import org.apache.tika.Tika;
+import org.apache.tika.async.cli.TikaAsyncCLI;
 import org.apache.tika.batch.BatchProcessDriverCLI;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.config.TikaConfigSerializer;
@@ -99,7 +100,6 @@ import org.apache.tika.parser.PasswordProvider;
 import org.apache.tika.parser.RecursiveParserWrapper;
 import org.apache.tika.parser.digestutils.CommonsDigester;
 import org.apache.tika.parser.pdf.PDFParserConfig;
-import org.apache.tika.pipes.async.AsyncProcessor;
 import org.apache.tika.sax.BasicContentHandlerFactory;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.ContentHandlerFactory;
@@ -282,7 +282,7 @@ public class TikaCLI {
                 tikaConfigPath = arg.substring(config.length());
             }
         }
-        AsyncProcessor.main(new String[]{ tikaConfigPath});
+        TikaAsyncCLI.main(new String[]{ tikaConfigPath});
     }
 
     /**
diff --git a/tika-bom/pom.xml b/tika-bom/pom.xml
index 71b528a80..2f325bdf4 100644
--- a/tika-bom/pom.xml
+++ b/tika-bom/pom.xml
@@ -375,7 +375,11 @@
         <artifactId>tika-pipes-iterator-solr</artifactId>
         <version>2.5.1-SNAPSHOT</version>
       </dependency>
-
+      <dependency>
+        <groupId>org.apache.tika</groupId>
+        <artifactId>tika-async-cli</artifactId>
+        <version>2.5.1-SNAPSHOT</version>
+      </dependency>
       <dependency>
         <groupId>org.apache.tika</groupId>
         <artifactId>tika-httpclient-commons</artifactId>
diff --git a/tika-core/src/main/java/org/apache/tika/pipes/async/AsyncProcessor.java b/tika-core/src/main/java/org/apache/tika/pipes/async/AsyncProcessor.java
index 7a948045d..fee2ba37e 100644
--- a/tika-core/src/main/java/org/apache/tika/pipes/async/AsyncProcessor.java
+++ b/tika-core/src/main/java/org/apache/tika/pipes/async/AsyncProcessor.java
@@ -19,7 +19,6 @@ package org.apache.tika.pipes.async;
 import java.io.Closeable;
 import java.io.IOException;
 import java.nio.file.Path;
-import java.nio.file.Paths;
 import java.util.List;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.Callable;
@@ -319,27 +318,4 @@ public class AsyncProcessor implements Closeable {
             }
         }
     }
-
-    public static void main(String[] args) throws Exception {
-        Path tikaConfigPath = Paths.get(args[0]);
-        PipesIterator pipesIterator = PipesIterator.build(tikaConfigPath);
-        long start = System.currentTimeMillis();
-        try (AsyncProcessor processor = new AsyncProcessor(tikaConfigPath, pipesIterator)) {
-
-            for (FetchEmitTuple t : pipesIterator) {
-                processor.offer(t, 2000);
-            }
-            processor.finished();
-            while (true) {
-                if (processor.checkActive()) {
-                    Thread.sleep(500);
-                } else {
-                    break;
-                }
-            }
-            long elapsed = System.currentTimeMillis() - start;
-            LOG.info("Successfully finished processing {} files in {} ms",
-                    processor.getTotalProcessed(), elapsed);
-        }
-    }
 }
diff --git a/tika-pipes/pom.xml b/tika-pipes/pom.xml
index b6876578a..dbfd4f6d7 100644
--- a/tika-pipes/pom.xml
+++ b/tika-pipes/pom.xml
@@ -35,6 +35,7 @@
     <module>tika-emitters</module>
     <module>tika-pipes-iterators</module>
     <module>tika-pipes-reporters</module>
+    <module>tika-async-cli</module>
   </modules>
 
   <dependencyManagement>
diff --git a/tika-pipes/tika-async-cli/pom.xml b/tika-pipes/tika-async-cli/pom.xml
new file mode 100644
index 000000000..f5d244075
--- /dev/null
+++ b/tika-pipes/tika-async-cli/pom.xml
@@ -0,0 +1,136 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <parent>
+    <groupId>org.apache.tika</groupId>
+    <artifactId>tika-pipes</artifactId>
+    <version>2.5.1-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+  <modelVersion>4.0.0</modelVersion>
+
+  <artifactId>tika-async-cli</artifactId>
+
+  <name>Apache Tika Async CLI</name>
+  <url>https://tika.apache.org/</url>
+
+  <dependencies>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <!-- logging -->
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-slf4j2-impl</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-emitter-fs</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-serialization</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <configuration>
+          <archive>
+            <manifestEntries>
+              <Automatic-Module-Name>org.apache.tika.pipes.reporters.fs.status</Automatic-Module-Name>
+            </manifestEntries>
+          </archive>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>${maven.shade.version}</version>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <createDependencyReducedPom>
+                false
+              </createDependencyReducedPom>
+              <!-- <filters> -->
+              <filters>
+                <filter>
+                  <artifact>*:*</artifact>
+                  <excludes>
+                    <exclude>META-INF/*</exclude>
+                    <exclude>LICENSE.txt</exclude>
+                    <exclude>NOTICE.txt</exclude>
+                  </excludes>
+                </filter>
+              </filters>
+              <transformers>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+                  <mainClass>org.apache.tika.async.cli.TikaAsyncCLI</mainClass>
+                  <manifestEntries>
+                    <Multi-Release>true</Multi-Release>
+                  </manifestEntries>
+                </transformer>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+                  <resource>META-INF/LICENSE</resource>
+                  <file>target/classes/META-INF/LICENSE</file>
+                </transformer>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+                  <resource>META-INF/NOTICE</resource>
+                  <file>target/classes/META-INF/NOTICE</file>
+                </transformer>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+                  <resource>META-INF/DEPENDENCIES</resource>
+                  <file>target/classes/META-INF/DEPENDENCIES</file>
+                </transformer>
+              </transformers>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+  <scm>
+    <tag>2.2.1-rc2</tag>
+  </scm>
+</project>
diff --git a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
new file mode 100644
index 000000000..4490e33fd
--- /dev/null
+++ b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/TikaAsyncCLI.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.async.cli;
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.concurrent.TimeoutException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.tika.pipes.FetchEmitTuple;
+import org.apache.tika.pipes.async.AsyncProcessor;
+import org.apache.tika.pipes.pipesiterator.PipesIterator;
+
+public class TikaAsyncCLI {
+
+    private static final long TIMEOUT_MS = 600_000;
+    private static final Logger LOG = LoggerFactory.getLogger(TikaAsyncCLI.class);
+
+    public static void main(String[] args) throws Exception {
+        Path tikaConfigPath = Paths.get(args[0]);
+        PipesIterator pipesIterator = PipesIterator.build(tikaConfigPath);
+        long start = System.currentTimeMillis();
+        try (AsyncProcessor processor = new AsyncProcessor(tikaConfigPath, pipesIterator)) {
+
+            for (FetchEmitTuple t : pipesIterator) {
+                boolean offered = processor.offer(t, TIMEOUT_MS);
+                if (! offered) {
+                    throw new TimeoutException("timed out waiting to add a fetch emit tuple");
+                }
+            }
+            processor.finished();
+            while (true) {
+                if (processor.checkActive()) {
+                    Thread.sleep(500);
+                } else {
+                    break;
+                }
+            }
+            long elapsed = System.currentTimeMillis() - start;
+            LOG.info("Successfully finished processing {} files in {} ms",
+                    processor.getTotalProcessed(), elapsed);
+        }
+    }
+}

Reply via email to