This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch TIKA-3226
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/TIKA-3226 by this push:
new 43b6577 TIKA-3226 -- WIP do not merge -- add client
43b6577 is described below
commit 43b6577e3a1e5145fff8b1cafbacfd2cbaf45cb3
Author: tballison <[email protected]>
AuthorDate: Fri Jan 22 19:39:38 2021 -0500
TIKA-3226 -- WIP do not merge -- add client
---
.../main/java/org/apache/tika/config/Param.java | 8 +-
.../java/org/apache/tika/config/TikaConfig.java | 112 ++++++++-
.../org/apache/tika/emitter/DefaultEmitter.java | 2 +-
.../apache/tika/fetcher/DefaultFetchIterator.java | 57 +++++
.../org/apache/tika/fetcher/FetchIterator.java | 141 +++++++++++
.../org/apache/tika/fetcher/FetchMetadataPair.java | 46 ++++
.../tika/fetcher/FileSystemFetchIterator.java | 143 +++++++++++
.../tika/fetcher/FileSystemFetchIteratorTest.java | 75 ++++++
tika-server/pom.xml | 14 --
tika-server/tika-server-classic/pom.xml | 7 +-
.../apache/tika/server/classic/FetcherTest.java | 13 +-
tika-server/tika-server-client/pom.xml | 32 +++
.../org/apache/tika/server/client/TikaClient.java | 100 ++++++++
.../apache/tika/server/client/TikaClientCLI.java | 186 ++++++++++++++
.../server/client/TikaClientConfigException.java | 13 +
.../tika/server/client/TikaEmitterResult.java | 61 +++++
.../apache/tika/server/client/TikaHttpClient.java | 170 +++++++++++++
.../src/main/resources/log4j.properties | 24 ++
.../org/apache/tika/server/client/TestBasic.java | 16 ++
.../src/test/resources/log4j.properties | 24 ++
.../resources/tika-config-simple-solr-emitter.xml | 78 ++++++
.../org/apache/tika/server/core/TikaServerCli.java | 5 +-
.../tika/server/core/resource/EmitterResource.java | 80 +++---
.../tika/server/core/resource/TikaResource.java | 4 +-
.../tika/server/core/IntegrationTestBase.java | 88 +++++++
.../apache/tika/server/core/TikaEmitterTest.java | 87 +++++--
.../core/TikaServerEmitterIntegrationTest.java | 268 +++++++++++++++++++++
.../server/core/TikaServerIntegrationTest.java | 75 +-----
28 files changed, 1775 insertions(+), 154 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/config/Param.java
b/tika-core/src/main/java/org/apache/tika/config/Param.java
index 652fb6c..3462252 100644
--- a/tika-core/src/main/java/org/apache/tika/config/Param.java
+++ b/tika-core/src/main/java/org/apache/tika/config/Param.java
@@ -16,6 +16,7 @@
*/
package org.apache.tika.config;
+import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.parser.multiple.AbstractMultipleParser;
import org.apache.tika.utils.XMLReaderUtils;
@@ -205,18 +206,21 @@ public class Param<T> implements Serializable {
return load(document.getFirstChild());
}
- public static <T> Param<T> load(Node node) {
+ public static <T> Param<T> load(Node node) throws TikaConfigException{
Node nameAttr = node.getAttributes().getNamedItem("name");
Node typeAttr = node.getAttributes().getNamedItem("type");
Node valueAttr = node.getAttributes().getNamedItem("value");
Node value = node.getFirstChild();
if (value instanceof NodeList && valueAttr != null) {
- throw new IllegalArgumentException("can't specify a value attr
_and_ a node list");
+ throw new TikaConfigException("can't specify a value attr _and_ a
node list");
}
if (valueAttr != null && (value == null || value.getTextContent() ==
null)) {
value = valueAttr;
}
+ if (typeAttr == null) {
+ throw new TikaConfigException("Must specify a \"type\" in: " +
node.getLocalName());
+ }
Param<T> ret = new Param<T>();
ret.name = nameAttr.getTextContent();
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index be82195..df4c43c 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -50,7 +50,9 @@ import org.apache.tika.emitter.DefaultEmitter;
import org.apache.tika.emitter.Emitter;
import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.fetcher.DefaultFetchIterator;
import org.apache.tika.fetcher.DefaultFetcher;
+import org.apache.tika.fetcher.FetchIterator;
import org.apache.tika.fetcher.Fetcher;
import org.apache.tika.language.translate.DefaultTranslator;
import org.apache.tika.language.translate.Translator;
@@ -126,6 +128,10 @@ public class TikaConfig {
return new DefaultEmitter(Collections.EMPTY_LIST);
}
+    /**
+     * Builds the fallback {@link FetchIterator} used when no config file supplies one:
+     * a {@link DefaultFetchIterator} with no child iterators.
+     *
+     * @param loader currently unused
+     * @return an empty {@link DefaultFetchIterator}
+     */
+    private static FetchIterator getDefaultFetchIterator(ServiceLoader loader) {
+        // typed empty list instead of the raw Collections.EMPTY_LIST (avoids an unchecked conversion)
+        return new DefaultFetchIterator(Collections.<FetchIterator>emptyList());
+    }
+
//use this to look for unneeded instantiations of TikaConfig
protected static AtomicInteger TIMES_INSTANTIATED = new AtomicInteger();
@@ -140,6 +146,7 @@ public class TikaConfig {
private final MetadataFilter metadataFilter;
private final Fetcher fetcher;
private final Emitter emitter;
+ private final FetchIterator fetchIterator;
public TikaConfig(String file)
throws TikaException, IOException, SAXException {
@@ -208,6 +215,7 @@ public class TikaConfig {
MetadataFilterXmlLoader metadataFilterXmlLoader = new
MetadataFilterXmlLoader();
FetcherXmlLoader fetcherXmlLoader = new FetcherXmlLoader();
EmitterXmlLoader emitterXmlLoader = new EmitterXmlLoader();
+ FetchIteratorXmlLoader fetchIteratorXmlLoader = new
FetchIteratorXmlLoader();
updateXMLReaderUtils(element);
this.mimeTypes = typesFromDomElement(element);
this.detector = detectorLoader.loadOverall(element, mimeTypes, loader);
@@ -220,6 +228,7 @@ public class TikaConfig {
this.metadataFilter = metadataFilterXmlLoader.loadOverall(element,
mimeTypes, loader);
this.fetcher = fetcherXmlLoader.loadOverall(element, mimeTypes,
loader);
this.emitter = emitterXmlLoader.loadOverall(element, mimeTypes,
loader);
+ this.fetchIterator = fetchIteratorXmlLoader.loadOverall(element,
mimeTypes, loader);
this.serviceLoader = loader;
TIMES_INSTANTIATED.incrementAndGet();
}
@@ -248,6 +257,7 @@ public class TikaConfig {
this.metadataFilter = getDefaultMetadataFilter(serviceLoader);
this.fetcher = getDefaultFetcher(serviceLoader);
this.emitter = getDefaultEmitter(serviceLoader);
+ this.fetchIterator = getDefaultFetchIterator(serviceLoader);
TIMES_INSTANTIATED.incrementAndGet();
}
@@ -286,6 +296,7 @@ public class TikaConfig {
this.metadataFilter = getDefaultMetadataFilter(serviceLoader);
this.fetcher = getDefaultFetcher(serviceLoader);
this.emitter = getDefaultEmitter(serviceLoader);
+ this.fetchIterator = getDefaultFetchIterator(serviceLoader);
} else {
ServiceLoader tmpServiceLoader = new ServiceLoader();
try (InputStream stream = getConfigInputStream(config,
tmpServiceLoader)) {
@@ -299,6 +310,7 @@ public class TikaConfig {
MetadataFilterXmlLoader metadataFilterXmlLoader = new
MetadataFilterXmlLoader();
FetcherXmlLoader fetcherXmlLoader = new FetcherXmlLoader();
EmitterXmlLoader emitterXmlLoader = new EmitterXmlLoader();
+ FetchIteratorXmlLoader fetchIteratorXmlLoader = new
FetchIteratorXmlLoader();
this.mimeTypes = typesFromDomElement(element);
this.encodingDetector =
encodingDetectorLoader.loadOverall(element, mimeTypes, serviceLoader);
@@ -312,6 +324,7 @@ public class TikaConfig {
this.metadataFilter =
metadataFilterXmlLoader.loadOverall(element, mimeTypes, serviceLoader);
this.fetcher = fetcherXmlLoader.loadOverall(element,
mimeTypes, serviceLoader);
this.emitter = emitterXmlLoader.loadOverall(element,
mimeTypes, serviceLoader);
+ this.fetchIterator =
fetchIteratorXmlLoader.loadOverall(element, mimeTypes, serviceLoader);
} catch (SAXException e) {
throw new TikaException(
"Specified Tika configuration has syntax errors: "
@@ -448,6 +461,9 @@ public class TikaConfig {
return emitter;
}
+    /**
+     * @return the {@link FetchIterator} assigned to this config at construction time
+     */
+    public FetchIterator getFetchIterator() {
+        return this.fetchIterator;
+    }
/**
* Provides a default configuration (TikaConfig). Currently creates a
* new instance each time it's called; we may be able to have it
@@ -649,7 +665,9 @@ public class TikaConfig {
T loadOne(Element element, MimeTypes mimeTypes, ServiceLoader loader)
throws TikaException, IOException {
String name = element.getAttribute("class");
-
+ if (name == null) {
+ throw new TikaConfigException("class attribute must not be
null: "+element);
+ }
String initProbHandler =
element.getAttribute("initializableProblemHandler");
InitializableProblemHandler initializableProblemHandler;
if (initProbHandler == null || initProbHandler.length() == 0) {
@@ -767,7 +785,7 @@ public class TikaConfig {
* @param el xml node which has {@link #PARAMS_TAG_NAME} child
* @return Map of key values read from xml
*/
- Map<String, Param> getParams(Element el){
+ Map<String, Param> getParams(Element el) throws TikaException {
Map<String, Param> params = new HashMap<>();
for (Node child = el.getFirstChild(); child != null;
child = child.getNextSibling()){
@@ -1418,7 +1436,7 @@ public class TikaConfig {
@Override
boolean isComposite(Class<? extends Emitter> loadedClass) {
- return DefaultFetcher.class.isAssignableFrom(loadedClass);
+ return DefaultEmitter.class.isAssignableFrom(loadedClass);
}
@Override
@@ -1481,4 +1499,92 @@ public class TikaConfig {
return created; // No decoration of emitters yet
}
}
+
+    /**
+     * Loads {@link FetchIterator} instances from the {@code fetchIterators} /
+     * {@code fetchIterator} elements of a Tika config file.
+     */
+    private static class FetchIteratorXmlLoader extends
+            XmlLoader<FetchIterator, FetchIterator> {
+
+        boolean supportsComposite() {
+            return true;
+        }
+
+        String getParentTagName() {
+            return "fetchIterators";
+        }
+
+        String getLoaderTagName() {
+            return "fetchIterator";
+        }
+
+        @Override
+        Class<? extends FetchIterator> getLoaderClass() {
+            return FetchIterator.class;
+        }
+
+        // NOTE(review): both isComposite variants always answer false even though
+        // supportsComposite() returns true and createComposite() is implemented;
+        // confirm this is intended.
+        @Override
+        boolean isComposite(FetchIterator loaded) {
+            return false;
+        }
+
+        @Override
+        boolean isComposite(Class<? extends FetchIterator> loadedClass) {
+            return false;
+        }
+
+        /**
+         * No FetchIterator classes are special-cased before loading.
+         *
+         * @return always {@code null}, so that normal loading continues
+         */
+        @Override
+        FetchIterator preLoadOne(Class<? extends FetchIterator> loadedClass,
+                                 String classname, MimeTypes mimeTypes) throws TikaException {
+            return null;
+        }
+
+        /**
+         * @return a {@link DefaultFetchIterator} with no children; the service loader is ignored
+         */
+        @Override
+        FetchIterator createDefault(MimeTypes mimeTypes, ServiceLoader loader) {
+            return new DefaultFetchIterator(Collections.<FetchIterator>emptyList());
+        }
+
+        /**
+         * Wraps the loaded iterators in a {@link DefaultFetchIterator}; the service loader is ignored.
+         */
+        @Override
+        FetchIterator createComposite(List<FetchIterator> loaded, MimeTypes mimeTypes,
+                                      ServiceLoader loader) {
+            return new DefaultFetchIterator(loaded);
+        }
+
+        /**
+         * Instantiates a composite FetchIterator reflectively, trying the
+         * {@code (ServiceLoader, Collection)} constructor first and the
+         * {@code (List)} constructor second.
+         *
+         * @return the new instance, or {@code null} if the class has neither constructor
+         */
+        @Override
+        FetchIterator createComposite(Class<? extends FetchIterator> fetchIteratorClass,
+                                      List<FetchIterator> fetchIteratorChildren,
+                                      Set<Class<? extends FetchIterator>> excludeFetchIterators,
+                                      Map<String, Param> params, MimeTypes mimeTypes,
+                                      ServiceLoader loader)
+                throws InvocationTargetException, IllegalAccessException,
+                InstantiationException {
+            FetchIterator fetchIterator = null;
+
+            // Try the possible default and composite FetchIterator constructors
+            try {
+                Constructor<? extends FetchIterator> c =
+                        fetchIteratorClass.getConstructor(ServiceLoader.class, Collection.class);
+                fetchIterator = c.newInstance(loader, excludeFetchIterators);
+            } catch (NoSuchMethodException ignored) {
+                // expected while probing: this class simply lacks that constructor,
+                // so fall through to the next candidate instead of dumping a stack trace
+            }
+            if (fetchIterator == null) {
+                try {
+                    Constructor<? extends FetchIterator> c =
+                            fetchIteratorClass.getConstructor(List.class);
+                    fetchIterator = c.newInstance(fetchIteratorChildren);
+                } catch (NoSuchMethodException ignored) {
+                    // expected while probing; the caller receives null below
+                }
+            }
+
+            return fetchIterator;
+        }
+
+        @Override
+        FetchIterator decorate(FetchIterator created, Element element) {
+            return created; // No decoration of FetchIterators yet
+        }
+    }
+
}
diff --git
a/tika-core/src/main/java/org/apache/tika/emitter/DefaultEmitter.java
b/tika-core/src/main/java/org/apache/tika/emitter/DefaultEmitter.java
index f95977b..80fe314 100644
--- a/tika-core/src/main/java/org/apache/tika/emitter/DefaultEmitter.java
+++ b/tika-core/src/main/java/org/apache/tika/emitter/DefaultEmitter.java
@@ -74,7 +74,7 @@ public class DefaultEmitter implements Emitter {
Emitter emitter = emitterMap.get(emitterName);
if (emitter == null) {
- throw new IllegalArgumentException("Can't find fetcher for prefix:
"+
+ throw new IllegalArgumentException("Can't find emitter for prefix:
"+
emitterName);
}
emitter.emit(emitterName, metadata);
diff --git
a/tika-core/src/main/java/org/apache/tika/fetcher/DefaultFetchIterator.java
b/tika-core/src/main/java/org/apache/tika/fetcher/DefaultFetchIterator.java
new file mode 100644
index 0000000..a579314
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/fetcher/DefaultFetchIterator.java
@@ -0,0 +1,57 @@
+package org.apache.tika.fetcher;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.config.Initializable;
+import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.config.Param;
+import org.apache.tika.exception.TikaConfigException;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeoutException;
+
+public class DefaultFetchIterator extends FetchIterator implements
Initializable {
+
+ private final Map<String, FetchIterator> fetchIterators = new HashMap<>();
+ private String iteratorName = "";
+ public DefaultFetchIterator(List<FetchIterator> fetchIterators) {
+ super("default");
+ for (FetchIterator fetchIterator : fetchIterators) {
+ if (this.fetchIterators.containsKey(fetchIterator.getName())) {
+ throw new RuntimeException(new TikaConfigException("Multiple
fetchIterators cannot have the same name: "
+ + fetchIterator.getName()));
+ }
+ this.fetchIterators.put(fetchIterator.getName(), fetchIterator);
+ }
+ }
+
+ @Override
+ protected void enqueue() throws IOException, TimeoutException {
+ if (fetchIterators.size() == 0) {
+ return;
+ } else if (fetchIterators.size() == 1) {
+ for (FetchIterator fetchIterator : fetchIterators.values()) {
+ fetchIterator.enqueue();
+ }
+ }
+ }
+
+ @Field
+ public void setIteratorName(String iteratorName) {
+ this.iteratorName = iteratorName;
+ }
+
+ @Override
+ public void initialize(Map<String, Param> params) throws
TikaConfigException {
+ //no-op
+ }
+
+ @Override
+ public void checkInitialization(InitializableProblemHandler
problemHandler) throws TikaConfigException {
+ if (this.fetchIterators.size() > 1 && this.iteratorName == null) {
+ throw new TikaConfigException("Must set desired iteratorName, if
multiple iterators are defined");
+ }
+ }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/fetcher/FetchIterator.java
b/tika-core/src/main/java/org/apache/tika/fetcher/FetchIterator.java
new file mode 100644
index 0000000..84e74d3
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/fetcher/FetchIterator.java
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fetcher;
+
+import org.apache.tika.config.Field;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.Callable;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+/**
+ * Abstract class that handles the testing for timeouts/thread safety
+ * issues. Concrete classes implement {@link #enqueue()}.
+ *
+ * This must be "called" and managed with an ExecutorService, etc.
+ * for the iterable to work.
+ */
+public abstract class FetchIterator implements Callable<Integer>,
+ Iterable<FetchMetadataPair> {
+
+ public static final long DEFAULT_MAX_WAIT_MS = 300_000;
+
+ static final FetchMetadataPair POISON =
+ new FetchMetadataPair(null, null);
+
+ private final int queueSize = 1000;
+ private long maxWaitMs = DEFAULT_MAX_WAIT_MS;
+ private final ArrayBlockingQueue<FetchMetadataPair> queue = new
ArrayBlockingQueue<>(queueSize);
+ private String name;
+
+ public FetchIterator(String name) {
+ this.name = name;
+ }
+
+ @Field
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ @Override
+ public Integer call() throws Exception {
+ enqueue();
+ System.out.println("finished");
+ return 1;
+ }
+
+ protected abstract void enqueue() throws IOException, TimeoutException;
+
+ void tryToAdd(FetchMetadataPair p) throws InterruptedException,
TimeoutException {
+ System.out.println("trying to add: "+p + " "+ queue.size());
+ boolean offered = queue.offer(p, maxWaitMs, TimeUnit.MILLISECONDS);
+ System.out.println("added: "+p + " "+ queue.size());
+ if (! offered) {
+ throw new TimeoutException("timed out while offering");
+ }
+ }
+
+ @Override
+ public Iterator<FetchMetadataPair> iterator() {
+ return new InternalIterator();
+ }
+
+ private class InternalIterator implements Iterator<FetchMetadataPair> {
+ //Object[] is recommended by FindBugs as a lock object
+ private Object[] lock = new Object[0];
+ private FetchMetadataPair next = null;
+ volatile boolean initialized = false;
+ InternalIterator() {
+
+ }
+
+ @Override
+ public boolean hasNext() {
+ System.out.println("hasNExt");
+ if (!initialized) {
+ next = getNext();
+ initialized = true;
+ }
+
+ return next != POISON;
+ }
+
+ /**
+ *
+ * @return next FetcherStringMetadataPair; if {@link #hasNext()}
returns
+ * false, this will return null.
+ */
+ @Override
+ public FetchMetadataPair next() {
+
+ System.out.println("in next");
+
+ if (next == POISON) {
+ return null;
+ }
+ FetchMetadataPair ret = next;
+ next = getNext();
+ return ret;
+ }
+
+ private FetchMetadataPair getNext() {
+ FetchMetadataPair p = null;
+ System.out.println("in get next: " + queue.size());
+ try {
+ //System.out.println("peek: " + queue.peek());
+ p = queue.poll(maxWaitMs, TimeUnit.MILLISECONDS);
+ //System.out.println("peek2: " + queue.peek() + " :: "+p);
+ } catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ if (p == null) {
+ throw new RuntimeException(new TimeoutException(""));
+ }
+ return p;
+ }
+ }
+}
diff --git
a/tika-core/src/main/java/org/apache/tika/fetcher/FetchMetadataPair.java
b/tika-core/src/main/java/org/apache/tika/fetcher/FetchMetadataPair.java
new file mode 100644
index 0000000..9711c1f
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/fetcher/FetchMetadataPair.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fetcher;
+
+import org.apache.tika.metadata.Metadata;
+
+/**
+ * Immutable holder for a fetcher string and the {@link Metadata} that goes with it.
+ */
+public class FetchMetadataPair {
+
+    private final String fetcherString;
+    private final Metadata metadata;
+
+    public FetchMetadataPair(String fetcherString, Metadata metadata) {
+        this.fetcherString = fetcherString;
+        this.metadata = metadata;
+    }
+
+    public String getFetcherString() {
+        return fetcherString;
+    }
+
+    public Metadata getMetadata() {
+        return metadata;
+    }
+
+    @Override
+    public String toString() {
+        // label now matches the class name (was "FetcherStringMetadataPair")
+        return "FetchMetadataPair{" +
+                "fetcherString='" + fetcherString + '\'' +
+                ", metadata=" + metadata +
+                '}';
+    }
+}
diff --git
a/tika-core/src/main/java/org/apache/tika/fetcher/FileSystemFetchIterator.java
b/tika-core/src/main/java/org/apache/tika/fetcher/FileSystemFetchIterator.java
new file mode 100644
index 0000000..5edf83d
--- /dev/null
+++
b/tika-core/src/main/java/org/apache/tika/fetcher/FileSystemFetchIterator.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fetcher;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.config.Initializable;
+import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.config.Param;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.metadata.Metadata;
+
+import java.io.IOException;
+import java.nio.file.FileVisitResult;
+import java.nio.file.FileVisitor;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.rmi.ServerRuntimeException;
+import java.util.Map;
+import java.util.concurrent.TimeoutException;
+
+/**
+ * {@link FetchIterator} that walks the file tree under a base path and enqueues
+ * one {@link FetchMetadataPair} per file, using the fetcher string
+ * {@code fetchPrefix + ":" + pathRelativeToBasePath}.
+ */
+public class FileSystemFetchIterator extends FetchIterator implements Initializable {
+
+    private static final String NAME = "fs";
+    private Path basePath;
+    private String fetchPrefix;
+
+    public FileSystemFetchIterator() {
+        super(NAME);
+    }
+
+    public FileSystemFetchIterator(String fetchPrefix, Path basePath) {
+        super(NAME);
+        this.fetchPrefix = fetchPrefix;
+        this.basePath = basePath;
+    }
+
+    /**
+     * fetchPrefix not including the colon (:), e.g. "fs"
+     * @param fetchPrefix
+     */
+    @Field
+    public void setFetchPrefix(String fetchPrefix) {
+        this.fetchPrefix = fetchPrefix;
+    }
+
+    /**
+     * @param basePath directory whose file tree will be walked
+     */
+    @Field
+    public void setBasePath(String basePath) {
+        this.basePath = Paths.get(basePath);
+    }
+
+    /**
+     * Walks the tree under the base path, enqueueing every file, then enqueues
+     * the {@code POISON} sentinel.
+     *
+     * @throws TimeoutException if the queue stayed full for longer than the maximum wait
+     * @throws IOException if the walk itself fails
+     */
+    @Override
+    protected void enqueue() throws IOException, TimeoutException {
+
+        try {
+            Files.walkFileTree(basePath, new FSFileVisitor());
+        } catch (IOException e) {
+            // visitFile tunnels a TimeoutException through IOException; unwrap it here
+            Throwable cause = e.getCause();
+            if (cause instanceof TimeoutException) {
+                throw (TimeoutException) cause;
+            }
+            throw e;
+        }
+        try {
+            tryToAdd(POISON);
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+            throw new RuntimeException(e);
+        }
+    }
+
+    @Override
+    public void initialize(Map<String, Param> params) throws TikaConfigException {
+        //no-op
+    }
+
+    /**
+     * @throws TikaConfigException if the base path is missing or is not a directory,
+     * or if the fetch prefix is blank or contains a colon
+     */
+    @Override
+    public void checkInitialization(InitializableProblemHandler problemHandler)
+            throws TikaConfigException {
+        if (basePath == null) {
+            throw new TikaConfigException("Must specify a \"basePath\"");
+        }
+        if (! Files.isDirectory(basePath)) {
+            // message now names the actual setting ("basePath"), not "root"
+            throw new TikaConfigException("\"basePath\" directory does not exist: " +
+                    basePath.toAbsolutePath());
+        }
+        if (fetchPrefix == null || fetchPrefix.trim().length() == 0) {
+            throw new TikaConfigException(
+                    "\"fetchPrefix\" must be specified and must be not blank");
+        }
+        if (fetchPrefix.contains(":")) {
+            throw new TikaConfigException("\"fetchPrefix\" must not contain a colon (:)");
+        }
+    }
+
+
+    /** Enqueues each visited file; directories and failed visits are passed over. */
+    private class FSFileVisitor implements FileVisitor<Path> {
+
+        @Override
+        public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
+                throws IOException {
+            return FileVisitResult.CONTINUE;
+        }
+
+        @Override
+        public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
+                throws IOException {
+            String relPath = basePath.relativize(file).toString();
+            String fetcherString = fetchPrefix + ":" + relPath;
+
+            try {
+                tryToAdd(new FetchMetadataPair(fetcherString, new Metadata()));
+            } catch (TimeoutException e) {
+                // FileVisitor can only throw IOException; enqueue() unwraps this
+                throw new IOException(e);
+            } catch (InterruptedException e) {
+                // keep the interrupt visible to the caller instead of swallowing it
+                Thread.currentThread().interrupt();
+                return FileVisitResult.TERMINATE;
+            }
+            return FileVisitResult.CONTINUE;
+        }
+
+        @Override
+        public FileVisitResult visitFileFailed(Path file, IOException exc)
+                throws IOException {
+            return FileVisitResult.CONTINUE;
+        }
+
+        @Override
+        public FileVisitResult postVisitDirectory(Path dir, IOException exc)
+                throws IOException {
+            return FileVisitResult.CONTINUE;
+        }
+    }
+
+
+}
diff --git
a/tika-core/src/test/java/org/apache/tika/fetcher/FileSystemFetchIteratorTest.java
b/tika-core/src/test/java/org/apache/tika/fetcher/FileSystemFetchIteratorTest.java
new file mode 100644
index 0000000..5349fcb
--- /dev/null
+++
b/tika-core/src/test/java/org/apache/tika/fetcher/FileSystemFetchIteratorTest.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fetcher;
+
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static org.junit.Assert.assertEquals;
+
+
+public class FileSystemFetchIteratorTest {
+
+    /**
+     * Runs the iterator on a worker thread, drains it on the test thread, and
+     * checks that it yields exactly the regular files found under the root.
+     */
+    @Test
+    public void testBasic() throws Exception {
+        Path root = Paths.get(".");
+        String fetchPrefix = "fs";
+        ExecutorService es = Executors.newFixedThreadPool(1);
+        try {
+            ExecutorCompletionService<Integer> cs = new ExecutorCompletionService<>(es);
+            FetchIterator it = new FileSystemFetchIterator(fetchPrefix, root);
+
+            cs.submit(it);
+            Set<String> iteratorSet = new HashSet<>();
+            for (FetchMetadataPair p : it) {
+                iteratorSet.add(p.getFetcherString());
+            }
+            // surfaces any exception thrown by the producer
+            Future<Integer> f = cs.take();
+            f.get();
+
+            List<Path> files = listFiles(root);
+            Set<String> truthSet = new HashSet<>();
+            for (Path p : files) {
+                String fetchString = fetchPrefix+":"+root.relativize(p);
+                truthSet.add(fetchString);
+            }
+            assertEquals(truthSet, iteratorSet);
+        } finally {
+            // the pool was never shut down before, leaving its worker thread alive
+            es.shutdownNow();
+        }
+    }
+
+    /**
+     * @return every regular file under {@code path}, found recursively
+     */
+    public static List<Path> listFiles(Path path) throws IOException {
+
+        List<Path> result;
+        try (Stream<Path> walk = Files.walk(path)) {
+            result = walk.filter(Files::isRegularFile)
+                    .collect(Collectors.toList());
+        }
+        return result;
+
+    }
+}
diff --git a/tika-server/pom.xml b/tika-server/pom.xml
index 6262b2f..f35927b 100644
--- a/tika-server/pom.xml
+++ b/tika-server/pom.xml
@@ -34,18 +34,4 @@
<name>Apache Tika server</name>
<url>http://tika.apache.org/</url>
-
-
- <organization>
- <name>The Apache Software Foundation</name>
- <url>http://www.apache.org</url>
- </organization>
- <issueManagement>
- <system>JIRA</system>
- <url>https://issues.apache.org/jira/browse/TIKA</url>
- </issueManagement>
- <ciManagement>
- <system>Jenkins</system>
- <url>https://builds.apache.org/job/Tika-trunk/</url>
- </ciManagement>
</project>
diff --git a/tika-server/tika-server-classic/pom.xml
b/tika-server/tika-server-classic/pom.xml
index fbd9071..a212671 100644
--- a/tika-server/tika-server-classic/pom.xml
+++ b/tika-server/tika-server-classic/pom.xml
@@ -53,7 +53,11 @@
<artifactId>tika-xmp</artifactId>
<version>${project.version}</version>
</dependency>
-
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-emitter-fs</artifactId>
+ <version>${project.version}</version>
+ </dependency>
<!-- test jars -->
<dependency>
<groupId>${project.groupId}</groupId>
@@ -100,7 +104,6 @@
<artifactSet>
<excludes>
<exclude>org.apache.tika:tika-parsers-classic-package:jar:</exclude>
-
<exclude>org.apache.tika:tika-server-core:jar:</exclude>
</excludes>
</artifactSet>
<filters>
diff --git
a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/FetcherTest.java
b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/FetcherTest.java
index 94effe3..3e5a15f 100644
---
a/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/FetcherTest.java
+++
b/tika-server/tika-server-classic/src/test/java/org/apache/tika/server/classic/FetcherTest.java
@@ -24,7 +24,9 @@ import
org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.TikaTest;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.serialization.JsonMetadataList;
+import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
import org.apache.tika.server.core.CXFTestBase;
import org.apache.tika.server.core.FetcherStreamFactory;
import org.apache.tika.server.core.InputStreamFactory;
@@ -42,11 +44,9 @@ import java.util.List;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-@Ignore("turn into actual unit tests")
+
+@Ignore("turn into actual unit tests -- this relies on network
connectivity...bad")
public class FetcherTest extends CXFTestBase {
private static final String META_PATH = "/rmeta";
@@ -88,7 +88,10 @@ public class FetcherTest extends CXFTestBase {
Reader reader = new InputStreamReader(new
GzipCompressorInputStream((InputStream) response.getEntity()), UTF_8);
List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
- TikaTest.debug(metadataList);
+ Metadata parent = metadataList.get(0);
+ String txt =
parent.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT);
+ assertContains("toolkit detects and extracts metadata", txt);
+ assertEquals("Apache Tika – Apache Tika",
parent.get(TikaCoreProperties.TITLE));
}
}
diff --git a/tika-server/tika-server-client/pom.xml
b/tika-server/tika-server-client/pom.xml
index 00284a5..23e304f 100644
--- a/tika-server/tika-server-client/pom.xml
+++ b/tika-server/tika-server-client/pom.xml
@@ -25,5 +25,37 @@
<artifactId>tika-server-client</artifactId>
+ <dependencies>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpclient</artifactId>
+ <version>${httpcomponents.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.code.gson</groupId>
+ <artifactId>gson</artifactId>
+ <version>${gson.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-emitter-fs</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
</project>
\ No newline at end of file
diff --git
a/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClient.java
b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClient.java
new file mode 100644
index 0000000..f535f52
--- /dev/null
+++
b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClient.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server.client;
+
+
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.JsonArray;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonPrimitive;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+/**
+ * Client-side entry point for asking one or more tika-server instances to
+ * fetch, parse and emit a document.
+ * <p>
+ * Every request is serialized to json and posted through one of the
+ * underlying {@link TikaHttpClient}s. When more than one server url was
+ * supplied, a server is selected at random for each request.
+ */
+public class TikaClient {
+
+    private static final Gson GSON = new GsonBuilder().create();
+
+    private final Random random = new Random();
+    //currently only stored; not yet consulted by this class
+    private final TikaConfig tikaConfig;
+    private final List<TikaHttpClient> clients;
+
+    /**
+     * Builds a client backed by one {@link TikaHttpClient} per server url.
+     *
+     * @param tikaConfig  config to associate with this client
+     * @param tikaServers base urls of the tika-server instances; must contain at least one url
+     * @return a new client
+     * @throws TikaClientConfigException if no server url is supplied or if an
+     *                                   http client can't be created for one of the urls
+     */
+    public static TikaClient get(TikaConfig tikaConfig, List<String> tikaServers)
+            throws TikaClientConfigException {
+        if (tikaServers == null || tikaServers.isEmpty()) {
+            //fail fast: getHttpClient() can't select a server from an empty list
+            throw new TikaClientConfigException("must specify at least one tika server url");
+        }
+        List<TikaHttpClient> clients = new ArrayList<>(tikaServers.size());
+        for (String url : tikaServers) {
+            clients.add(TikaHttpClient.get(url));
+        }
+        return new TikaClient(tikaConfig, clients);
+    }
+
+    private TikaClient(TikaConfig tikaConfig, List<TikaHttpClient> clients) {
+        this.tikaConfig = tikaConfig;
+        this.clients = clients;
+    }
+
+    /*public List<Metadata> parse(InputStream is, Metadata metadata) throws IOException, TikaException {
+
+    }*/
+
+    /**
+     * Asks a tika-server to fetch the document identified by
+     * <code>fetcherString</code>, parse it and hand the result to the named emitter.
+     *
+     * @param fetcherString string the server uses to fetch the document
+     * @param metadata      metadata to pass through to the server; entries are
+     *                      sent in the request's "metadata" object
+     * @param emitter       name of the emitter the server should use
+     * @return the outcome of the request as reported by the http layer
+     * @throws IOException   declared for callers; not thrown by the current implementation
+     * @throws TikaException declared for callers; not thrown by the current implementation
+     */
+    public TikaEmitterResult parse(String fetcherString, Metadata metadata, String emitter)
+            throws IOException, TikaException {
+        TikaHttpClient client = getHttpClient();
+        String jsonRequest = jsonifyRequest(fetcherString, metadata, emitter);
+        return client.postJson(jsonRequest);
+    }
+
+    /**
+     * Serializes the request as a json object with the keys "fetcherString",
+     * "emitter" and, if there is any metadata, "metadata". Single-valued
+     * metadata keys are written as strings, multi-valued keys as arrays.
+     */
+    private String jsonifyRequest(String fetcherString, Metadata metadata, String emitter) {
+        JsonObject root = new JsonObject();
+        root.add("fetcherString", new JsonPrimitive(fetcherString));
+        root.add("emitter", new JsonPrimitive(emitter));
+        if (metadata != null && metadata.size() > 0) {
+            JsonObject m = new JsonObject();
+            for (String n : metadata.names()) {
+                String[] vals = metadata.getValues(n);
+                if (vals.length == 1) {
+                    m.add(n, new JsonPrimitive(vals[0]));
+                } else if (vals.length > 1) {
+                    JsonArray arr = new JsonArray();
+                    for (String val : vals) {
+                        arr.add(val);
+                    }
+                    m.add(n, arr);
+                }
+            }
+            root.add("metadata", m);
+        }
+        return GSON.toJson(root);
+    }
+
+    /**
+     * @return the only client if there is just one; otherwise a randomly selected client
+     */
+    private TikaHttpClient getHttpClient() {
+        if (clients.size() == 1) {
+            return clients.get(0);
+        }
+        int index = random.nextInt(clients.size());
+        return clients.get(index);
+    }
+}
diff --git
a/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientCLI.java
b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientCLI.java
new file mode 100644
index 0000000..08489c2
--- /dev/null
+++
b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientCLI.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server.client;
+
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.fetcher.FetchIterator;
+import org.apache.tika.fetcher.FetchMetadataPair;
+import org.apache.tika.fetcher.FileSystemFetchIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+/**
+ * Bare-bones commandline driver: walks a {@link FetchIterator}, queues the
+ * resulting {@link FetchMetadataPair}s and has worker threads ask the
+ * tika-server(s), via {@link TikaClient}, to fetch, parse and emit each one.
+ */
+public class TikaClientCLI {
+
+    //sentinel placed on the queue, once per worker, to tell a worker to stop
+    private static final FetchMetadataPair POISON =
+            new FetchMetadataPair(null, null);
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(TikaClientCLI.class);
+
+    //make these configurable
+    private int numThreads = 1;
+    private long maxWaitMs = 300000;
+
+    /**
+     * @param args tika config path, comma-delimited list of tika-server urls, emitter name
+     * @throws Exception if the arguments are missing or the run fails
+     */
+    public static void main(String[] args) throws Exception {
+        //TODO -- add an actual commandline
+        if (args.length < 3) {
+            throw new IllegalArgumentException(
+                    "expected 3 arguments: <tikaConfigPath> <comma-delimited tika server urls> <emitter>");
+        }
+        Path tikaConfigPath = Paths.get(args[0]);
+        List<String> tikaServerUrls = Arrays.asList(args[1].split(","));
+        String emitterName = args[2];
+
+        TikaClientCLI cli = new TikaClientCLI();
+        cli.execute(tikaConfigPath, tikaServerUrls, emitterName);
+    }
+
+    /**
+     * Starts the fetch iterator, the enqueuer and <code>numThreads</code>
+     * workers, then blocks until all of them have finished.
+     *
+     * @param tikaConfigPath path to the tika config file
+     * @param tikaServerUrls base urls of the tika-servers to use
+     * @param emitterName    emitter that the server(s) should use for every document
+     */
+    private void execute(Path tikaConfigPath, List<String> tikaServerUrls, String emitterName)
+            throws TikaException, IOException, SAXException {
+        TikaConfig config = new TikaConfig(tikaConfigPath);
+
+        ArrayBlockingQueue<FetchMetadataPair> queue = new ArrayBlockingQueue<>(1000);
+        //the workers + the fetch iterator + the enqueuer
+        int numTasks = numThreads + 2;
+        ExecutorService executorService = Executors.newFixedThreadPool(numTasks);
+        try {
+            ExecutorCompletionService<Integer> completionService =
+                    new ExecutorCompletionService<>(executorService);
+            //TODO: fix this!
+            final FetchIterator fetchIterator = new FileSystemFetchIterator(
+                    "fs", Paths.get("."));
+            completionService.submit(fetchIterator);
+            completionService.submit(new Enqueuer(queue, fetchIterator, numThreads));
+            if (tikaServerUrls.size() == numThreads) {
+                //one dedicated server per worker
+                for (int i = 0; i < numThreads; i++) {
+                    TikaClient client = TikaClient.get(config,
+                            Collections.singletonList(tikaServerUrls.get(i)));
+                    completionService.submit(new FetchWorker(queue, client, emitterName));
+                }
+            } else {
+                logDiffSizes(tikaServerUrls.size(), numThreads);
+                //the enqueuer posts one poison per worker and the wait below expects
+                //numThreads workers to finish, so start exactly that many
+                for (int i = 0; i < numThreads; i++) {
+                    TikaClient client = TikaClient.get(config, tikaServerUrls);
+                    completionService.submit(new FetchWorker(queue, client, emitterName));
+                }
+            }
+            awaitCompletion(completionService, numTasks);
+        } finally {
+            //the pool's threads are not daemon threads; without this the jvm would not exit
+            executorService.shutdownNow();
+        }
+    }
+
+    /**
+     * Blocks until <code>numTasks</code> of the submitted tasks have completed.
+     * A poll that times out just means that work is still in progress, so the
+     * poll is retried. The first task failure stops the world.
+     */
+    private void awaitCompletion(ExecutorCompletionService<Integer> completionService, int numTasks) {
+        int finished = 0;
+        while (finished < numTasks) {
+            try {
+                Future<Integer> future = completionService.poll(maxWaitMs, TimeUnit.MILLISECONDS);
+                if (future != null) {
+                    finished++;
+                    future.get();
+                }
+            } catch (InterruptedException e) {
+                //stop the world, but leave the interrupt flag set for the caller
+                Thread.currentThread().interrupt();
+                LOGGER.error("interrupted while waiting for tasks to complete", e);
+                throw new RuntimeException(e);
+            } catch (ExecutionException e) {
+                //stop the world
+                LOGGER.error("task failed", e);
+                throw new RuntimeException(e);
+            }
+        }
+    }
+
+    private void logDiffSizes(int servers, int numThreads) {
+        LOGGER.info("tika server count ({}) != numThreads ({}). " +
+                        "Each client will randomly select a server from this list",
+                servers, numThreads);
+    }
+
+    /**
+     * Simple class that pulls fetchmetadata pairs from the fetchiterator
+     * and enqueues them for the worker threads. Once the iterator is
+     * exhausted, it enqueues one {@link #POISON} per worker.
+     */
+    private class Enqueuer implements Callable<Integer> {
+        private final ArrayBlockingQueue<FetchMetadataPair> queue;
+        private final FetchIterator fetchIterator;
+        private final int numThreads;
+
+        public Enqueuer(ArrayBlockingQueue<FetchMetadataPair> queue,
+                        FetchIterator fetchIterator, int numThreads) {
+            this.queue = queue;
+            this.fetchIterator = fetchIterator;
+            this.numThreads = numThreads;
+        }
+
+        @Override
+        public Integer call() throws Exception {
+            LOGGER.debug("enqueuing");
+            for (FetchMetadataPair p : fetchIterator) {
+                LOGGER.debug("offering {}", p);
+                offer(p);
+            }
+            for (int i = 0; i < numThreads; i++) {
+                offer(POISON);
+            }
+            return 1;
+        }
+
+        //gives up if the queue stays full for longer than maxWaitMs
+        private void offer(FetchMetadataPair p) throws InterruptedException, TimeoutException {
+            boolean offered = queue.offer(p, maxWaitMs, TimeUnit.MILLISECONDS);
+            if (!offered) {
+                throw new TimeoutException("exceeded max wait");
+            }
+        }
+    }
+
+    /**
+     * Takes pairs off the queue and sends them to the tika-server until it
+     * sees {@link #POISON}. A failure on a single document is logged and the
+     * worker moves on to the next one.
+     */
+    private class FetchWorker implements Callable<Integer> {
+        private final ArrayBlockingQueue<FetchMetadataPair> queue;
+        private final TikaClient client;
+        private final String emitterString;
+
+        public FetchWorker(ArrayBlockingQueue<FetchMetadataPair> queue, TikaClient client,
+                           String emitterString) {
+            this.queue = queue;
+            this.client = client;
+            this.emitterString = emitterString;
+        }
+
+        @Override
+        public Integer call() throws Exception {
+            while (true) {
+                FetchMetadataPair p = queue.poll(maxWaitMs, TimeUnit.MILLISECONDS);
+                if (p == null) {
+                    throw new TimeoutException("exceeded maxWaitMs");
+                }
+                if (p == POISON) {
+                    return 1;
+                }
+                try {
+                    LOGGER.debug("parsing {}", p.getFetcherString());
+                    TikaEmitterResult result =
+                            client.parse(p.getFetcherString(), p.getMetadata(), emitterString);
+                    LOGGER.info("{}: {}", p.getFetcherString(), result);
+                } catch (IOException | TikaException e) {
+                    LOGGER.warn(p.getFetcherString(), e);
+                }
+            }
+        }
+    }
+}
diff --git
a/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientConfigException.java
b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientConfigException.java
new file mode 100644
index 0000000..9d5b51a
--- /dev/null
+++
b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaClientConfigException.java
@@ -0,0 +1,13 @@
+package org.apache.tika.server.client;
+
+import org.apache.tika.exception.TikaException;
+
+/**
+ * Signals a problem with the configuration of the tika-server client,
+ * e.g. a server url that can't be parsed as a URI.
+ */
+public class TikaClientConfigException extends TikaException {
+ /**
+ * @param msg description of the configuration problem
+ */
+ public TikaClientConfigException(String msg) {
+ super(msg);
+ }
+
+ /**
+ * @param msg description of the configuration problem
+ * @param cause underlying exception
+ */
+ public TikaClientConfigException(String msg, Throwable cause) {
+ super(msg, cause);
+ }
+}
diff --git
a/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaEmitterResult.java
b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaEmitterResult.java
new file mode 100644
index 0000000..9bed7a6
--- /dev/null
+++
b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaEmitterResult.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server.client;
+
+/**
+ * Immutable outcome of a single request to a tika-server: a status, the
+ * elapsed time and an optional message.
+ */
+public class TikaEmitterResult {
+
+    /**
+     * Outcome of the request. Public so that callers outside this package
+     * can name the type returned by {@link #getStatus()}.
+     */
+    public enum STATUS {
+        OK,
+        NOT_OK,
+        EXCEEDED_MAX_RETRIES,
+        TIMED_OUT_WAITING_FOR_TIKA
+    }
+
+    private final STATUS status;
+    private final String msg;//used for exceptions. will be null for status ok
+    private final long timeElapsed;
+
+    /**
+     * @param status      outcome of the request
+     * @param timeElapsed elapsed time as measured by the caller
+     * @param msg         message that accompanies the status; may be null
+     */
+    public TikaEmitterResult(STATUS status, long timeElapsed, String msg) {
+        this.status = status;
+        this.timeElapsed = timeElapsed;
+        this.msg = msg;
+    }
+
+    @Override
+    public String toString() {
+        return "TikaEmitterResult{" +
+                "status=" + status +
+                ", msg='" + msg + '\'' +
+                ", timeElapsed=" + timeElapsed +
+                '}';
+    }
+
+    /**
+     * @return outcome of the request
+     */
+    public STATUS getStatus() {
+        return status;
+    }
+
+    /**
+     * @return message that accompanies the status; may be null
+     */
+    public String getMsg() {
+        return msg;
+    }
+
+    /**
+     * @return elapsed time as passed to the constructor
+     */
+    public long getTimeElapsed() {
+        return timeElapsed;
+    }
+}
diff --git
a/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaHttpClient.java
b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaHttpClient.java
new file mode 100644
index 0000000..621f5db
--- /dev/null
+++
b/tika-server/tika-server-client/src/main/java/org/apache/tika/server/client/TikaHttpClient.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server.client;
+
+import org.apache.http.HttpHost;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.ClientProtocolException;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.entity.ByteArrayEntity;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.util.EntityUtils;
+import org.apache.tika.exception.TikaException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Low-level class to handle the http layer.
+ * <p>
+ * Posts json requests to a tika-server's <code>emit</code> endpoint. If the
+ * server can't be reached, it polls the <code>tika</code> endpoint until the
+ * server responds again and then retries the request.
+ */
+class TikaHttpClient {
+
+    private static final String ENDPOINT = "emit";
+    private static final String TIKA_ENDPOINT = "tika";
+    private static final Logger LOGGER = LoggerFactory.getLogger(TikaHttpClient.class);
+    private final HttpHost httpHost;
+    private final HttpClient httpClient;
+    private final String endPointUrl;
+    private final String tikaUrl;
+    private int maxRetries = 3;
+    //if can't make contact with Tika server, max wait time in ms
+    private long maxWaitForTikaMs = 120000;
+    //how often to ping /tika (in ms) to see if the server is up and running
+    private long pulseWaitForTikaMs = 1000;
+
+    /**
+     * @param baseUrl base url of the tika-server, with or without a trailing slash
+     * @return a client for that server
+     * @throws TikaClientConfigException if the url is not a valid URI or no host can be read from it
+     */
+    static TikaHttpClient get(String baseUrl) throws TikaClientConfigException {
+        String base = baseUrl.endsWith("/") ? baseUrl : baseUrl + "/";
+        String endPointUrl = base + ENDPOINT;
+        String tikaUrl = base + TIKA_ENDPOINT;
+        URI uri;
+        try {
+            uri = new URI(endPointUrl);
+        } catch (URISyntaxException e) {
+            throw new TikaClientConfigException("bad URI: " + endPointUrl, e);
+        }
+        if (uri.getHost() == null) {
+            //e.g. a url without a scheme; report it as a config problem
+            //instead of letting the HttpHost constructor reject it
+            throw new TikaClientConfigException("can't determine host from url: " + baseUrl);
+        }
+        HttpHost httpHost = new HttpHost(uri.getHost(), uri.getPort(), uri.getScheme());
+        //TODO: need to add other configuration stuff? proxy, username, password, timeouts...
+        HttpClient client = HttpClients.createDefault();
+        return new TikaHttpClient(endPointUrl, tikaUrl, httpHost, client);
+    }
+
+    /**
+     *
+     * @param endPointUrl full url to the tika-server including endpoint
+     * @param tikaUrl url to /tika endpoint to use to check on server status
+     * @param httpHost host to which the requests are sent
+     * @param httpClient client used to execute the requests
+     */
+    private TikaHttpClient(String endPointUrl, String tikaUrl, HttpHost httpHost, HttpClient httpClient) {
+        this.endPointUrl = endPointUrl;
+        this.tikaUrl = tikaUrl;
+        this.httpHost = httpHost;
+        this.httpClient = httpClient;
+    }
+
+    /**
+     * Posts the json to the emit endpoint. If the request fails with an
+     * {@link IOException}, waits for the server to come back and tries
+     * again, up to <code>maxRetries</code> attempts in total.
+     *
+     * @param jsonRequest json to post
+     * @return OK for a 200 response, NOT_OK for any other response (the
+     * response body is the message in both cases), TIMED_OUT_WAITING_FOR_TIKA
+     * if the server did not come back in time, or EXCEEDED_MAX_RETRIES
+     */
+    public TikaEmitterResult postJson(String jsonRequest) {
+        LOGGER.debug("posting to {}", endPointUrl);
+        HttpPost post = new HttpPost(endPointUrl);
+        ByteArrayEntity entity = new ByteArrayEntity(jsonRequest.getBytes(StandardCharsets.UTF_8));
+        post.setEntity(entity);
+        post.setHeader("Content-Type", "application/json");
+
+        int tries = 0;
+        long start = System.currentTimeMillis();
+        try {
+            while (tries++ < maxRetries) {
+                HttpResponse response;
+                try {
+                    response = httpClient.execute(httpHost, post);
+                } catch (IOException e) {
+                    LOGGER.warn("Exception trying to parse", e);
+                    waitForServer();
+                    continue;
+                }
+                String msg = "";
+                try {
+                    if (response.getEntity() != null) {
+                        msg = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8);
+                    }
+                } catch (IOException e) {
+                    //still report the status; the body is just not available
+                    LOGGER.warn("couldn't read response body from " + endPointUrl, e);
+                } finally {
+                    //make sure the connection goes back to the pool
+                    EntityUtils.consumeQuietly(response.getEntity());
+                }
+                long elapsed = System.currentTimeMillis() - start;
+                TikaEmitterResult.STATUS status = TikaEmitterResult.STATUS.OK;
+                if (response.getStatusLine().getStatusCode() != 200) {
+                    status = TikaEmitterResult.STATUS.NOT_OK;
+                } else {
+                    //pull out stacktrace from parse exception?
+                }
+                return new TikaEmitterResult(status, elapsed, msg);
+            }
+        } catch (TimeoutWaitingForTikaException e) {
+            long elapsed = System.currentTimeMillis() - start;
+            return new TikaEmitterResult(
+                    TikaEmitterResult.STATUS.TIMED_OUT_WAITING_FOR_TIKA,
+                    elapsed, ""
+            );
+        }
+        long elapsed = System.currentTimeMillis() - start;
+        return new TikaEmitterResult(
+                TikaEmitterResult.STATUS.EXCEEDED_MAX_RETRIES,
+                elapsed, ""
+        );
+    }
+
+    /**
+     * Pings the /tika endpoint every <code>pulseWaitForTikaMs</code> until it
+     * answers with a 200.
+     *
+     * @throws TimeoutWaitingForTikaException if the server does not answer
+     * within <code>maxWaitForTikaMs</code> or if the thread is interrupted while waiting
+     */
+    private void waitForServer() throws TimeoutWaitingForTikaException {
+        long start = System.currentTimeMillis();
+        long elapsed = 0;
+        LOGGER.info("server unreachable; waiting for it to restart");
+        while (elapsed < maxWaitForTikaMs) {
+            try {
+                Thread.sleep(pulseWaitForTikaMs);
+            } catch (InterruptedException e) {
+                //restore the flag and stop waiting; otherwise every following sleep would fail immediately
+                Thread.currentThread().interrupt();
+                throw new TimeoutWaitingForTikaException("interrupted while waiting for " + tikaUrl);
+            }
+
+            HttpGet get = new HttpGet(tikaUrl);
+            HttpResponse response = null;
+            try {
+                response = httpClient.execute(httpHost, get);
+                if (response.getStatusLine().getStatusCode() == 200) {
+                    LOGGER.debug("server back up");
+                    return;
+                }
+            } catch (IOException e) {
+                LOGGER.debug("waiting for server; failed to reach it: {} ms",
+                        System.currentTimeMillis() - start);
+            } finally {
+                //an unconsumed response would hold on to a pooled connection
+                if (response != null) {
+                    EntityUtils.consumeQuietly(response.getEntity());
+                }
+            }
+
+            elapsed = System.currentTimeMillis() - start;
+        }
+
+        LOGGER.warn("Timeout waiting for tika server {} in {} ms", tikaUrl,
+                elapsed);
+        throw new TimeoutWaitingForTikaException("timed out waiting for " + tikaUrl);
+    }
+
+    /**
+     * Internal signal that the server did not come back in time.
+     */
+    private static class TimeoutWaitingForTikaException extends TikaException {
+        public TimeoutWaitingForTikaException(String msg) {
+            super(msg);
+        }
+    }
+}
\ No newline at end of file
diff --git a/tika-server/tika-server-client/src/main/resources/log4j.properties
b/tika-server/tika-server-client/src/main/resources/log4j.properties
new file mode 100644
index 0000000..7d3b372
--- /dev/null
+++ b/tika-server/tika-server-client/src/main/resources/log4j.properties
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#info,debug, error,fatal ...
+log4j.rootLogger=info,stderr
+
+#console
+log4j.appender.stderr=org.apache.log4j.ConsoleAppender
+log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
+log4j.appender.stderr.Target=System.err
+
+log4j.appender.stderr.layout.ConversionPattern= %-5p %m%n
diff --git
a/tika-server/tika-server-client/src/test/java/org/apache/tika/server/client/TestBasic.java
b/tika-server/tika-server-client/src/test/java/org/apache/tika/server/client/TestBasic.java
new file mode 100644
index 0000000..9df8e44
--- /dev/null
+++
b/tika-server/tika-server-client/src/test/java/org/apache/tika/server/client/TestBasic.java
@@ -0,0 +1,16 @@
+package org.apache.tika.server.client;
+
+import org.junit.Test;
+
+/**
+ * Smoke test that runs {@link TikaClientCLI} against a locally running tika-server.
+ */
+public class TestBasic {
+
+    @Test
+    @org.junit.Ignore("requires a running tika-server at http://localhost:9998/")
+    public void testBasic() throws Exception {
+        //load the config from the test classpath, not from a developer-specific absolute path
+        java.net.URL configUrl =
+                TestBasic.class.getResource("/tika-config-simple-solr-emitter.xml");
+        if (configUrl == null) {
+            throw new IllegalStateException(
+                    "couldn't find tika-config-simple-solr-emitter.xml on the classpath");
+        }
+        String[] args = new String[]{
+                java.nio.file.Paths.get(configUrl.toURI()).toString(),
+                "http://localhost:9998/",
+                "fs"
+        };
+        TikaClientCLI.main(args);
+    }
+}
diff --git a/tika-server/tika-server-client/src/test/resources/log4j.properties
b/tika-server/tika-server-client/src/test/resources/log4j.properties
new file mode 100644
index 0000000..afc3ec5
--- /dev/null
+++ b/tika-server/tika-server-client/src/test/resources/log4j.properties
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#info,debug, error,fatal ...
+log4j.rootLogger=debug,stderr
+
+#console
+log4j.appender.stderr=org.apache.log4j.ConsoleAppender
+log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
+log4j.appender.stderr.Target=System.err
+
+log4j.appender.stderr.layout.ConversionPattern= %-5p %d [%t] (%F:%L) - %m%n
diff --git
a/tika-server/tika-server-client/src/test/resources/tika-config-simple-solr-emitter.xml
b/tika-server/tika-server-client/src/test/resources/tika-config-simple-solr-emitter.xml
new file mode 100644
index 0000000..ae5a9b2
--- /dev/null
+++
b/tika-server/tika-server-client/src/test/resources/tika-config-simple-solr-emitter.xml
@@ -0,0 +1,78 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<properties>
+ <service-loader initializableProblemHandler="throw"/>
+ <fetchIterators>
+ <fetchIterator class="org.apache.tika.fetcher.FileSystemFetchIterator">
+ <params>
+ <param name="fetchPrefix" type="string">fs</param>
+ <param name="basePath" type="string">fix</param>
+ </params>
+ </fetchIterator>
+ </fetchIterators>
+ <fetchers>
+ <fetcher class="org.apache.tika.fetcher.FileSystemFetcher">
+ <params>
+ <param name="name" type="string">fs</param>
+ <param name="basePath" type="string">fix</param>
+ </params>
+ </fetcher>
+ </fetchers>
+ <metadataFilters>
+ <metadataFilter
class="org.apache.tika.metadata.filter.FieldNameMappingFilter">
+ <params>
+ <param name="mappings" type="list">
+ <string>X-TIKA:content->content</string>
+
<string>X-TIKA:embedded_resource_path->embedded_path</string>
+ <string>Content-Length->length</string>
+ <string>dc:creator->creators</string>
+ <string>dc:title->title</string>
+ </param>
+ </params>
+ </metadataFilter>
+ </metadataFilters>
+ <emitters>
+ <emitter class="org.apache.tika.emitter.fs.FileSystemEmitter">
+ <params>
+ <param name="name" type="string">fs</param>
+ <param name="basePath" type="string">fix</param>
+ </params>
+ </emitter>
+ <!--
+ <emitter class="org.apache.tika.emitter.solr.SolrEmitter">
+ <params>
+ <param name="name" type="string">solr1</param>
+ <param name="url"
type="string">http://localhost:8983/solr/tika-test</param>
+ <param name="attachmentStrategy"
type="string">concatenate-content</param>
+ <param name="contentField" type="string">content</param>
+ <param name="commitWithin" type="int">10</param>
+ </params>
+ </emitter>
+ <emitter class="org.apache.tika.emitter.solr.SolrEmitter">
+ <params>
+ <param name="name" type="string">solr2</param>
+ <param name="url"
type="string">http://localhost:8983/solr/tika-test</param>
+ <param name="attachmentStrategy"
type="string">parent-child</param>
+ <param name="contentField" type="string">content</param>
+ <param name="commitWithin" type="int">10</param>
+ </params>
+ </emitter>-->
+ </emitters>
+</properties>
\ No newline at end of file
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerCli.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerCli.java
index b84b74d..0035efb 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerCli.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/TikaServerCli.java
@@ -346,9 +346,8 @@ public class TikaServerCli {
providers.add(new TextMessageBodyWriter());
providers.addAll(loadWriterServices());
providers.add(new
TikaServerParseExceptionMapper(returnStackTrace));
- if (line.hasOption("status")) {
- providers.add(new JSONObjWriter());
- }
+ providers.add(new JSONObjWriter());
+
if (logFilter != null) {
providers.add(logFilter);
}
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
index bad2983..008feed 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/EmitterResource.java
@@ -20,13 +20,13 @@ package org.apache.tika.server.core.resource;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
-import org.apache.commons.io.IOUtils;
import org.apache.tika.emitter.Emitter;
import org.apache.tika.emitter.TikaEmitterException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.fetcher.Fetcher;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
import org.apache.tika.utils.ExceptionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -54,7 +54,6 @@ import java.util.Map;
public class EmitterResource {
private static final String EMITTER_PARAM = "emitter";
- private static final String FETCH_STRING = "fetcherString";
private static final String FETCH_STRING_ABBREV = "f";
/**
@@ -66,7 +65,7 @@ public class EmitterResource {
/**
- *
+ * @param is input stream is ignored in 'get'
* @param httpHeaders
* @param info
* @param emitterName
@@ -77,15 +76,20 @@ public class EmitterResource {
@GET
@Produces("application/json")
@Path("{" + EMITTER_PARAM + " : (\\w+)?}")
- public Map<String, String> getMetadata(InputStream is, @Context
HttpHeaders httpHeaders,
+ public Map<String, String> getMetadataAbbrev(InputStream is, @Context
HttpHeaders httpHeaders,
@Context UriInfo info,
@PathParam(EMITTER_PARAM) String
emitterName,
- @QueryParam(FETCH_STRING) String
fetcherString) throws Exception {
-
+ @QueryParam(FETCH_STRING_ABBREV)
String fetcherString) throws Exception {
Metadata metadata = new Metadata();
Fetcher fetcher = TikaResource.getConfig().getFetcher();
+ System.out.println("FETCHER: " + fetcher.getClass() + " : " +
fetcher.getSupportedPrefixes());
+ System.out.println("emitter: " +
TikaResource.getConfig().getEmitter().getClass()
+ + " : " +
TikaResource.getConfig().getEmitter().getSupported());
List<Metadata> metadataList;
try (InputStream fetchedIs = fetcher.fetch(fetcherString, metadata)) {
+ for (String n : metadata.names()) {
+ System.out.println(n + " ; "+metadata.get(n));
+ }
metadataList =
RecursiveMetadataResource.parseMetadata(fetchedIs,
metadata,
@@ -95,25 +99,6 @@ public class EmitterResource {
}
/**
- *
- * @param httpHeaders
- * @param info
- * @param emitterName
- * @param fetcherString specify the fetch string in the url's query section
- * @return
- * @throws Exception
- */
- @GET
- @Produces("application/json")
- @Path("{" + EMITTER_PARAM + " : (\\w+)?}")
- public Map<String, String> getMetadataAbbrev(InputStream is, @Context
HttpHeaders httpHeaders,
- @Context UriInfo info,
- @PathParam(EMITTER_PARAM) String
emitterName,
- @QueryParam(FETCH_STRING_ABBREV)
String fetcherString) throws Exception {
- return getMetadata(is, httpHeaders, info, emitterName, fetcherString);
- }
-
- /**
* The user puts the raw bytes of the file and specifies the emitter
* as elsewhere. This will not trigger a fetcher. If you want a
* fetcher, use the get or post options.
@@ -149,34 +134,31 @@ public class EmitterResource {
/**
* The client posts a json request. At a minimum, this must be a
- * json object that contains a fetcherString key with the key to
- * fetch the inputStream. Optionally, it may contain a metadata
+ * json object that contains an emitter and a fetcherString key with
+ * the key to fetch the inputStream. Optionally, it may contain a metadata
* object that will be used to populate the metadata key for pass
* through of metadata from the client.
* <p>
* The extracted text content is stored with the key
* {@link
org.apache.tika.sax.AbstractRecursiveParserWrapperHandler#TIKA_CONTENT}
* <p>
- * Must specify an emitter in the path, e.g. /emit/solr
+ * Must specify a fetcherString and an emitter in the posted json.
* @param info uri info
- * @param emitterName which emitter to use; emitters must be configured in
- * the TikaConfig file.
* @return InputStream that can be deserialized as a list of {@link
Metadata} objects
* @throws Exception
*/
@POST
@Produces("application/json")
- @Path("{" + EMITTER_PARAM + " : (\\w+)?}")
- public Map<String, String> getMetadata(InputStream is,
+ public Map<String, String> getMetadataFromPost(InputStream is,
@Context HttpHeaders httpHeaders,
- @Context UriInfo info,
- @PathParam(EMITTER_PARAM) String emitterName
+ @Context UriInfo info
) throws Exception {
JsonElement root = null;
try (Reader reader = new InputStreamReader(is,
StandardCharsets.UTF_8)) {
root = JsonParser.parseReader(reader);
}
String fetcherString =
root.getAsJsonObject().get("fetcherString").getAsString();
+ String emitterName =
root.getAsJsonObject().get("emitter").getAsString();
Metadata metadata = new Metadata();
if (root.getAsJsonObject().has("metadata")) {
JsonObject meta =
root.getAsJsonObject().getAsJsonObject("metadata");
@@ -191,16 +173,37 @@ public class EmitterResource {
}
}
}
- List<Metadata> metadataList;
+ for (String n : metadata.names()) {
+ System.out.println(n + " ; "+metadata.get(n));
+ }
+ List<Metadata> metadataList = null;
try (InputStream stream =
TikaResource.getConfig().getFetcher().fetch(fetcherString, metadata)) {
+ for (String n : metadata.names()) {
+ System.out.println("2: " + n + " ; "+metadata.get(n));
+ }
metadataList = RecursiveMetadataResource.parseMetadata(
stream,
metadata,
httpHeaders.getRequestHeaders(), info, "text");
+ } catch (Error error) {
+ return returnError(emitterName, error);
+ }
+ for (String n : metadataList.get(0).names()) {
+ System.out.println("3: " + n + " ; "+metadataList.get(0).get(n));
}
return emit(emitterName, metadataList);
}
+    /**
+     * Builds the status map returned to the client when fetching/parsing
+     * ended with a {@link Error} rather than a normal result.
+     *
+     * @param emitterName name of the emitter from the request; echoed back under "emitter"
+     * @param error the error that was caught; its full stack trace is stored under "parse_error"
+     * @return map with the keys "status" (always "parse_error"), "emitter" and "parse_error"
+     */
+    private Map<String, String> returnError(String emitterName, Error error) {
+        final Map<String, String> result = new HashMap<>();
+        result.put("status", "parse_error");
+        result.put("emitter", emitterName);
+        result.put("parse_error", ExceptionUtils.getStackTrace(error));
+        return result;
+    }
+
private Map<String, String> emit(String emitterName, List<Metadata>
metadataList) throws TikaException {
Emitter emitter = TikaResource.getConfig().getEmitter();
String status = "ok";
@@ -216,7 +219,12 @@ public class EmitterResource {
statusMap.put("status", status);
statusMap.put("emitter", emitterName);
if (exceptionMsg.length() > 0) {
- statusMap.put("exception_msg", exceptionMsg);
+ statusMap.put("emitter_exception", exceptionMsg);
+ }
+ String parseStackTrace = metadataList.get(0).get(
+ AbstractRecursiveParserWrapperHandler.CONTAINER_EXCEPTION);
+ if (parseStackTrace != null) {
+ statusMap.put("parse_exception", parseStackTrace);
}
return statusMap;
}
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
index 46cf093..9285309 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
@@ -292,8 +292,8 @@ public class TikaResource {
}
if (mediaType != null) {
- metadata.add(Metadata.CONTENT_TYPE, mediaType.toString());
- metadata.add(TikaCoreProperties.CONTENT_TYPE_OVERRIDE,
mediaType.toString());
+ //metadata.add(Metadata.CONTENT_TYPE, mediaType.toString());
+ //metadata.add(TikaCoreProperties.CONTENT_TYPE_OVERRIDE,
mediaType.toString());
}
for (Map.Entry<String, List<String>> e : httpHeaders.entrySet()) {
diff --git
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/IntegrationTestBase.java
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/IntegrationTestBase.java
new file mode 100644
index 0000000..5144798
--- /dev/null
+++
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/IntegrationTestBase.java
@@ -0,0 +1,88 @@
+package org.apache.tika.server.core;
+
+import org.apache.cxf.common.logging.LogUtils;
+import org.apache.tika.TikaTest;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.security.Permission;
+
+/**
+ * Shared fixture for the tika-server integration tests: mock document paths,
+ * the port/endpoint the tests talk to, a temporary copy of the logging
+ * configuration, and a {@link SecurityManager} that turns
+ * {@code System.exit()} into a {@link MyExitException} for the duration of
+ * each test.
+ */
+public class IntegrationTestBase extends TikaTest {
+
+    static final String TEST_HELLO_WORLD = "test-documents/mock/hello_world.xml";
+    static final String TEST_OOM = "test-documents/mock/fake_oom.xml";
+    static final String TEST_SYSTEM_EXIT = "test-documents/mock/system_exit.xml";
+    static final String TEST_HEAVY_HANG = "test-documents/mock/heavy_hang_30000.xml";
+    static final String TEST_HEAVY_HANG_SHORT = "test-documents/mock/heavy_hang_100.xml";
+    static final String TEST_STDOUT_STDERR = "test-documents/mock/testStdOutErr.xml";
+    static final String TEST_STATIC_STDOUT_STDERR = "test-documents/mock/testStaticStdOutErr.xml";
+    static final String META_PATH = "/rmeta";
+    static final String STATUS_PATH = "/status";
+
+    /** Upper bound, in milliseconds, that subclasses use when waiting on the server. */
+    static final long MAX_WAIT_MS = 60000;
+
+    //running into conflicts on 9998 with the CXFTestBase tests
+    //TODO: figure out why?!
+    static final String INTEGRATION_TEST_PORT = "9999";
+
+    protected static final String endPoint =
+            "http://localhost:" + INTEGRATION_TEST_PORT;
+
+    /** Security manager that was installed before {@link #setUp()}; restored in {@link #tearDown()}. */
+    private SecurityManager existingSecurityManager = null;
+
+    /** Temporary copy of /logging/log4j_forked.xml; created once per class. */
+    static Path LOG_FILE;
+
+    /**
+     * Silences the CXF web client logger and copies the logging
+     * configuration from the classpath into a temp file.
+     *
+     * @throws Exception if the temp file cannot be created or written
+     */
+    @BeforeClass
+    public static void staticSetup() throws Exception {
+        LogUtils.setLoggerClass(NullWebClientLogger.class);
+        LOG_FILE = Files.createTempFile("tika-server-integration", ".xml");
+        // close the classpath stream once copied; Files.copy does not close its source
+        try (java.io.InputStream logConfig =
+                     TikaServerIntegrationTest.class.getResourceAsStream("/logging/log4j_forked.xml")) {
+            Files.copy(logConfig, LOG_FILE, StandardCopyOption.REPLACE_EXISTING);
+        }
+    }
+
+    /**
+     * Installs a permissive security manager whose only restriction is
+     * that any exit attempt throws {@link MyExitException}.
+     */
+    @Before
+    public void setUp() throws Exception {
+        existingSecurityManager = System.getSecurityManager();
+        System.setSecurityManager(new SecurityManager() {
+            @Override
+            public void checkExit(int status) {
+                super.checkExit(status);
+                throw new MyExitException(status);
+            }
+            @Override
+            public void checkPermission(Permission perm) {
+                // all ok
+            }
+            @Override
+            public void checkPermission(Permission perm, Object context) {
+                // all ok
+            }
+        });
+    }
+
+    /**
+     * Removes the temporary logging configuration.
+     *
+     * @throws Exception if the file exists but cannot be deleted
+     */
+    @AfterClass
+    public static void staticTearDown() throws Exception {
+        // LOG_FILE is still null if staticSetup() failed before assigning it;
+        // don't mask that failure with a NullPointerException here
+        if (LOG_FILE != null) {
+            Files.deleteIfExists(LOG_FILE);
+        }
+    }
+
+    /** Restores whatever security manager was active before {@link #setUp()}. */
+    @After
+    public void tearDown() throws Exception {
+        System.setSecurityManager(existingSecurityManager);
+    }
+
+    /** Thrown by the test security manager in place of an actual JVM exit. */
+    static class MyExitException extends RuntimeException {
+        private final int status;
+        MyExitException(int status) {
+            this.status = status;
+        }
+
+        /** @return the status code that was passed to the intercepted exit call */
+        public int getStatus() {
+            return status;
+        }
+    }
+}
diff --git
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaEmitterTest.java
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaEmitterTest.java
index 2155680..c02f078 100644
---
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaEmitterTest.java
+++
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaEmitterTest.java
@@ -19,6 +19,7 @@ package org.apache.tika.server.core;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
+import com.google.gson.JsonParser;
import com.google.gson.JsonPrimitive;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
@@ -42,6 +43,7 @@ import org.junit.Test;
import javax.ws.rs.core.Response;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
+import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
@@ -53,6 +55,7 @@ import java.util.List;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
/**
* This offers basic integration tests with fetchers and emitters.
@@ -60,22 +63,30 @@ import static org.junit.Assert.assertFalse;
*/
public class TikaEmitterTest extends CXFTestBase {
- private static final String EMITTER_PATH = "/emit/fs";
+ private static final String EMITTER_PATH = "/emit";
+ private static final String EMITTER_PATH_AND_FS = "/emit/fs";
private static Path TMP_DIR;
private static Path TMP_OUTPUT_DIR;
private static Path TMP_OUTPUT_FILE;
private static String TIKA_CONFIG_XML;
+ private static String HELLO_WORLD = "hello_world.xml";
+ private static String HELLO_WORLD_JSON = "hello_world.xml.json";
@BeforeClass
public static void setUpBeforeClass() throws Exception {
TMP_DIR = Files.createTempDirectory("tika-emitter-test-");
Path inputDir = TMP_DIR.resolve("input");
TMP_OUTPUT_DIR = TMP_DIR.resolve("output");
- TMP_OUTPUT_FILE = TMP_OUTPUT_DIR.resolve("hello_world.xml.json");
+ TMP_OUTPUT_FILE = TMP_OUTPUT_DIR.resolve(HELLO_WORLD_JSON);
Files.createDirectories(inputDir);
Files.createDirectories(TMP_OUTPUT_DIR);
-
Files.copy(TikaEmitterTest.class.getResourceAsStream("/test-documents/mock/hello_world.xml"),
- inputDir.resolve("hello_world.xml"));
+
+ for (String mockFile : new String[]{
+ "hello_world.xml", "null_pointer.xml"}) {
+ Files.copy(TikaEmitterTest.class.getResourceAsStream(
+ "/test-documents/mock/"+mockFile),
+ inputDir.resolve(mockFile));
+ }
TIKA_CONFIG_XML = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"+
"<properties>"+
@@ -106,6 +117,7 @@ public class TikaEmitterTest extends CXFTestBase {
if (Files.exists(TMP_OUTPUT_FILE)) {
Files.delete(TMP_OUTPUT_FILE);
}
+ assertFalse(Files.isRegularFile(TMP_OUTPUT_FILE));
}
@Override
@@ -134,18 +146,16 @@ public class TikaEmitterTest extends CXFTestBase {
@Test
public void testGet() throws Exception {
- Path targetFile = TMP_OUTPUT_DIR.resolve("hello_world.xml.json");
- assertFalse(Files.isRegularFile(targetFile));
- String q = "?fetcherString="+
+ String q = "?f="+
URLEncoder.encode("fs:hello_world.xml",
StandardCharsets.UTF_8.name());
- String getUrl = endPoint+EMITTER_PATH+q;
+ String getUrl = endPoint+EMITTER_PATH_AND_FS+q;
Response response = WebClient
.create(getUrl)
.accept("application/json").get();
assertEquals(200, response.getStatus());
List<Metadata> metadataList = null;
- try (Reader reader = Files.newBufferedReader(targetFile)) {
+ try (Reader reader = Files.newBufferedReader(TMP_OUTPUT_FILE)) {
metadataList = JsonMetadataList.fromJson(reader);
}
assertEquals(1, metadataList.size());
@@ -159,11 +169,10 @@ public class TikaEmitterTest extends CXFTestBase {
@Test
public void testPost() throws Exception {
- Path targetFile = TMP_OUTPUT_DIR.resolve("hello_world.xml.json");
- assertFalse(Files.isRegularFile(targetFile));
JsonObject root = new JsonObject();
root.add("fetcherString", new JsonPrimitive("fs:hello_world.xml"));
+ root.add("emitter", new JsonPrimitive("fs"));
JsonObject userMetadata = new JsonObject();
String[] valueArray = new String[] {"my-value-1", "my-value-2",
"my-value-3"};
JsonArray arr = new JsonArray();
@@ -184,7 +193,7 @@ public class TikaEmitterTest extends CXFTestBase {
assertEquals(200, response.getStatus());
List<Metadata> metadataList = null;
- try (Reader reader = Files.newBufferedReader(targetFile)) {
+ try (Reader reader = Files.newBufferedReader(TMP_OUTPUT_FILE)) {
metadataList = JsonMetadataList.fromJson(reader);
}
assertEquals(1, metadataList.size());
@@ -200,10 +209,8 @@ public class TikaEmitterTest extends CXFTestBase {
@Test
public void testPut() throws Exception {
- Path targetFile = TMP_OUTPUT_DIR.resolve("hello_world.xml.json");
- assertFalse(Files.isRegularFile(targetFile));
- String getUrl = endPoint+EMITTER_PATH;
+ String getUrl = endPoint+EMITTER_PATH_AND_FS;
String metaPathKey = EmitterResource.PATH_KEY_FOR_HTTP_HEADER;
Response response = WebClient
@@ -214,10 +221,9 @@ public class TikaEmitterTest extends CXFTestBase {
ClassLoader
.getSystemResourceAsStream("test-documents/mock/hello_world.xml")
);
- System.out.println(IOUtils.toString((InputStream)
response.getEntity(), StandardCharsets.UTF_8));
assertEquals(200, response.getStatus());
List<Metadata> metadataList = null;
- try (Reader reader = Files.newBufferedReader(targetFile)) {
+ try (Reader reader = Files.newBufferedReader(TMP_OUTPUT_FILE)) {
metadataList = JsonMetadataList.fromJson(reader);
}
assertEquals(1, metadataList.size());
@@ -229,4 +235,51 @@ public class TikaEmitterTest extends CXFTestBase {
assertEquals("application/mock+xml",
metadata.get(Metadata.CONTENT_TYPE));
}
+    /**
+     * Posts a fetch/emit request for the mock file null_pointer.xml and
+     * checks that (a) the server still answers 200, (b) the json response
+     * carries a "parse_exception" entry mentioning NullPointerException, and
+     * (c) the emitted null_pointer.xml.json contains the user-supplied
+     * metadata together with the container exception.
+     */
+    @Test
+    public void testPostNPE() throws Exception {
+        String[] valueArray = new String[] {"my-value-1", "my-value-2", "my-value-3"};
+        JsonArray arr = new JsonArray();
+        for (String value : valueArray) {
+            arr.add(value);
+        }
+        JsonObject userMetadata = new JsonObject();
+        userMetadata.add("my-key", new JsonPrimitive("my-value"));
+        userMetadata.add("my-key-multi", arr);
+
+        JsonObject root = new JsonObject();
+        root.add("fetcherString", new JsonPrimitive("fs:null_pointer.xml"));
+        root.add("emitter", new JsonPrimitive("fs"));
+        root.add("metadata", userMetadata);
+        String jsonPost = new Gson().toJson(root);
+
+        String postUrl = endPoint+EMITTER_PATH;
+        Response response = WebClient
+                .create(postUrl)
+                .accept("application/json")
+                .post(jsonPost);
+        assertEquals(200, response.getStatus());
+
+        JsonObject jsonResponse;
+        try (Reader reader = new InputStreamReader(
+                (InputStream)response.getEntity(), StandardCharsets.UTF_8)) {
+            jsonResponse = JsonParser.parseReader(reader).getAsJsonObject();
+        }
+        // check for the key before dereferencing it, so a missing entry is
+        // reported as an assertion failure instead of a NullPointerException
+        assertNotNull("response has no parse_exception entry",
+                jsonResponse.get("parse_exception"));
+        String parseException = jsonResponse.get("parse_exception").getAsString();
+        assertContains("NullPointerException", parseException);
+
+        List<Metadata> metadataList = null;
+        try (Reader reader =
+                     Files.newBufferedReader(TMP_OUTPUT_DIR.resolve("null_pointer.xml.json"))) {
+            metadataList = JsonMetadataList.fromJson(reader);
+        }
+        assertEquals(1, metadataList.size());
+        Metadata metadata = metadataList.get(0);
+        assertEquals("application/mock+xml", metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("my-value", metadata.get("my-key"));
+        assertArrayEquals(valueArray, metadata.getValues("my-key-multi"));
+        assertContains("NullPointerException",
+                metadata.get(AbstractRecursiveParserWrapperHandler.CONTAINER_EXCEPTION));
+    }
+
+ //can't test system_exit here because server is in same process
}
diff --git
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerEmitterIntegrationTest.java
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerEmitterIntegrationTest.java
new file mode 100644
index 0000000..acb4657
--- /dev/null
+++
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerEmitterIntegrationTest.java
@@ -0,0 +1,268 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.server.core;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParser;
+import com.google.gson.JsonPrimitive;
+import org.apache.commons.io.FileUtils;
+import org.apache.cxf.jaxrs.client.WebClient;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.ws.rs.ProcessingException;
+import javax.ws.rs.core.Response;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Integration tests that launch {@link TikaServerCli} on a background
+ * thread, post fetch/emit json requests to its {@code /emit} endpoint and
+ * inspect either the json response or the file written by the file system
+ * emitter.
+ * <p>
+ * A file system fetcher and a file system emitter are configured against
+ * temporary input/output directories created once per class.
+ */
+public class TikaServerEmitterIntegrationTest extends IntegrationTestBase {
+
+    private static final Logger LOG =
+            LoggerFactory.getLogger(TikaServerEmitterIntegrationTest.class);
+
+    private static final Gson GSON = new GsonBuilder().create();
+
+    /** Mock documents copied into the fetcher's base directory. */
+    private static final String[] FILES = new String[] {
+            "hello_world.xml",
+            "heavy_hang_30000.xml", "real_oom.xml", "system_exit.xml"
+    };
+
+    private static Path TMP_DIR;
+    private static Path TMP_OUTPUT_DIR;
+    private static String TIKA_CONFIG_XML;
+    private static Path TIKA_CONFIG;
+
+    /**
+     * Creates the temporary input/output directories, copies the mock
+     * documents into the input directory and writes the tika config that
+     * points the fetcher and emitter at those directories.
+     */
+    @BeforeClass
+    public static void setUpBeforeClass() throws Exception {
+        TMP_DIR = Files.createTempDirectory("tika-emitter-test-");
+        Path inputDir = TMP_DIR.resolve("input");
+        TMP_OUTPUT_DIR = TMP_DIR.resolve("output");
+        Files.createDirectories(inputDir);
+        Files.createDirectories(TMP_OUTPUT_DIR);
+
+        for (String mockFile : FILES) {
+            // Files.copy does not close its source stream
+            try (InputStream mock = TikaEmitterTest.class.getResourceAsStream(
+                    "/test-documents/mock/"+mockFile)) {
+                Files.copy(mock, inputDir.resolve(mockFile));
+            }
+        }
+        TIKA_CONFIG = TMP_DIR.resolve("tika-config.xml");
+
+        TIKA_CONFIG_XML = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"+
+                "<properties>"+
+                "<fetchers>"+
+                "<fetcher class=\"org.apache.tika.fetcher.FileSystemFetcher\">"+
+                "<params>"+
+                "<param name=\"basePath\" type=\"string\">"+inputDir.toAbsolutePath()+"</param>"+
+                "</params>"+
+                "</fetcher>"+
+                "</fetchers>"+
+                "<emitters>"+
+                "<emitter class=\"org.apache.tika.emitter.fs.FileSystemEmitter\">"+
+                "<params>"+
+                "<param name=\"basePath\" type=\"string\">"+ TMP_OUTPUT_DIR.toAbsolutePath()+"</param>"+
+                "</params>"+
+                "</emitter>"+
+                "</emitters>"+
+                "</properties>";
+
+        FileUtils.write(TIKA_CONFIG.toFile(), TIKA_CONFIG_XML, UTF_8);
+    }
+
+    /** Deletes the temporary directory tree created in {@link #setUpBeforeClass()}. */
+    @AfterClass
+    public static void tearDownAfterClass() throws Exception {
+        FileUtils.deleteDirectory(TMP_DIR.toFile());
+    }
+
+    /**
+     * Removes any emitter output left behind by an earlier test so that each
+     * test starts without a pre-existing output file.
+     */
+    @Before
+    public void setUpEachTest() throws Exception {
+        for (String problemFile : FILES) {
+            Path targ = TMP_OUTPUT_DIR.resolve(problemFile + ".json");
+            Files.deleteIfExists(targ);
+            // verify the precondition whether or not a file had to be deleted
+            assertFalse(Files.isRegularFile(targ));
+        }
+    }
+
+    /** A well-behaved document must be parsed and emitted to the output directory. */
+    @Test
+    public void testBasic() throws Exception {
+        Thread serverThread = startServer("-maxFiles", "2000");
+        try {
+            testOne("hello_world.xml", true);
+        } finally {
+            serverThread.interrupt();
+        }
+    }
+
+    /**
+     * Requests the mock document system_exit.xml; passes only if the client
+     * call ends in a {@link ProcessingException}.
+     */
+    @Test(expected = ProcessingException.class)
+    public void testSystemExit() throws Exception {
+        Thread serverThread = startServer("-maxFiles", "2000");
+        try {
+            testOne("system_exit.xml", false);
+        } finally {
+            serverThread.interrupt();
+        }
+    }
+
+    /**
+     * Requests the mock document real_oom.xml with the server started with
+     * {@code -JXmx128m}; the json response must report a "parse_error" that
+     * mentions "heap space".
+     */
+    @Test
+    public void testOOM() throws Exception {
+        Thread serverThread = startServer("-JXmx128m", "-maxFiles", "2000");
+        try {
+            JsonObject response = testOne("real_oom.xml", false);
+            assertContains("heap space",
+                    response.get("parse_error").getAsString());
+        } finally {
+            serverThread.interrupt();
+        }
+    }
+
+    /**
+     * Requests the mock document heavy_hang_30000.xml against a server
+     * started with a 2000 ms task timeout; passes only if the client call
+     * ends in a {@link ProcessingException}.
+     */
+    @Test(expected = ProcessingException.class)
+    public void testTimeout() throws Exception {
+        Thread serverThread = startServer(
+                "-JXmx128m",
+                "-taskTimeoutMillis", "2000", "-taskPulseMillis", "100");
+        try {
+            // no assertion on the response: the expected outcome is the
+            // ProcessingException declared on @Test, so a normal return
+            // already fails the test
+            testOne("heavy_hang_30000.xml", false);
+        } finally {
+            serverThread.interrupt();
+        }
+    }
+
+    /**
+     * Starts {@link TikaServerCli#main} on a new thread.
+     * <p>
+     * The argument list is {@code -enableUnsecureFeatures}, then the
+     * test-specific arguments, then the port, temp-file prefix and config
+     * arguments that every test in this class shares.
+     *
+     * @param testSpecificArgs arguments that differ between tests
+     * @return the already-started thread; callers interrupt it when done
+     */
+    private static Thread startServer(String... testSpecificArgs) {
+        final String[] sharedTail = new String[]{
+                "-p", INTEGRATION_TEST_PORT,
+                "-tmpFilePrefix", "basic-",
+                "-config", TIKA_CONFIG.toAbsolutePath().toString()
+        };
+        final String[] args = new String[1 + testSpecificArgs.length + sharedTail.length];
+        args[0] = "-enableUnsecureFeatures";
+        System.arraycopy(testSpecificArgs, 0, args, 1, testSpecificArgs.length);
+        System.arraycopy(sharedTail, 0, args, 1 + testSpecificArgs.length, sharedTail.length);
+
+        Thread serverThread = new Thread(() -> TikaServerCli.main(args));
+        serverThread.start();
+        return serverThread;
+    }
+
+    /**
+     * Polls {@code /tika} every 100 ms until it answers 200.
+     *
+     * @throws TimeoutException if no 200 was seen within {@link #MAX_WAIT_MS}
+     */
+    private void awaitServerStartup() throws Exception {
+        Instant started = Instant.now();
+        long elapsed = Duration.between(started, Instant.now()).toMillis();
+        WebClient client = WebClient.create(endPoint+"/tika").accept("text/plain");
+        while (elapsed < MAX_WAIT_MS) {
+            try {
+                Response response = client.get();
+                if (response.getStatus() == 200) {
+                    elapsed = Duration.between(started, Instant.now()).toMillis();
+                    LOG.info("client observes server successfully started after " +
+                            elapsed+ " ms");
+                    return;
+                }
+                LOG.debug("tika test client failed to connect to server with status: {}",
+                        response.getStatus());
+
+            } catch (javax.ws.rs.ProcessingException e) {
+                // server not accepting connections yet; retry after the sleep
+                LOG.debug("tika test client failed to connect to server", e);
+            }
+
+            Thread.sleep(100);
+            elapsed = Duration.between(started, Instant.now()).toMillis();
+        }
+        throw new TimeoutException("couldn't connect to server after " +
+                elapsed + " ms");
+    }
+
+    /**
+     * Waits for the server, posts a fetch/emit request for one file and
+     * returns the parsed json response.
+     *
+     * @param fileName name of the mock document, relative to the fetcher's base path
+     * @param shouldFileExist if true, assert that the emitter wrote a non-trivial
+     *                        {@code fileName + ".json"} into the output directory
+     * @return the server's response body as a json object
+     */
+    private JsonObject testOne(String fileName, boolean shouldFileExist) throws Exception {
+        awaitServerStartup();
+        Response response = WebClient
+                .create(endPoint + "/emit")
+                .accept("application/json")
+                .post(getJsonString(fileName));
+        if (shouldFileExist) {
+            Path targFile = TMP_OUTPUT_DIR.resolve(fileName + ".json");
+            assertTrue(Files.size(targFile) > 1);
+        }
+        // close the response stream once the body has been parsed
+        try (Reader reader = new InputStreamReader(
+                (InputStream) response.getEntity(), UTF_8)) {
+            return JsonParser.parseReader(reader).getAsJsonObject();
+        }
+    }
+
+    /**
+     * @param fileName mock document to fetch
+     * @return json request with "fetcherString" set to {@code "fs:" + fileName}
+     *         and "emitter" set to "fs"
+     */
+    private String getJsonString(String fileName) {
+        JsonObject root = new JsonObject();
+        root.add("fetcherString", new JsonPrimitive("fs:"+fileName));
+        root.add("emitter", new JsonPrimitive("fs"));
+        return GSON.toJson(root);
+    }
+}
diff --git
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
index 74b4f24..8d25d83 100644
---
a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
+++
b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerIntegrationTest.java
@@ -50,79 +50,11 @@ import java.util.concurrent.atomic.AtomicInteger;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.Assert.assertEquals;
-public class TikaServerIntegrationTest extends TikaTest {
+public class TikaServerIntegrationTest extends IntegrationTestBase {
private static final Logger LOG =
LoggerFactory.getLogger(TikaServerIntegrationTest.class);
- private static final String TEST_HELLO_WORLD =
"test-documents/mock/hello_world.xml";
- private static final String TEST_OOM = "test-documents/mock/fake_oom.xml";
- private static final String TEST_SYSTEM_EXIT =
"test-documents/mock/system_exit.xml";
- private static final String TEST_HEAVY_HANG =
"test-documents/mock/heavy_hang_30000.xml";
- private static final String TEST_HEAVY_HANG_SHORT =
"test-documents/mock/heavy_hang_100.xml";
- private static final String TEST_STDOUT_STDERR =
"test-documents/mock/testStdOutErr.xml";
- private static final String TEST_STATIC_STDOUT_STDERR =
"test-documents/mock/testStaticStdOutErr.xml";
- private static final String META_PATH = "/rmeta";
- private static final String STATUS_PATH = "/status";
- private static final long MAX_WAIT_MS = 60000;
-
- //running into conflicts on 9998 with the CXFTestBase tests
- //TODO: figure out why?!
- private static final String INTEGRATION_TEST_PORT = "9999";
-
- protected static final String endPoint =
- "http://localhost:" + INTEGRATION_TEST_PORT;
-
- private SecurityManager existingSecurityManager = null;
- private static Path LOG_FILE;
-
- private static class MyExitException extends RuntimeException {
- private final int status;
- MyExitException(int status) {
- this.status = status;
- }
-
- public int getStatus() {
- return status;
- }
- }
- @BeforeClass
- public static void staticSetup() throws Exception {
- LogUtils.setLoggerClass(NullWebClientLogger.class);
- LOG_FILE = Files.createTempFile("tika-server-integration", ".xml");
-
Files.copy(TikaServerIntegrationTest.class.getResourceAsStream("/logging/log4j_forked.xml"),
- LOG_FILE, StandardCopyOption.REPLACE_EXISTING);
- }
-
- @Before
- public void setUp() throws Exception {
- existingSecurityManager = System.getSecurityManager();
- System.setSecurityManager(new SecurityManager() {
- @Override
- public void checkExit(int status) {
- super.checkExit(status);
- throw new MyExitException(status);
- }
- @Override
- public void checkPermission(Permission perm) {
- // all ok
- }
- @Override
- public void checkPermission(Permission perm, Object context) {
- // all ok
- }
- });
- }
-
- @AfterClass
- public static void staticTearDown() throws Exception {
- Files.delete(LOG_FILE);
- }
-
- @After
- public void tearDown() throws Exception {
- System.setSecurityManager(existingSecurityManager);
- }
@Test
public void testBasic() throws Exception {
@@ -529,6 +461,11 @@ public class TikaServerIntegrationTest extends TikaTest {
}
}
+ // TODO: placeholder -- this test has no body and no assertions, so it
+ // currently passes unconditionally without exercising anything.
+ @Test
+ public void testEmitterSysExit() throws Exception {
+
+ }
+
private void awaitServerStartup() throws Exception {
Instant started = Instant.now();
long elapsed = Duration.between(started, Instant.now()).toMillis();