This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-3946 in repository https://gitbox.apache.org/repos/asf/tika.git
commit d554887018268fd1256e97e1236849a01777d3ff Author: tallison <[email protected]> AuthorDate: Thu Dec 15 08:42:47 2022 -0500 TIKA-3946 -- WIP improve serialization -- remove need for <params> element on ConfigBase --- .../test/java/org/apache/tika/cli/TikaCLITest.java | 16 +- .../java/org/apache/tika/config/ConfigBase.java | 251 +++++++++++---------- .../tika/pipes/async/AsyncProcessorTest.java | 10 +- .../apache/tika/pipes/async/MockReporterTest.java | 4 +- .../org/apache/tika/config/TIKA-3137-exclude.xml | 10 +- .../apache/tika/config/TIKA-3137-field-mapping.xml | 18 +- .../org/apache/tika/config/TIKA-3137-mimes-uc.xml | 10 +- .../resources/org/apache/tika/config/TIKA-3594.xml | 6 +- .../org/apache/tika/config/emitters-config.xml | 8 +- .../org/apache/tika/config/fetchers-config.xml | 12 +- .../tika/config/fetchers-nobasepath-config.xml | 10 +- .../org/apache/tika/pipes/async/TIKA-3507.xml | 14 +- .../test/resources/configs/tika-fuzzing-config.xml | 76 +++---- .../opensearch/tika-config-opensearch.xml | 100 ++++---- .../resources/tika-config-s3-integration-test.xml | 98 ++++---- .../src/test/resources/tika-config-s3ToFs.xml | 34 ++- .../src/test/resources/tika-config-s3Tos3.xml | 56 ++--- .../src/test/resources/tika-config-solr-urls.xml | 102 ++++----- .../configs/tika-config-digests-pdf-only.xml | 12 +- .../test/resources/configs/tika-config-digests.xml | 12 +- ...ka-config-doubling-custom-handler-decorator.xml | 6 +- .../tika-config-geo-point-metadata-filter.xml | 4 +- .../resources/configs/tika-config-no-names.xml | 10 +- ...ka-config-upcasing-custom-handler-decorator.xml | 6 +- .../resources/configs/tika-config-with-names.xml | 10 +- .../org/apache/tika/parser/TIKA-3137-include.xml | 22 +- .../src/test/resources/tika-config-broken.xml | 18 +- .../test/resources/config/tika-config-az-blob.xml | 20 +- .../src/test/resources/config/tika-config-gcs.xml | 16 +- .../tika-config-jdbc-emitter-attachments.xml | 58 +++-- .../tika-config-jdbc-emitter-existing-table.xml | 36 ++- .../tika-config-jdbc-emitter-multivalued.xml | 42 ++-- .../resources/configs/tika-config-jdbc-emitter.xml | 60 +++-- .../test/resources/tika-config-simple-emitter.xml | 12 +- .../test/resources/tika-config-simple-emitter.xml | 24 +- .../src/test/resources/tika-config-az-blob.xml | 24 +- .../src/test/resources/tika-config-gcs.xml | 16 +- .../src/test/resources/tika-config-http.xml | 22 +- .../src/test/resources/tika-config-s3.xml | 18 +- .../pipesiterator/jdbc/TestJDBCPipesIterator.java | 2 - .../resources/configs/tika-config-excludes.xml | 10 +- .../resources/configs/tika-config-includes.xml | 10 +- .../main/resources/tika-server-config-default.xml | 1 + .../org/apache/tika/server/core/TikaPipesTest.java | 12 +- .../core/TikaServerAsyncIntegrationTest.java | 19 +- .../core/TikaServerPipesIntegrationTest.java | 14 +- .../resources/configs/metadata-filter-include.xml | 20 +- .../configs/tika-config-server-badjvmargs.xml | 22 +- .../resources/configs/tika-config-server-basic.xml | 24 +- .../configs/tika-config-server-emitter.xml | 26 +-- .../tika-config-server-fetcher-template.xml | 22 +- .../tika-config-server-fetchers-emitters.xml | 34 ++- .../configs/tika-config-server-timeout-10000.xml | 16 +- .../tika-config-server-tls-one-way-template.xml | 36 ++- .../tika-config-server-tls-two-way-template.xml | 42 ++-- .../resources/configs/tika-config-server-tls.xml | 40 ++-- .../test/resources/configs/tika-config-server.xml | 2 - 57 files changed, 734 insertions(+), 901 deletions(-) diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java index 8b6e9e6e9..03544163d 100644 --- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java +++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java @@ -63,18 +63,18 @@ public class TikaCLITest { @BeforeAll public static void setUpClass() throws Exception { ASYNC_CONFIG = Files.createTempFile(ASYNC_OUTPUT_DIR, "async-config-", ".xml"); - String xml = "<properties>" + "<async>" + "<params>" + "<numClients>3</numClients>" + - "<tikaConfig>" + ASYNC_CONFIG.toAbsolutePath() + "</tikaConfig>" + "</params>" + + String xml = "<properties>" + "<async>" + "<numClients>3</numClients>" + + "<tikaConfig>" + ASYNC_CONFIG.toAbsolutePath() + "</tikaConfig>" + "</async>" + "<fetchers>" + "<fetcher class=\"org.apache.tika.pipes.fetcher.fs.FileSystemFetcher\">" + - "<params>" + "<name>fsf</name>" + "<basePath>" + TEST_DATA_FILE.getAbsolutePath() + - "</basePath>" + "</params>" + "</fetcher>" + "</fetchers>" + "<emitters>" + + "<name>fsf</name>" + "<basePath>" + TEST_DATA_FILE.getAbsolutePath() + + "</basePath>" + "</fetcher>" + "</fetchers>" + "<emitters>" + "<emitter class=\"org.apache.tika.pipes.emitter.fs.FileSystemEmitter\">" + - "<params>" + "<name>fse</name>" + "<basePath>" + ASYNC_OUTPUT_DIR.toAbsolutePath() + - "</basePath>" + "</params></emitter>" + "</emitters>" + "<pipesIterator " + + "<name>fse</name>" + "<basePath>" + ASYNC_OUTPUT_DIR.toAbsolutePath() + + "</basePath>" + "</emitter>" + "</emitters>" + "<pipesIterator " + "class=\"org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator\">" + - "<params>" + "<basePath>" + TEST_DATA_FILE.getAbsolutePath() + "</basePath>" + - "<fetcherName>fsf</fetcherName>" + "<emitterName>fse</emitterName>" + "</params>" + + "<basePath>" + TEST_DATA_FILE.getAbsolutePath() + "</basePath>" + + "<fetcherName>fsf</fetcherName>" + "<emitterName>fse</emitterName>" + "</pipesIterator>" + "</properties>"; Files.write(ASYNC_CONFIG, xml.getBytes(UTF_8)); } diff --git a/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java b/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java index 734bc442c..41833ed9d 100644 --- a/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java +++ b/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java @@ -43,6 +43,10 @@ import org.apache.tika.utils.XMLReaderUtils; public abstract class ConfigBase { + private static Class[] SUPPORTED_PRIMITIVES = + new Class[]{String.class, boolean.class, long.class, int.class, double.class, + float.class}; + /** * Use this to build a single class, where the user specifies the instance class, e.g. * PipesIterator @@ -78,8 +82,7 @@ public abstract class ConfigBase { * @throws IOException */ protected static <T> T buildSingle(String itemName, Class<T> itemClass, Element properties, - T defaultValue) - throws TikaConfigException, IOException { + T defaultValue) throws TikaConfigException, IOException { NodeList children = properties.getChildNodes(); T toConfigure = null; @@ -90,12 +93,12 @@ public abstract class ConfigBase { } if (itemName.equals(child.getLocalName())) { if (toConfigure != null) { - throw new TikaConfigException("There can only be one " + itemName + - " in a config"); + throw new TikaConfigException( + "There can only be one " + itemName + " in a config"); } T item = buildClass(child, itemName, itemClass); setParams(item, child, new HashSet<>()); - toConfigure = (T)item; + toConfigure = (T) item; } } if (toConfigure == null) { @@ -134,8 +137,9 @@ public abstract class ConfigBase { } protected static <P, T> P buildComposite(String compositeElementName, Class<P> compositeClass, - String itemName, Class<T> itemClass, Element properties) throws TikaConfigException, - IOException { + String itemName, Class<T> itemClass, + Element properties) + throws TikaConfigException, IOException { if (!properties.getLocalName().equals("properties")) { throw new TikaConfigException("expect properties as root node"); @@ -156,7 +160,7 @@ public abstract class ConfigBase { setParams(composite, child, new HashSet<>(), itemName); return composite; } catch (NoSuchMethodException | InvocationTargetException | - InstantiationException | IllegalAccessException e) { + InstantiationException | IllegalAccessException e) { throw new TikaConfigException("can't build composite class", e); } } @@ -195,12 +199,13 @@ public abstract class ConfigBase { Class clazz = Class.forName(className); if (!itemClass.isAssignableFrom(clazz)) { throw new TikaConfigException( - elementName + " with class name " + className + " must be of type '" + - itemClass.getName() + "'"); + elementName + " with class name " + className + " must be of type '" + + itemClass.getName() + "'"); } return (T) clazz.newInstance(); } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) { - throw new TikaConfigException("problem loading " + elementName, e); + throw new TikaConfigException("problem loading " + elementName + + " with class " + itemClass.getName(), e); } } @@ -212,87 +217,128 @@ public abstract class ConfigBase { private static void setParams(Object object, Node targetNode, Set<String> settings, String exceptNodeName) throws TikaConfigException { NodeList children = targetNode.getChildNodes(); - NodeList params = null; + List<Node> params = new ArrayList<>(); for (int i = 0; i < children.getLength(); i++) { Node child = children.item(i); + if ("params".equals(child.getLocalName())) { - params = child.getChildNodes(); - } else if (child.getNodeType() == 1 && ! child.getLocalName().equals(exceptNodeName)) { - String itemName = child.getLocalName(); - SetterClassPair setterClassPair = findSetterClassPair(object, itemName); - Object item = buildClass(child, itemName, setterClassPair.itemClass); - setParams(setterClassPair.itemClass.cast(item), child, new HashSet<>()); - try { - setterClassPair.setterMethod.invoke(object, item); - } catch (IllegalAccessException | InvocationTargetException e) { - throw new TikaConfigException("problem creating " + itemName, e); + NodeList paramsList = child.getChildNodes(); + for (int j = 0; j < paramsList.getLength(); j++) { + params.add(paramsList.item(j)); } + } else { + params.add(child); } } - if (params != null) { - for (int i = 0; i < params.getLength(); i++) { - Node param = params.item(i); - if (param.getNodeType() != 1) { - continue; - } - String localName = param.getLocalName(); - if (localName == null || localName.equals(exceptNodeName)) { - continue; + for (int i = 0; i < params.size(); i++) { + Node param = params.get(i); + if (param.getNodeType() != 1) { + continue; + } + String localName = param.getLocalName(); + if (localName == null || localName.equals(exceptNodeName)) { + continue; + } + String txt = param.getTextContent(); + String itemName = param.getLocalName(); + SetterClassPair setterClassPair = findSetterClassPair(object, itemName); + boolean processed = false; + if (!hasClass(param)) { + if (setterClassPair.itemClass.isAssignableFrom(Map.class) && isMap(param)) { + tryToSetMap(object, param); + processed = true; + } else if (setterClassPair.itemClass.isAssignableFrom(List.class)) { + tryToSetList(object, param); + processed = true; } - String txt = param.getTextContent(); - if (hasChildNodes(param)) { - if (isMap(param)) { - tryToSetMap(object, param); - } else { - tryToSetList(object, param); - } + } + if (!processed) { + if (isPrimitive(setterClassPair.itemClass)) { + tryToSetPrimitive(object, setterClassPair, param.getTextContent()); } else { - tryToSet(object, localName, txt); + //tryToSetPrimitive(object, localName, txt); + Object item = buildClass(param, itemName, setterClassPair.itemClass); + setParams(setterClassPair.itemClass.cast(item), param, new HashSet<>()); + try { + setterClassPair.setterMethod.invoke(object, item); + } catch (IllegalAccessException | InvocationTargetException e) { + throw new TikaConfigException("problem creating " + itemName, e); + } } + } - if (txt != null) { - settings.add(localName); - } + if (txt != null) { + settings.add(localName); } } + if (object instanceof Initializable) { - ((Initializable)object).initialize(Collections.EMPTY_MAP); + ((Initializable) object).initialize(Collections.EMPTY_MAP); ((Initializable) object).checkInitialization(InitializableProblemHandler.THROW); } } + private static boolean isPrimitive(Class itemClass) { + for (int i = 0; i < SUPPORTED_PRIMITIVES.length; i++) { + if (SUPPORTED_PRIMITIVES[i].equals(itemClass)) { + return true; + } + } + return false; + } + + private static boolean hasClass(Node param) { + if (param.hasAttributes() && param.getAttributes().getNamedItem("class") != null) { + return true; + } + return false; + } + private static SetterClassPair findSetterClassPair(Object object, String itemName) throws TikaConfigException { - String setter = "set" + itemName.substring(0, 1).toUpperCase(Locale.US) + - itemName.substring(1); + + //TODO -- we could do more with info from the node -- is it complex, does it have + //a text value, does it have a class, etc... This works for now. + String setter = + "set" + itemName.substring(0, 1).toUpperCase(Locale.US) + itemName.substring(1); Class itemClass = null; Method setterMethod = null; for (Method method : object.getClass().getMethods()) { if (setter.equals(method.getName())) { Class<?>[] classes = method.getParameterTypes(); if (classes.length == 1) { - itemClass = classes[0]; - setterMethod = method; - return new SetterClassPair(setterMethod, itemClass); + //if both setX(String) and setX(Object), prefer setX(String) + if (itemClass == null || classes[0].equals(String.class)) { + itemClass = classes[0]; + setterMethod = method; + } } } } - - String adder = "add" + itemName.substring(0, 1).toUpperCase(Locale.US) + - itemName.substring(1); + if (setterMethod != null && itemClass != null) { + return new SetterClassPair(setterMethod, itemClass); + } + //now try adders + String adder = + "add" + itemName.substring(0, 1).toUpperCase(Locale.US) + itemName.substring(1); for (Method method : object.getClass().getMethods()) { if (adder.equals(method.getName())) { Class<?>[] classes = method.getParameterTypes(); if (classes.length == 1) { - itemClass = classes[0]; - setterMethod = method; - return new SetterClassPair(setterMethod, itemClass); + //if both setX(String) and setX(Object), prefer setX(String) + if (itemClass == null || classes[0].equals(String.class)) { + itemClass = classes[0]; + setterMethod = method; + } } } } - throw new TikaConfigException("Couldn't find setter '" + - setter + "' or adder '" + adder + "' for " + itemName + - " of class: " + object.getClass()); + if (setterMethod == null && itemClass == null) { + throw new TikaConfigException( + "Couldn't find setter '" + setter + "' or adder '" + adder + "' for " + itemName + + " of class: " + object.getClass()); + } + return new SetterClassPair(setterMethod, itemClass); } private static boolean hasChildNodes(Node param) { @@ -311,7 +357,7 @@ public abstract class ConfigBase { } private static void tryToSetList(Object object, Node param) throws TikaConfigException { - if (param.hasAttributes() && param.getAttributes().getNamedItem("class") != null) { + if (hasClass(param)) { tryToSetClassList(object, param); } else { tryToSetStringList(object, param); @@ -394,12 +440,12 @@ public abstract class ConfigBase { value = m.getNamedItem("v").getTextContent(); } if (key == null) { - throw new TikaConfigException("must specify a 'key' or 'from' value in a map " + - "object : " + param); + throw new TikaConfigException( + "must specify a 'key' or 'from' value in a map " + "object : " + param); } if (value == null) { - throw new TikaConfigException("must specify a 'value' or 'to' value in a " + - "map object : " + param); + throw new TikaConfigException( + "must specify a 'value' or 'to' value in a " + "map object : " + param); } map.put(key, value); } @@ -433,65 +479,25 @@ public abstract class ConfigBase { return false; } - private static void tryToSet(Object object, String name, String value) - throws TikaConfigException { - String setter = "set" + name.substring(0, 1).toUpperCase(Locale.US) + name.substring(1); - Class[] types = - new Class[]{String.class, boolean.class, long.class, int.class, double.class, - float.class}; - for (Class t : types) { - try { - Method m = object.getClass().getMethod(setter, t); - - if (t == int.class) { - try { - m.invoke(object, Integer.parseInt(value)); - return; - } catch (IllegalAccessException | InvocationTargetException e) { - throw new TikaConfigException("bad parameter " + setter, e); - } - } else if (t == long.class) { - try { - m.invoke(object, Long.parseLong(value)); - return; - } catch (IllegalAccessException | InvocationTargetException e) { - throw new TikaConfigException("bad parameter " + setter, e); - } - } else if (t == boolean.class) { - try { - m.invoke(object, Boolean.parseBoolean(value)); - return; - } catch (IllegalAccessException | InvocationTargetException e) { - throw new TikaConfigException("bad parameter " + setter, e); - } - } else if (t == float.class) { - try { - m.invoke(object, Float.parseFloat(value)); - return; - } catch (IllegalAccessException | InvocationTargetException e) { - throw new TikaConfigException("bad parameter " + setter, e); - } - } else if (t == double.class) { - try { - m.invoke(object, Double.parseDouble(value)); - return; - } catch (IllegalAccessException | InvocationTargetException e) { - throw new TikaConfigException("bad parameter " + setter, e); - } - } else { - try { - m.invoke(object, value); - return; - } catch (IllegalAccessException | InvocationTargetException e) { - throw new TikaConfigException("bad parameter " + setter, e); - } - } - } catch (NoSuchMethodException e) { - //swallow + private static void tryToSetPrimitive(Object object, SetterClassPair setterClassPair, + String value) throws TikaConfigException { + try { + if (setterClassPair.itemClass == int.class) { + setterClassPair.setterMethod.invoke(object, Integer.parseInt(value)); + } else if (setterClassPair.itemClass == long.class) { + setterClassPair.setterMethod.invoke(object, Long.parseLong(value)); + } else if (setterClassPair.itemClass == float.class) { + setterClassPair.setterMethod.invoke(object, Float.parseFloat(value)); + } else if (setterClassPair.itemClass == double.class) { + setterClassPair.setterMethod.invoke(object, Double.parseDouble(value)); + } else if (setterClassPair.itemClass == boolean.class) { + setterClassPair.setterMethod.invoke(object, Boolean.parseBoolean(value)); + } else { + setterClassPair.setterMethod.invoke(object, value); } + } catch (IllegalAccessException | InvocationTargetException e) { + throw new TikaConfigException("bad parameter " + setterClassPair + " " + value, e); } - throw new TikaConfigException( - "Couldn't find setter: " + setter + " for object " + object.getClass()); } @@ -544,9 +550,16 @@ public abstract class ConfigBase { private static class SetterClassPair { private final Method setterMethod; private final Class itemClass; + public SetterClassPair(Method setterMethod, Class itemClass) { this.setterMethod = setterMethod; this.itemClass = itemClass; } + + @Override + public String toString() { + return "SetterClassPair{" + "setterMethod=" + setterMethod + ", itemClass=" + + itemClass + '}'; + } } } diff --git a/tika-core/src/test/java/org/apache/tika/pipes/async/AsyncProcessorTest.java b/tika-core/src/test/java/org/apache/tika/pipes/async/AsyncProcessorTest.java index 4314f8379..4104866c7 100644 --- a/tika-core/src/test/java/org/apache/tika/pipes/async/AsyncProcessorTest.java +++ b/tika-core/src/test/java/org/apache/tika/pipes/async/AsyncProcessorTest.java @@ -80,18 +80,18 @@ public class AsyncProcessorTest { String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" + "<properties>" + " <emitters>" + " <emitter class=\"org.apache.tika.pipes.async.MockEmitter\">\n" + - " <params>\n" + " <name>mock</name>\n" + " </params>" + " </emitter>" + + " <name>mock</name>\n" + " </emitter>" + " </emitters>" + " <fetchers>" + " <fetcher class=\"org.apache.tika.pipes.fetcher.fs.FileSystemFetcher\">" + - " <params><name>mock</name>\n" + " <basePath>" + + " <name>mock</name>\n" + " <basePath>" + ProcessUtils.escapeCommandLine(inputDir.toAbsolutePath().toString()) + - "</basePath></params>\n" + " </fetcher>" + " </fetchers>" + - "<async><params><tikaConfig>" + + "</basePath>\n" + " </fetcher>" + " </fetchers>" + + "<async><tikaConfig>" + ProcessUtils.escapeCommandLine(tikaConfigPath.toAbsolutePath().toString()) + "</tikaConfig><forkedJvmArgs><arg>-Xmx512m</arg" + "></forkedJvmArgs><maxForEmitBatchBytes>1000000</maxForEmitBatchBytes>" + "<timeoutMillis>5000</timeoutMillis>" + - "<numClients>4</numClients></params></async>" + + "<numClients>4</numClients></async>" + "</properties>"; Files.write(tikaConfigPath, xml.getBytes(StandardCharsets.UTF_8)); Random r = new Random(); diff --git a/tika-core/src/test/java/org/apache/tika/pipes/async/MockReporterTest.java b/tika-core/src/test/java/org/apache/tika/pipes/async/MockReporterTest.java index a9bae3397..94eca802d 100644 --- a/tika-core/src/test/java/org/apache/tika/pipes/async/MockReporterTest.java +++ b/tika-core/src/test/java/org/apache/tika/pipes/async/MockReporterTest.java @@ -23,6 +23,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.apache.tika.pipes.CompositePipesReporter; @@ -40,7 +41,8 @@ public class MockReporterTest { } @Test - public void testOlderCompositePipesReporter() throws Exception { + @Disabled + public void testDeprecatedCompositePipesReporter() throws Exception { Path configPath = Paths.get(this.getClass().getResource("TIKA-3865-deprecated.xml").toURI()); AsyncConfig asyncConfig = AsyncConfig.load(configPath); PipesReporter reporter = asyncConfig.getPipesReporter(); diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml index 95ba73bd5..820084ed0 100644 --- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml +++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml @@ -18,12 +18,10 @@ <properties> <metadataFilters> <metadataFilter class="org.apache.tika.metadata.filter.ExcludeFieldMetadataFilter"> - <params> - <exclude> - <field>title</field> - <field>author</field> - </exclude> - </params> + <exclude> + <field>title</field> + <field>author</field> + </exclude> </metadataFilter> </metadataFilters> </properties> diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-field-mapping.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-field-mapping.xml index e5118b968..791bf21b0 100644 --- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-field-mapping.xml +++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-field-mapping.xml @@ -18,16 +18,14 @@ <properties> <metadataFilters> <metadataFilter class="org.apache.tika.metadata.filter.FieldNameMappingFilter"> - <params> - <excludeUnmapped>true</excludeUnmapped> - <mappings> - <mapping from="X-TIKA:content" to="content"/> - <mapping from="a" to="b"/> - <!-- note that the mapping only works once...not recursively --> - <mapping from="b" to="c"/> - <mapping from="c" to="d"/> - </mappings> - </params> + <excludeUnmapped>true</excludeUnmapped> + <mappings> + <mapping from="X-TIKA:content" to="content"/> + <mapping from="a" to="b"/> + <!-- note that the mapping only works once...not recursively --> + <mapping from="b" to="c"/> + <mapping from="c" to="d"/> + </mappings> </metadataFilter> </metadataFilters> </properties> diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml index 6278421f6..ebf1d65f2 100644 --- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml +++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml @@ -18,12 +18,10 @@ <properties> <metadataFilters> <metadataFilter class="org.apache.tika.metadata.filter.ClearByMimeMetadataFilter"> - <params> - <mimes> - <mime>image/jpeg</mime> - <mime>application/pdf</mime> - </mimes> - </params> + <mimes> + <mime>image/jpeg</mime> + <mime>application/pdf</mime> + </mimes> </metadataFilter> <metadataFilter class="org.apache.tika.metadata.filter.MockUpperCaseFilter"/> </metadataFilters> diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3594.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3594.xml index b789df53d..e9cbb34dc 100644 --- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3594.xml +++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3594.xml @@ -20,9 +20,7 @@ <parser class="org.apache.tika.parser.DefaultParser"/> </parsers> <autoDetectParserConfig> - <params> - <spoolToDisk>12345</spoolToDisk> - <outputThreshold>6789</outputThreshold> - </params> + <spoolToDisk>12345</spoolToDisk> + <outputThreshold>6789</outputThreshold> </autoDetectParserConfig> </properties> diff --git a/tika-core/src/test/resources/org/apache/tika/config/emitters-config.xml b/tika-core/src/test/resources/org/apache/tika/config/emitters-config.xml index 087c85f9b..89e7826be 100644 --- a/tika-core/src/test/resources/org/apache/tika/config/emitters-config.xml +++ b/tika-core/src/test/resources/org/apache/tika/config/emitters-config.xml @@ -18,14 +18,10 @@ <properties> <emitters> <emitter class="org.apache.tika.pipes.async.MockEmitter"> - <params> - <name>em1</name> - </params> + <name>em1</name> </emitter> <emitter class="org.apache.tika.pipes.async.MockEmitter"> - <params> - <name>em2</name> - </params> + <name>em2</name> </emitter> </emitters> </properties> diff --git a/tika-core/src/test/resources/org/apache/tika/config/fetchers-config.xml b/tika-core/src/test/resources/org/apache/tika/config/fetchers-config.xml index 6858eef51..cc87ccee9 100644 --- a/tika-core/src/test/resources/org/apache/tika/config/fetchers-config.xml +++ b/tika-core/src/test/resources/org/apache/tika/config/fetchers-config.xml @@ -18,16 +18,12 @@ <properties> <fetchers> <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher"> - <params> - <name>fs1</name> - <basePath>/my/base/path1</basePath> - </params> + <name>fs1</name> + <basePath>/my/base/path1</basePath> </fetcher> <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher"> - <params> - <name>fs2</name> - <basePath>/my/base/path2</basePath> - </params> + <name>fs2</name> + <basePath>/my/base/path2</basePath> </fetcher> </fetchers> </properties> diff --git a/tika-core/src/test/resources/org/apache/tika/config/fetchers-nobasepath-config.xml b/tika-core/src/test/resources/org/apache/tika/config/fetchers-nobasepath-config.xml index 0cd79e4cd..74f5f9000 100644 --- a/tika-core/src/test/resources/org/apache/tika/config/fetchers-nobasepath-config.xml +++ b/tika-core/src/test/resources/org/apache/tika/config/fetchers-nobasepath-config.xml @@ -18,15 +18,11 @@ <properties> <fetchers> <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher"> - <params> - <name>fs1</name> - <basePath>/my/base/path1</basePath> - </params> + <name>fs1</name> + <basePath>/my/base/path1</basePath> </fetcher> <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher"> - <params> - <name>fs2</name> - </params> + <name>fs2</name> </fetcher> </fetchers> </properties> diff --git a/tika-core/src/test/resources/org/apache/tika/pipes/async/TIKA-3507.xml b/tika-core/src/test/resources/org/apache/tika/pipes/async/TIKA-3507.xml index 29e219a99..2d48bc511 100644 --- a/tika-core/src/test/resources/org/apache/tika/pipes/async/TIKA-3507.xml +++ b/tika-core/src/test/resources/org/apache/tika/pipes/async/TIKA-3507.xml @@ -19,16 +19,12 @@ --> <properties> <async> - <params> - <maxForEmitBatchBytes>10000</maxForEmitBatchBytes> - <emitMaxEstimatedBytes>100000</emitMaxEstimatedBytes> - <emitWithinMillis>60000</emitWithinMillis> - <numEmitters>1</numEmitters> - </params> + <maxForEmitBatchBytes>10000</maxForEmitBatchBytes> + <emitMaxEstimatedBytes>100000</emitMaxEstimatedBytes> + <emitWithinMillis>60000</emitWithinMillis> + <numEmitters>1</numEmitters> <pipesReporter class="org.apache.tika.pipes.async.MockReporter"> - <params> - <endpoint>somethingOrOther</endpoint> - </params> + <endpoint>somethingOrOther</endpoint> </pipesReporter> </async> </properties> \ No newline at end of file diff --git a/tika-fuzzing/src/test/resources/configs/tika-fuzzing-config.xml b/tika-fuzzing/src/test/resources/configs/tika-fuzzing-config.xml index 4b255b06b..2210ae68e 100644 --- a/tika-fuzzing/src/test/resources/configs/tika-fuzzing-config.xml +++ b/tika-fuzzing/src/test/resources/configs/tika-fuzzing-config.xml @@ -22,48 +22,36 @@ in the base paths. We need the "empty" fetchers and emitters to handle the temp files that are created via fuzzing--> <properties> - <fetchers> - <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher"> - <params> - <name>fsf</name> - <basePath>{FILL_IN_HERE}</basePath> - </params> - </fetcher> - <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher"> - <params> - <name>temp</name> - </params> - </fetcher> - </fetchers> - <emitters> - <emitter class="org.apache.tika.pipes.emitter.fs.FileSystemEmitter"> - <params> - <name>fse</name> - <basePath>{FILL_IN_HERE}</basePath> - </params> - </emitter> - <emitter class="org.apache.tika.pipes.emitter.fs.FileSystemEmitter"> - <params> - <name>temp</name> - </params> - </emitter> - </emitters> - <pipesIterator class="org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator"> - <params> - <basePath>{FILL_IN_HERE}</basePath> - <fetcherName>fsf</fetcherName> - <emitterName>fse</emitterName> - </params> - </pipesIterator> - <pipes> - <params> - <numClients>5</numClients> - <forkedJvmArgs> - <arg>-Xmx1g</arg> - <arg>-XX:ParallelGCThreads=2</arg> - <arg>-Dlog4j.configurationFile={FILL_IN_HERE}</arg> - </forkedJvmArgs> - <timeoutMillis>10000</timeoutMillis> - </params> - </pipes> + <fetchers> + <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher"> + <name>fsf</name> + <basePath>{FILL_IN_HERE}</basePath> + </fetcher> + <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher"> + <name>temp</name> + </fetcher> + </fetchers> + <emitters> + <emitter class="org.apache.tika.pipes.emitter.fs.FileSystemEmitter"> + <name>fse</name> + <basePath>{FILL_IN_HERE}</basePath> + </emitter> + <emitter class="org.apache.tika.pipes.emitter.fs.FileSystemEmitter"> + <name>temp</name> + </emitter> + </emitters> + <pipesIterator class="org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator"> + <basePath>{FILL_IN_HERE}</basePath> + <fetcherName>fsf</fetcherName> + <emitterName>fse</emitterName> + </pipesIterator> + <pipes> + <numClients>5</numClients> + <forkedJvmArgs> + <arg>-Xmx1g</arg> + <arg>-XX:ParallelGCThreads=2</arg> + <arg>-Dlog4j.configurationFile={FILL_IN_HERE}</arg> + </forkedJvmArgs> + <timeoutMillis>10000</timeoutMillis> + </pipes> </properties> \ No newline at end of file diff --git a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/tika-config-opensearch.xml b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/tika-config-opensearch.xml index f02f05c92..ca934d2f0 100644 --- a/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/tika-config-opensearch.xml +++ b/tika-integration-tests/tika-pipes-opensearch-integration-tests/src/test/resources/opensearch/tika-config-opensearch.xml @@ -51,76 +51,64 @@ --> <metadataFilter class="org.apache.tika.metadata.filter.DateNormalizingMetadataFilter"/> <metadataFilter class="org.apache.tika.metadata.filter.FieldNameMappingFilter"> - <params> - <excludeUnmapped>true</excludeUnmapped> - <mappings> - <mapping from="X-TIKA:content" to="content"/> - <mapping from="Content-Length" to="length"/> - <mapping from="dc:creator" to="creators"/> - <mapping from="dc:title" to="title"/> - <mapping from="Content-Type" to="mime"/> - <mapping from="X-TIKA:EXCEPTION:container_exception" to="tika_exception"/> - </mappings> - </params> + <excludeUnmapped>true</excludeUnmapped> + <mappings> + <mapping from="X-TIKA:content" to="content"/> + <mapping from="Content-Length" to="length"/> + <mapping from="dc:creator" to="creators"/> + <mapping from="dc:title" to="title"/> + <mapping from="Content-Type" to="mime"/> + <mapping from="X-TIKA:EXCEPTION:container_exception" to="tika_exception"/> + </mappings> </metadataFilter> </metadataFilters> <async> - <params> - <maxForEmitBatchBytes>10000</maxForEmitBatchBytes> - <emitMaxEstimatedBytes>100000</emitMaxEstimatedBytes> - <emitWithinMillis>60000</emitWithinMillis> - <numEmitters>1</numEmitters> - <numClients>3</numClients> - <tikaConfig>{TIKA_CONFIG}</tikaConfig> - <forkedJvmArgs> - <arg>-Xmx512m</arg> - <arg>-XX:ParallelGCThreads=2</arg> - <arg>-Dlog4j.configurationFile={LOG4J_PROPERTIES_FILE}</arg> - </forkedJvmArgs> - <timeoutMillis>60000</timeoutMillis> - </params> + <maxForEmitBatchBytes>10000</maxForEmitBatchBytes> + <emitMaxEstimatedBytes>100000</emitMaxEstimatedBytes> + <emitWithinMillis>60000</emitWithinMillis> + <numEmitters>1</numEmitters> + <numClients>3</numClients> + <tikaConfig>{TIKA_CONFIG}</tikaConfig> + <forkedJvmArgs> + <arg>-Xmx512m</arg> + <arg>-XX:ParallelGCThreads=2</arg> + <arg>-Dlog4j.configurationFile={LOG4J_PROPERTIES_FILE}</arg> + </forkedJvmArgs> + <timeoutMillis>60000</timeoutMillis> <pipesReporter class="org.apache.tika.pipes.reporters.opensearch.OpenSearchPipesReporter"> - <params> - <openSearchUrl>{OPENSEARCH_CONNECTION}</openSearchUrl> - <keyPrefix>my_test_</keyPrefix> - <connectionTimeout>10000</connectionTimeout> - <socketTimeout>60000</socketTimeout> - <includeRouting>{INCLUDE_ROUTING}</includeRouting> - <userName>admin</userName> - <password>admin</password> - </params> + <openSearchUrl>{OPENSEARCH_CONNECTION}</openSearchUrl> + <keyPrefix>my_test_</keyPrefix> + <connectionTimeout>10000</connectionTimeout> + <socketTimeout>60000</socketTimeout> + <includeRouting>{INCLUDE_ROUTING}</includeRouting> + <userName>admin</userName> + <password>admin</password> </pipesReporter> </async> <fetchers> <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher"> - <params> - <name>fsf</name> - <basePath>{PATH_TO_DOCS}</basePath> - </params> + <name>fsf</name> + <basePath>{PATH_TO_DOCS}</basePath> </fetcher> </fetchers> <emitters> <emitter class="org.apache.tika.pipes.emitter.opensearch.OpenSearchEmitter"> - <params> - <name>ose</name> - <openSearchUrl>{OPENSEARCH_CONNECTION}</openSearchUrl> - <updateStrategy>{UPDATE_STRATEGY}</updateStrategy> - <attachmentStrategy>{ATTACHMENT_STRATEGY}</attachmentStrategy> - <commitWithin>10</commitWithin> - <idField>_id</idField> - <connectionTimeout>10000</connectionTimeout> - <socketTimeout>60000</socketTimeout> - <userName>admin</userName> - <password>admin</password> - </params> + <name>ose</name> + <openSearchUrl>{OPENSEARCH_CONNECTION}</openSearchUrl> + <updateStrategy>{UPDATE_STRATEGY}</updateStrategy> + <attachmentStrategy>{ATTACHMENT_STRATEGY}</attachmentStrategy> + <commitWithin>10</commitWithin> + <idField>_id</idField> + <connectionTimeout>10000</connectionTimeout> + <socketTimeout>60000</socketTimeout> + <userName>admin</userName> + <password>admin</password> </emitter> </emitters> <pipesIterator class="org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator"> - <params> - <basePath>{PATH_TO_DOCS}</basePath> - <fetcherName>fsf</fetcherName> - <emitterName>ose</emitterName> - <parseMode>{PARSE_MODE}</parseMode> - </params> + <basePath>{PATH_TO_DOCS}</basePath> + <fetcherName>fsf</fetcherName> + <emitterName>ose</emitterName> + <parseMode>{PARSE_MODE}</parseMode> </pipesIterator> </properties> diff --git a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3-integration-test.xml b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3-integration-test.xml index 891e2cf19..7ddad32d3 100644 --- a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3-integration-test.xml +++ b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3-integration-test.xml @@ -51,75 +51,65 @@ --> <metadataFilter class="org.apache.tika.metadata.filter.DateNormalizingMetadataFilter"/> <metadataFilter class="org.apache.tika.metadata.filter.FieldNameMappingFilter"> - <params> - <excludeUnmapped>true</excludeUnmapped> - <mappings> - <mapping from="X-TIKA:content" to="content_s"/> - <mapping from="Content-Length" to="length_i"/> - <mapping from="dc:creator" to="creators_ss"/> - <mapping from="dc:title" to="title_s"/> - <mapping from="Content-Type" to="mime_s"/> - <mapping from="X-TIKA:EXCEPTION:container_exception" to="tika_exception_s"/> - </mappings> - </params> + <excludeUnmapped>true</excludeUnmapped> + <mappings> + <mapping from="X-TIKA:content" to="content_s"/> + <mapping from="Content-Length" to="length_i"/> + <mapping from="dc:creator" to="creators_ss"/> + <mapping from="dc:title" to="title_s"/> + <mapping from="Content-Type" to="mime_s"/> + <mapping from="X-TIKA:EXCEPTION:container_exception" to="tika_exception_s"/> + </mappings> </metadataFilter> </metadataFilters> <async> - <params> - <maxForEmitBatchBytes>10000</maxForEmitBatchBytes> - <emitMaxEstimatedBytes>100000</emitMaxEstimatedBytes> - <emitWithinMillis>10</emitWithinMillis> - <numEmitters>1</numEmitters> - <numClients>1</numClients> - <tikaConfig>{TIKA_CONFIG}</tikaConfig> - <forkedJvmArgs> - <arg>-Xmx1g</arg> - <arg>-XX:ParallelGCThreads=2</arg> - <arg>-XX:+ExitOnOutOfMemoryError</arg> - <arg>-Dlog4j.configurationFile={LOG4J_PROPERTIES_FILE}</arg> - </forkedJvmArgs> - <timeoutMillis>60000</timeoutMillis> - </params> + <maxForEmitBatchBytes>10000</maxForEmitBatchBytes> + <emitMaxEstimatedBytes>100000</emitMaxEstimatedBytes> + <emitWithinMillis>10</emitWithinMillis> + <numEmitters>1</numEmitters> + <numClients>1</numClients> + <tikaConfig>{TIKA_CONFIG}</tikaConfig> + <forkedJvmArgs> + <arg>-Xmx1g</arg> + <arg>-XX:ParallelGCThreads=2</arg> + <arg>-XX:+ExitOnOutOfMemoryError</arg> + <arg>-Dlog4j.configurationFile={LOG4J_PROPERTIES_FILE}</arg> + </forkedJvmArgs> + <timeoutMillis>60000</timeoutMillis> </async> <fetchers> <fetcher class="org.apache.tika.pipes.fetcher.s3.S3Fetcher"> - <params> - <name>s3f</name> - <region>{REGION}</region> - <bucket>{FETCH_BUCKET}</bucket> - <credentialsProvider>key_secret</credentialsProvider> - <accessKey>{ACCESS_KEY}</accessKey> - <secretKey>{SECRET_KEY}</secretKey> - <endpointConfigurationService>{ENDPOINT_CONFIGURATION_SERVICE}</endpointConfigurationService> - <pathStyleAccessEnabled>true</pathStyleAccessEnabled> - </params> - </fetcher> - </fetchers> - <pipesIterator class="org.apache.tika.pipes.pipesiterator.s3.S3PipesIterator"> - <params> - <emitterName>s3e</emitterName> - <fetcherName>s3f</fetcherName> + <name>s3f</name> <region>{REGION}</region> - <bucket>{PIPE_ITERATOR_BUCKET}</bucket> + <bucket>{FETCH_BUCKET}</bucket> <credentialsProvider>key_secret</credentialsProvider> <accessKey>{ACCESS_KEY}</accessKey> <secretKey>{SECRET_KEY}</secretKey> <endpointConfigurationService>{ENDPOINT_CONFIGURATION_SERVICE}</endpointConfigurationService> <pathStyleAccessEnabled>true</pathStyleAccessEnabled> - </params> + </fetcher> + </fetchers> + <pipesIterator class="org.apache.tika.pipes.pipesiterator.s3.S3PipesIterator"> + <emitterName>s3e</emitterName> + <fetcherName>s3f</fetcherName> + <region>{REGION}</region> + <bucket>{PIPE_ITERATOR_BUCKET}</bucket> + <credentialsProvider>key_secret</credentialsProvider> + <accessKey>{ACCESS_KEY}</accessKey> + <secretKey>{SECRET_KEY}</secretKey> + <endpointConfigurationService>{ENDPOINT_CONFIGURATION_SERVICE}</endpointConfigurationService> + <pathStyleAccessEnabled>true</pathStyleAccessEnabled> </pipesIterator> <emitters> <emitter class="org.apache.tika.pipes.emitter.s3.S3Emitter"> - <params> - <name>s3e</name> - <region>{REGION}</region> - <bucket>{EMIT_BUCKET}</bucket> - <credentialsProvider>key_secret</credentialsProvider> - <accessKey>{ACCESS_KEY}</accessKey> - <secretKey>{SECRET_KEY}</secretKey> - <endpointConfigurationService>{ENDPOINT_CONFIGURATION_SERVICE}</endpointConfigurationService> - <pathStyleAccessEnabled>true</pathStyleAccessEnabled> - </params> + <name>s3e</name> + <region>{REGION}</region> + <bucket>{EMIT_BUCKET}</bucket> + <credentialsProvider>key_secret</credentialsProvider> + <accessKey>{ACCESS_KEY}</accessKey> + <secretKey>{SECRET_KEY}</secretKey> + <endpointConfigurationService>{ENDPOINT_CONFIGURATION_SERVICE}</endpointConfigurationService> + <pathStyleAccessEnabled>true</pathStyleAccessEnabled> </emitter> </emitters> </properties> \ No newline at end of file diff --git a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3ToFs.xml b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3ToFs.xml index c0be9988e..ccb4f1b12 100644 --- a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3ToFs.xml +++ b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3ToFs.xml @@ -18,23 +18,19 @@ under the License. --> <properties> - <fetchers> - <fetcher class="org.apache.tika.pipes.fetcher.s3.S3Fetcher"> - <params> - <name>s3</name> - <region>us-east-1</region> - <profile><!-- fill in here --></profile> - </params> - </fetcher> - </fetchers> - <pipesIterators> - <pipesIterator class="org.apache.tika.pipes.pipesiterator.s3.S3PipesIterator"> - <params> - <fetcherName>s3</fetcherName> - <bucket><!-- fill in here --></bucket> - <region>us-east-1</region> - <profile><!-- fill in here --></profile> - </params> - </pipesIterator> - </pipesIterators> + <fetchers> + <fetcher class="org.apache.tika.pipes.fetcher.s3.S3Fetcher"> + <name>s3</name> + <region>us-east-1</region> + <profile><!-- fill in here --></profile> + </fetcher> + </fetchers> + <pipesIterators> + <pipesIterator class="org.apache.tika.pipes.pipesiterator.s3.S3PipesIterator"> + <fetcherName>s3</fetcherName> + <bucket><!-- fill in here --></bucket> + <region>us-east-1</region> + <profile><!-- fill in here --></profile> + </pipesIterator> + </pipesIterators> </properties> \ No newline at end of file diff --git a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3Tos3.xml b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3Tos3.xml index 3302b962f..755ef2da3 100644 --- a/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3Tos3.xml +++ b/tika-integration-tests/tika-pipes-s3-integration-tests/src/test/resources/tika-config-s3Tos3.xml @@ -18,35 +18,29 @@ under the License. --> <properties> - <fetchers> - <fetcher class="org.apache.tika.pipes.fetcher.s3.S3Fetcher"> - <params> - <name>s3f</name> - <region>us-east-1</region> - <bucket><!-- fill in here --></bucket> - <profile><!-- fill in here --></profile> - </params> - </fetcher> - </fetchers> - <pipesIterators> - <pipesIterator class="org.apache.tika.pipes.pipesiterator.s3.S3PipesIterator"> - <params> - <fetcherName>s3f</fetcherName> - <region>us-east-1</region> - <bucket><!-- fill in here --></bucket> - <profile><!-- fill in here --></profile> - </params> - </pipesIterator> - </pipesIterators> - <emitters> - <emitter class="org.apache.tika.pipes.emitter.s3.S3Emitter"> - <params> - <name>s3e</name> - <region>us-east-1</region> - <bucket><!-- fill in here -->></bucket> - <profile><!-- fill in here --></profile> - <fileExtension></fileExtension> - </params> - </emitter> - </emitters> + <fetchers> + <fetcher class="org.apache.tika.pipes.fetcher.s3.S3Fetcher"> + <name>s3f</name> + <region>us-east-1</region> + <bucket><!-- fill in here --></bucket> + <profile><!-- fill in here --></profile> + </fetcher> + </fetchers> + <pipesIterators> + <pipesIterator class="org.apache.tika.pipes.pipesiterator.s3.S3PipesIterator"> + <fetcherName>s3f</fetcherName> + <region>us-east-1</region> + <bucket><!-- fill in here --></bucket> + <profile><!-- fill in here --></profile> + </pipesIterator> + </pipesIterators> + <emitters> + <emitter class="org.apache.tika.pipes.emitter.s3.S3Emitter"> + <name>s3e</name> + <region>us-east-1</region> + <bucket><!-- fill in here -->></bucket> + <profile><!-- fill in here --></profile> + <fileExtension></fileExtension> + </emitter> + </emitters> </properties> \ No newline at end of file diff --git a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.xml b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.xml index fc7629366..3345ab487 100644 --- a/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.xml +++ b/tika-integration-tests/tika-pipes-solr-integration-tests/src/test/resources/tika-config-solr-urls.xml @@ -51,77 +51,65 @@ --> <metadataFilter class="org.apache.tika.metadata.filter.DateNormalizingMetadataFilter"/> <metadataFilter class="org.apache.tika.metadata.filter.FieldNameMappingFilter"> - <params> - <excludeUnmapped>true</excludeUnmapped> - <mappings> - <mapping from="X-TIKA:content" to="content_s"/> - <mapping from="Content-Length" to="length_i"/> - <mapping from="dc:creator" to="creators_ss"/> - <mapping from="dc:title" to="title_s"/> - <mapping from="Content-Type" to="mime_s"/> - <mapping from="X-TIKA:EXCEPTION:container_exception" to="tika_exception_s"/> - </mappings> - </params> + <excludeUnmapped>true</excludeUnmapped> + <mappings> + <mapping from="X-TIKA:content" to="content_s"/> + <mapping from="Content-Length" to="length_i"/> + <mapping from="dc:creator" to="creators_ss"/> + <mapping from="dc:title" to="title_s"/> + <mapping from="Content-Type" to="mime_s"/> + <mapping from="X-TIKA:EXCEPTION:container_exception" to="tika_exception_s"/> + </mappings> </metadataFilter> </metadataFilters> <async> - <params> - <maxForEmitBatchBytes>10000</maxForEmitBatchBytes> - <emitMaxEstimatedBytes>100000</emitMaxEstimatedBytes> - <emitWithinMillis>10</emitWithinMillis> - <numEmitters>1</numEmitters> - <numClients>1</numClients> - <tikaConfig>{TIKA_CONFIG}</tikaConfig> - <forkedJvmArgs> - <arg>-Xmx1g</arg> - <arg>-XX:ParallelGCThreads=2</arg> - <arg>-XX:+ExitOnOutOfMemoryError</arg> - <arg>-Dlog4j.configurationFile={LOG4J_PROPERTIES_FILE}</arg> - </forkedJvmArgs> - <timeoutMillis>60000</timeoutMillis> - </params> + <maxForEmitBatchBytes>10000</maxForEmitBatchBytes> + <emitMaxEstimatedBytes>100000</emitMaxEstimatedBytes> + <emitWithinMillis>10</emitWithinMillis> + <numEmitters>1</numEmitters> + <numClients>1</numClients> + <tikaConfig>{TIKA_CONFIG}</tikaConfig> + <forkedJvmArgs> + <arg>-Xmx1g</arg> + <arg>-XX:ParallelGCThreads=2</arg> + <arg>-XX:+ExitOnOutOfMemoryError</arg> + <arg>-Dlog4j.configurationFile={LOG4J_PROPERTIES_FILE}</arg> + </forkedJvmArgs> + <timeoutMillis>60000</timeoutMillis> </async> <fetchers> <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher"> - <params> - <name>fsf</name> - <basePath>{PATH_TO_DOCS}</basePath> - </params> + <name>fsf</name> + <basePath>{PATH_TO_DOCS}</basePath> </fetcher> </fetchers> <emitters> <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter"> - <params> - <name>se</name> - {SOLR_CONNECTION} - <updateStrategy>{UPDATE_STRATEGY}</updateStrategy> - <solrCollection>testcol</solrCollection> - <attachmentStrategy>{ATTACHMENT_STRATEGY}</attachmentStrategy> - <commitWithin>1</commitWithin> - <idField>id</idField> - <connectionTimeout>10000</connectionTimeout> - <socketTimeout>60000</socketTimeout> - </params> + <name>se</name> + {SOLR_CONNECTION} + <updateStrategy>{UPDATE_STRATEGY}</updateStrategy> + <solrCollection>testcol</solrCollection> + <attachmentStrategy>{ATTACHMENT_STRATEGY}</attachmentStrategy> + <commitWithin>1</commitWithin> + <idField>id</idField> + <connectionTimeout>10000</connectionTimeout> + <socketTimeout>60000</socketTimeout> </emitter> <emitter class="org.apache.tika.pipes.emitter.fs.FileSystemEmitter"> - <params> - <name>fse</name> - <basePath>/path/to/extracts</basePath> - </params> + <name>fse</name> + <basePath>/path/to/extracts</basePath> </emitter> </emitters> <pipesIterator class="org.apache.tika.pipes.pipesiterator.solr.SolrPipesIterator"> - <params> - <solrCollection>testcol</solrCollection> - {SOLR_CONNECTION} - <idField>id</idField> - <parsingIdField>parsing_id_i</parsingIdField> - <failCountField>fail_count_i</failCountField> - <sizeFieldName>size_i</sizeFieldName> - <parseMode>{PARSE_MODE}</parseMode> - <rows>100</rows> - <fetcherName>fsf</fetcherName> - <emitterName>se</emitterName> - </params> + <solrCollection>testcol</solrCollection> + {SOLR_CONNECTION} + <idField>id</idField> + <parsingIdField>parsing_id_i</parsingIdField> + <failCountField>fail_count_i</failCountField> + <sizeFieldName>size_i</sizeFieldName> + <parseMode>{PARSE_MODE}</parseMode> + <rows>100</rows> + <fetcherName>fsf</fetcherName> + <emitterName>se</emitterName> </pipesIterator> </properties> \ No newline at end of file diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.xml b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.xml index 8b8b12ac9..03be973bf 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.xml +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests-pdf-only.xml @@ -22,16 +22,12 @@ </parser> </parsers> <autoDetectParserConfig> - <params> - <spoolToDisk>1000000</spoolToDisk> - <outputThreshold>1000000</outputThreshold> - </params> + <spoolToDisk>1000000</spoolToDisk> + <outputThreshold>1000000</outputThreshold> <digesterFactory class="org.apache.tika.parser.digestutils.CommonsDigesterFactory"> - <params> - <markLimit>100000</markLimit> - <algorithmString>sha256:32,md5</algorithmString> - </params> + <markLimit>100000</markLimit> + <algorithmString>sha256:32,md5</algorithmString> </digesterFactory> </autoDetectParserConfig> </properties> diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.xml b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.xml index 0f71a3072..0ec913d50 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.xml +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-digests.xml @@ -20,16 +20,12 @@ <parser class="org.apache.tika.parser.DefaultParser"/> </parsers> <autoDetectParserConfig> - <params> - <spoolToDisk>1000000</spoolToDisk> - <outputThreshold>1000000</outputThreshold> - </params> + <spoolToDisk>1000000</spoolToDisk> + <outputThreshold>1000000</outputThreshold> <digesterFactory class="org.apache.tika.parser.digestutils.CommonsDigesterFactory"> - <params> - <markLimit>100000</markLimit> - <algorithmString>sha256:32,md5</algorithmString> - </params> + <markLimit>100000</markLimit> + <algorithmString>sha256:32,md5</algorithmString> </digesterFactory> </autoDetectParserConfig> </properties> diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-doubling-custom-handler-decorator.xml b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-doubling-custom-handler-decorator.xml index a15cb86b6..7892f4687 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-doubling-custom-handler-decorator.xml +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-doubling-custom-handler-decorator.xml @@ -20,10 +20,8 @@ <parser class="org.apache.tika.parser.DefaultParser"/> </parsers> <autoDetectParserConfig> - <params> - <spoolToDisk>123450</spoolToDisk> - <outputThreshold>678900</outputThreshold> - </params> + <spoolToDisk>123450</spoolToDisk> + <outputThreshold>678900</outputThreshold> <contentHandlerDecoratorFactory class="org.apache.tika.sax.DoublingContentHandlerDecoratorFactory"/> </autoDetectParserConfig> </properties> diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-geo-point-metadata-filter.xml b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-geo-point-metadata-filter.xml index 7188aedd2..92942cfe1 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-geo-point-metadata-filter.xml +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-geo-point-metadata-filter.xml @@ -18,9 +18,7 @@ <properties> <metadataFilters> <metadataFilter class="org.apache.tika.metadata.filter.GeoPointMetadataFilter"> - <params> - <geoPointFieldName>myGeoPoint</geoPointFieldName> - </params> + <geoPointFieldName>myGeoPoint</geoPointFieldName> </metadataFilter> </metadataFilters> </properties> diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.xml b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.xml index 6f7cc95e1..0e2f26bd2 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.xml +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.xml @@ -20,14 +20,10 @@ <parser class="org.apache.tika.parser.DefaultParser"/> </parsers> <autoDetectParserConfig> - <params> - <spoolToDisk>123450</spoolToDisk> - <outputThreshold>678900</outputThreshold> - </params> + <spoolToDisk>123450</spoolToDisk> + <outputThreshold>678900</outputThreshold> <embeddedDocumentExtractorFactory class="org.apache.tika.extractor.ParsingEmbeddedDocumentExtractorFactory"> - <params> - <writeFileNameToContent>false</writeFileNameToContent> - </params> + <writeFileNameToContent>false</writeFileNameToContent> </embeddedDocumentExtractorFactory> </autoDetectParserConfig> </properties> diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.xml b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.xml index dabb47ded..9e978476b 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.xml +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-upcasing-custom-handler-decorator.xml @@ -20,10 +20,8 @@ <parser class="org.apache.tika.parser.DefaultParser"/> </parsers> <autoDetectParserConfig> - <params> - <spoolToDisk>123450</spoolToDisk> - <outputThreshold>678900</outputThreshold> - </params> + <spoolToDisk>123450</spoolToDisk> + <outputThreshold>678900</outputThreshold> <contentHandlerDecoratorFactory class="org.apache.tika.sax.UpcasingContentHandlerDecoratorFactory"/> </autoDetectParserConfig> </properties> diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.xml b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.xml index dae145194..f54eb9a0a 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.xml +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.xml @@ -20,14 +20,10 @@ <parser class="org.apache.tika.parser.DefaultParser"/> </parsers> <autoDetectParserConfig> - <params> - <spoolToDisk>123450</spoolToDisk> - <outputThreshold>678900</outputThreshold> - </params> + <spoolToDisk>123450</spoolToDisk> + <outputThreshold>678900</outputThreshold> <embeddedDocumentExtractorFactory class="org.apache.tika.extractor.ParsingEmbeddedDocumentExtractorFactory"> - <params> - <writeFileNameToContent>true</writeFileNameToContent> - </params> + <writeFileNameToContent>true</writeFileNameToContent> </embeddedDocumentExtractorFactory> </autoDetectParserConfig> </properties> diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml index 056e64c6f..cff7e8f96 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml @@ -18,21 +18,17 @@ <properties> <metadataFilters> <metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter"> - <params> - <include> - <field>X-TIKA:content</field> - <field>extended-properties:Application</field> - <field>Content-Type</field> - </include> - </params> + <include> + <field>X-TIKA:content</field> + <field>extended-properties:Application</field> + <field>Content-Type</field> + </include> </metadataFilter> <metadataFilter class="org.apache.tika.metadata.filter.ClearByMimeMetadataFilter"> - <params> - <mimes> - <mime>image/emf</mime> - <mime>text/plain</mime> - </mimes> - </params> + <mimes> + <mime>image/emf</mime> + <mime>text/plain</mime> + </mimes> </metadataFilter> </metadataFilters> </properties> diff --git a/tika-pipes/tika-async-cli/src/test/resources/tika-config-broken.xml b/tika-pipes/tika-async-cli/src/test/resources/tika-config-broken.xml index 75b10da92..5ee379e6f 100644 --- a/tika-pipes/tika-async-cli/src/test/resources/tika-config-broken.xml +++ b/tika-pipes/tika-async-cli/src/test/resources/tika-config-broken.xml @@ -20,17 +20,13 @@ <properties> <fetchers> <fetcher class="org.apache.tika.pipes.fetcher.s3.S3Fetcher"> - <params> - <name>s3</name> - <region>us-east-1</region> - <profile><!-- fill in here --></profile> - </params> + <name>s3</name> + <region>us-east-1</region> + <profile><!-- fill in here --></profile> </fetcher> </fetchers> - <pipesIterator class="org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator"> - <params> - <fetcherName>fs</fetcherName> - <basePath>basePath</basePath> - </params> - </pipesIterator> + <pipesIterator class="org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator"> + <fetcherName>fs</fetcherName> + <basePath>basePath</basePath> + </pipesIterator> </properties> \ No newline at end of file diff --git a/tika-pipes/tika-emitters/tika-emitter-az-blob/src/test/resources/config/tika-config-az-blob.xml b/tika-pipes/tika-emitters/tika-emitter-az-blob/src/test/resources/config/tika-config-az-blob.xml index 99a2e2b09..c1ba42b07 100644 --- a/tika-pipes/tika-emitters/tika-emitter-az-blob/src/test/resources/config/tika-config-az-blob.xml +++ b/tika-pipes/tika-emitters/tika-emitter-az-blob/src/test/resources/config/tika-config-az-blob.xml @@ -16,15 +16,13 @@ limitations under the License. --> <properties> - <emitters> - <emitter class="org.apache.tika.pipes.emitter.azblob.AZBlobEmitter"> - <params> - <name>az-blob</name> - <!-- these have to be non-null --> - <endpoint></endpoint> - <container></container> - <sasToken></sasToken> - </params> - </emitter> - </emitters> + <emitters> + <emitter class="org.apache.tika.pipes.emitter.azblob.AZBlobEmitter"> + <name>az-blob</name> + <!-- these have to be non-null --> + <endpoint></endpoint> + <container></container> + <sasToken></sasToken> + </emitter> + </emitters> </properties> \ No newline at end of file diff --git a/tika-pipes/tika-emitters/tika-emitter-gcs/src/test/resources/config/tika-config-gcs.xml b/tika-pipes/tika-emitters/tika-emitter-gcs/src/test/resources/config/tika-config-gcs.xml index b45ec310e..2ea06761e 100644 --- a/tika-pipes/tika-emitters/tika-emitter-gcs/src/test/resources/config/tika-config-gcs.xml +++ b/tika-pipes/tika-emitters/tika-emitter-gcs/src/test/resources/config/tika-config-gcs.xml @@ -16,13 +16,11 @@ limitations under the License. --> <properties> - <emitters> - <emitter class="org.apache.tika.pipes.emitter.gcs.GCSEmitter"> - <params> - <name>gcs</name> - <projectId>My First Project</projectId> - <bucket>tika-tallison-test-bucket</bucket> - </params> - </emitter> - </emitters> + <emitters> + <emitter class="org.apache.tika.pipes.emitter.gcs.GCSEmitter"> + <name>gcs</name> + <projectId>My First Project</projectId> + <bucket>tika-tallison-test-bucket</bucket> + </emitter> + </emitters> </properties> \ No newline at end of file diff --git a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-attachments.xml b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-attachments.xml index 93130db65..4bc2d8e87 100644 --- a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-attachments.xml +++ b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-attachments.xml @@ -20,36 +20,34 @@ <properties> <emitters> <emitter class="org.apache.tika.pipes.emitter.jdbc.JDBCEmitter"> - <params> - <name>jdbc</name> - <connection>CONNECTION_STRING</connection> - <createTable>create table test - (path varchar(512) not null, - attachment_num integer not null, - k1 boolean, - k2 varchar(512), - k3 integer, - k4 long); - </createTable> - <alterTable>alter table test add primary key (path, attachment_num)</alterTable> - <!-- the jdbc emitter always puts ths emitKey value as the first - item --> - <insert>insert into test (path, attachment_num, k1, k2, k3, k4) values (?,?,?,?,?,?); - </insert> - <!-- these are the keys in the metadata object. - The emitKey is added as the first element in the insert statement. - Then the these values are added in order. - They must be in the order of the insert statement. - The emit key is added as - --> - <keys> - <key k="k1" v="boolean"/> - <key k="k2" v="string"/> - <key k="k3" v="int"/> - <key k="k4" v="long"/> - </keys> - <attachmentStrategy>all</attachmentStrategy> - </params> + <name>jdbc</name> + <connection>CONNECTION_STRING</connection> + <createTable>create table test + (path varchar(512) not null, + attachment_num integer not null, + k1 boolean, + k2 varchar(512), + k3 integer, + k4 long); + </createTable> + <alterTable>alter table test add primary key (path, attachment_num)</alterTable> + <!-- the jdbc emitter always puts ths emitKey value as the first + item --> + <insert>insert into test (path, attachment_num, k1, k2, k3, k4) values (?,?,?,?,?,?); + </insert> + <!-- these are the keys in the metadata object. + The emitKey is added as the first element in the insert statement. + Then the these values are added in order. + They must be in the order of the insert statement. + The emit key is added as + --> + <keys> + <key k="k1" v="boolean"/> + <key k="k2" v="string"/> + <key k="k3" v="int"/> + <key k="k4" v="long"/> + </keys> + <attachmentStrategy>all</attachmentStrategy> </emitter> </emitters> </properties> \ No newline at end of file diff --git a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-existing-table.xml b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-existing-table.xml index 3b9befa66..654b279be 100644 --- a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-existing-table.xml +++ b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-existing-table.xml @@ -20,25 +20,23 @@ <properties> <emitters> <emitter class="org.apache.tika.pipes.emitter.jdbc.JDBCEmitter"> - <params> - <name>jdbc</name> - <connection>CONNECTION_STRING</connection> - <insert>insert into test (path, k1, k2, k3, k4) values (?,?,?,?,?); - </insert> - <!-- these are the keys in the metadata object. - The emitKey is added as the first element in the insert statement. - Then the these values are added in order. - They must be in the order of the insert statement. - The emit key is added as - --> - <keys> - <key k="k1" v="boolean"/> - <key k="k2" v="string"/> - <key k="k3" v="int"/> - <key k="k4" v="long"/> - </keys> - <attachmentStrategy>first_only</attachmentStrategy> - </params> + <name>jdbc</name> + <connection>CONNECTION_STRING</connection> + <insert>insert into test (path, k1, k2, k3, k4) values (?,?,?,?,?); + </insert> + <!-- these are the keys in the metadata object. + The emitKey is added as the first element in the insert statement. + Then the these values are added in order. + They must be in the order of the insert statement. + The emit key is added as + --> + <keys> + <key k="k1" v="boolean"/> + <key k="k2" v="string"/> + <key k="k3" v="int"/> + <key k="k4" v="long"/> + </keys> + <attachmentStrategy>first_only</attachmentStrategy> </emitter> </emitters> </properties> \ No newline at end of file diff --git a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-multivalued.xml b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-multivalued.xml index a46e145f0..eb966b54a 100644 --- a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-multivalued.xml +++ b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter-multivalued.xml @@ -20,28 +20,26 @@ <properties> <emitters> <emitter class="org.apache.tika.pipes.emitter.jdbc.JDBCEmitter"> - <params> - <name>jdbc</name> - <connection>CONNECTION_STRING</connection> - <createTable>create table test - (path varchar(512) primary key, - k1 varchar(512)); - </createTable> - <!-- the jdbc emitter always puts ths emitKey value as the first - item --> - <insert>insert into test (path, k1) values (?,?); - </insert> - <!-- these are the keys in the metadata object. - The emitKey is added as the first element in the insert statement. - Then the these values are added in order. - They must be in the order of the insert statement. - --> - <keys> - <key k="k1" v="varchar(512)"/> - </keys> - <multivaluedFieldStrategy>concatenate</multivaluedFieldStrategy> - <multivaluedFieldDelimiter>, </multivaluedFieldDelimiter> - </params> + <name>jdbc</name> + <connection>CONNECTION_STRING</connection> + <createTable>create table test + (path varchar(512) primary key, + k1 varchar(512)); + </createTable> + <!-- the jdbc emitter always puts ths emitKey value as the first + item --> + <insert>insert into test (path, k1) values (?,?); + </insert> + <!-- these are the keys in the metadata object. + The emitKey is added as the first element in the insert statement. + Then the these values are added in order. + They must be in the order of the insert statement. + --> + <keys> + <key k="k1" v="varchar(512)"/> + </keys> + <multivaluedFieldStrategy>concatenate</multivaluedFieldStrategy> + <multivaluedFieldDelimiter>, </multivaluedFieldDelimiter> </emitter> </emitters> </properties> \ No newline at end of file diff --git a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml index 7ec4c96db..c1a05bdec 100644 --- a/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml +++ b/tika-pipes/tika-emitters/tika-emitter-jdbc/src/test/resources/configs/tika-config-jdbc-emitter.xml @@ -20,37 +20,35 @@ <properties> <emitters> <emitter class="org.apache.tika.pipes.emitter.jdbc.JDBCEmitter"> - <params> - <name>jdbc</name> - <connection>CONNECTION_STRING</connection> - <createTable>create table test - (path varchar(512) primary key, - k1 boolean, - k2 varchar(512), - k3 integer, - k4 long, - k5 bigint, - k6 timestamp); - </createTable> - <!-- the jdbc emitter always puts ths emitKey value as the first - item --> - <insert>insert into test (path, k1, k2, k3, k4, k5, k6) values (?,?,?,?,?,?,?); - </insert> - <!-- these are the keys in the metadata object. - The emitKey is added as the first element in the insert statement. - Then the these values are added in order. - They must be in the order of the insert statement. - --> - <keys> - <key k="k1" v="boolean"/> - <key k="k2" v="string"/> - <key k="k3" v="int"/> - <key k="k4" v="long"/> - <key k="k5" v="bigint"/> - <key k="k6" v="timestamp"/> - </keys> - <attachmentStrategy>first_only</attachmentStrategy> - </params> + <name>jdbc</name> + <connection>CONNECTION_STRING</connection> + <createTable>create table test + (path varchar(512) primary key, + k1 boolean, + k2 varchar(512), + k3 integer, + k4 long, + k5 bigint, + k6 timestamp); + </createTable> + <!-- the jdbc emitter always puts ths emitKey value as the first + item --> + <insert>insert into test (path, k1, k2, k3, k4, k5, k6) values (?,?,?,?,?,?,?); + </insert> + <!-- these are the keys in the metadata object. + The emitKey is added as the first element in the insert statement. + Then the these values are added in order. + They must be in the order of the insert statement. + --> + <keys> + <key k="k1" v="boolean"/> + <key k="k2" v="string"/> + <key k="k3" v="int"/> + <key k="k4" v="long"/> + <key k="k5" v="bigint"/> + <key k="k6" v="timestamp"/> + </keys> + <attachmentStrategy>first_only</attachmentStrategy> </emitter> </emitters> </properties> \ No newline at end of file diff --git a/tika-pipes/tika-emitters/tika-emitter-opensearch/src/test/resources/tika-config-simple-emitter.xml b/tika-pipes/tika-emitters/tika-emitter-opensearch/src/test/resources/tika-config-simple-emitter.xml index 7959bf6d0..f6530a9e9 100644 --- a/tika-pipes/tika-emitters/tika-emitter-opensearch/src/test/resources/tika-config-simple-emitter.xml +++ b/tika-pipes/tika-emitters/tika-emitter-opensearch/src/test/resources/tika-config-simple-emitter.xml @@ -31,13 +31,11 @@ </metadataFilters> <emitters> <emitter class="org.apache.tika.pipes.emitter.opensearch.OpenSearchEmitter"> - <params> - <name>opensearch1</name> - <url>http://localhost:9200/tika-test</url> - <attachmentStrategy>concatenate-content</attachmentStrategy> - <contentField>content</contentField> - <commitWithin>10</commitWithin> - </params> + <name>opensearch1</name> + <url>http://localhost:9200/tika-test</url> + <attachmentStrategy>concatenate-content</attachmentStrategy> + <contentField>content</contentField> + <commitWithin>10</commitWithin> </emitter> </emitters> </properties> \ No newline at end of file diff --git a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/tika-config-simple-emitter.xml b/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/tika-config-simple-emitter.xml index c52da5e6e..5b14a5441 100644 --- a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/tika-config-simple-emitter.xml +++ b/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/tika-config-simple-emitter.xml @@ -31,22 +31,18 @@ </metadataFilters> <emitters> <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter"> - <params> - <name>solr1</name> - <url>http://localhost:8983/solr/tika-test</url> - <attachmentStrategy>concatenate-content</attachmentStrategy> - <contentField>content</contentField> - <commitWithin>10</commitWithin> - </params> + <name>solr1</name> + <url>http://localhost:8983/solr/tika-test</url> + <attachmentStrategy>concatenate-content</attachmentStrategy> + <contentField>content</contentField> + <commitWithin>10</commitWithin> </emitter> <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter"> - <params> - <name>solr2</name> - <url>http://localhost:8983/solr/tika-test</url> - <attachmentStrategy>parent-child</attachmentStrategy> - <contentField>content</contentField> - <commitWithin>10</commitWithin> - </params> + <name>solr2</name> + <url>http://localhost:8983/solr/tika-test</url> + <attachmentStrategy>parent-child</attachmentStrategy> + <contentField>content</contentField> + <commitWithin>10</commitWithin> </emitter> </emitters> </properties> \ No newline at end of file diff --git a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/test/resources/tika-config-az-blob.xml b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/test/resources/tika-config-az-blob.xml index e2d74b207..2aa6ba9a5 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/test/resources/tika-config-az-blob.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-az-blob/src/test/resources/tika-config-az-blob.xml @@ -16,17 +16,15 @@ limitations under the License. --> <properties> - <fetchers> - <fetcher class="org.apache.tika.pipes.fetcher.azblob.AZBlobFetcher"> - <params> - <name>az-blob</name> - <!-- Either configure these three and send in the path for the blob OR - do not configure any of these and send in the full SAS url for the blob - as the fetchkey--> - <endpoint></endpoint> - <container></container> - <sasToken></sasToken> - </params> - </fetcher> - </fetchers> + <fetchers> + <fetcher class="org.apache.tika.pipes.fetcher.azblob.AZBlobFetcher"> + <name>az-blob</name> + <!-- Either configure these three and send in the path for the blob OR + do not configure any of these and send in the full SAS url for the blob + as the fetchkey--> + <endpoint></endpoint> + <container></container> + <sasToken></sasToken> + </fetcher> + </fetchers> </properties> \ No newline at end of file diff --git a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/resources/tika-config-gcs.xml b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/resources/tika-config-gcs.xml index eee110dee..7e9ba6817 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/resources/tika-config-gcs.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-gcs/src/test/resources/tika-config-gcs.xml @@ -16,13 +16,11 @@ limitations under the License. --> <properties> - <fetchers> - <fetcher class="org.apache.tika.pipes.fetcher.gcs.GCSFetcher"> - <params> - <name>gcs</name> - <projectId>My First Project</projectId> - <bucket>tika-tallison-test-bucket</bucket> - </params> - </fetcher> - </fetchers> + <fetchers> + <fetcher class="org.apache.tika.pipes.fetcher.gcs.GCSFetcher"> + <name>gcs</name> + <projectId>My First Project</projectId> + <bucket>tika-tallison-test-bucket</bucket> + </fetcher> + </fetchers> </properties> \ No newline at end of file diff --git a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/resources/tika-config-http.xml b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/resources/tika-config-http.xml index 2336899e0..bd77de4ba 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/resources/tika-config-http.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-http/src/test/resources/tika-config-http.xml @@ -16,16 +16,14 @@ limitations under the License. --> <properties> - <fetchers> - <fetcher class="org.apache.tika.pipes.fetcher.http.HttpFetcher"> - <params> - <name>http</name> - <httpHeaders> - <header>Connection</header> - <header>Expires</header> - <header>Content-Length</header> - </httpHeaders> - </params> - </fetcher> - </fetchers> + <fetchers> + <fetcher class="org.apache.tika.pipes.fetcher.http.HttpFetcher"> + <name>http</name> + <httpHeaders> + <header>Connection</header> + <header>Expires</header> + <header>Content-Length</header> + </httpHeaders> + </fetcher> + </fetchers> </properties> \ No newline at end of file diff --git a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/test/resources/tika-config-s3.xml b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/test/resources/tika-config-s3.xml index 5139405bd..aa3166536 100644 --- a/tika-pipes/tika-fetchers/tika-fetcher-s3/src/test/resources/tika-config-s3.xml +++ b/tika-pipes/tika-fetchers/tika-fetcher-s3/src/test/resources/tika-config-s3.xml @@ -16,14 +16,12 @@ limitations under the License. --> <properties> - <fetchers> - <fetcher class="org.apache.tika.pipes.fetcher.s3.S3Fetcher"> - <params> - <name>s3</name> - <region>us-east-1</region> - <profile>my_profile</profile> - <credentialsProvider>profile</credentialsProvider> - </params> - </fetcher> - </fetchers> + <fetchers> + <fetcher class="org.apache.tika.pipes.fetcher.s3.S3Fetcher"> + <name>s3</name> + <region>us-east-1</region> + <profile>my_profile</profile> + <credentialsProvider>profile</credentialsProvider> + </fetcher> + </fetchers> </properties> \ No newline at end of file diff --git a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-jdbc/src/test/java/org/apache/tika/pipes/pipesiterator/jdbc/TestJDBCPipesIterator.java b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-jdbc/src/test/java/org/apache/tika/pipes/pipesiterator/jdbc/TestJDBCPipesIterator.java index 042c66160..a993559e9 100644 --- a/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-jdbc/src/test/java/org/apache/tika/pipes/pipesiterator/jdbc/TestJDBCPipesIterator.java +++ b/tika-pipes/tika-pipes-iterators/tika-pipes-iterator-jdbc/src/test/java/org/apache/tika/pipes/pipesiterator/jdbc/TestJDBCPipesIterator.java @@ -144,7 +144,6 @@ public class TestJDBCPipesIterator { String config = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?><properties>\n" + " <pipesIterator " + " class=\"org.apache.tika.pipes.pipesiterator.jdbc.JDBCPipesIterator\">\n" + - " <params>\n" + " <fetcherName>s3f</fetcherName>\n" + " <emitterName>s3e</emitterName>\n" + " <queueSize>57</queueSize>\n" + @@ -156,7 +155,6 @@ public class TestJDBCPipesIterator { "from fetchkeys</select>\n" + " <connection>jdbc:h2:file:" + DB_DIR.toAbsolutePath() + "/" + db + "</connection>\n" + - " </params>\n" + " </pipesIterator>\n" + "</properties>"; Path tmp = Files.createTempFile("tika-jdbc-", ".xml"); diff --git a/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-jdbc/src/test/resources/configs/tika-config-excludes.xml b/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-jdbc/src/test/resources/configs/tika-config-excludes.xml index ab7682237..7131ea3cc 100644 --- a/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-jdbc/src/test/resources/configs/tika-config-excludes.xml +++ b/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-jdbc/src/test/resources/configs/tika-config-excludes.xml @@ -19,7 +19,6 @@ --> <properties> <async> - <params> <maxForEmitBatchBytes>10000</maxForEmitBatchBytes> <emitMaxEstimatedBytes>100000</emitMaxEstimatedBytes> <emitWithinMillis>60000</emitWithinMillis> @@ -32,15 +31,12 @@ <arg>-Dlog4j.configurationFile={LOG4J_PROPERTIES_FILE}</arg> </forkedJvmArgs> <timeoutMillis>60000</timeoutMillis> - </params> - <pipesReporter class="org.apache.tika.pipes.reporters.jdbc.JDBCPipesReporter"> - <params> + <pipesReporter class="org.apache.tika.pipes.reporters.jdbc.JDBCPipesReporter"> <connection>CONNECTION_STRING</connection> <excludes> <exclude>PARSE_SUCCESS</exclude> <exclude>PARSE_SUCCESS_WITH_EXCEPTION</exclude> </excludes> - </params> - </pipesReporter> -</async> + </pipesReporter> + </async> </properties> diff --git a/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-jdbc/src/test/resources/configs/tika-config-includes.xml b/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-jdbc/src/test/resources/configs/tika-config-includes.xml index 1c3c68663..a2ebae791 100644 --- a/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-jdbc/src/test/resources/configs/tika-config-includes.xml +++ b/tika-pipes/tika-pipes-reporters/tika-pipes-reporter-jdbc/src/test/resources/configs/tika-config-includes.xml @@ -19,7 +19,6 @@ --> <properties> <async> - <params> <maxForEmitBatchBytes>10000</maxForEmitBatchBytes> <emitMaxEstimatedBytes>100000</emitMaxEstimatedBytes> <emitWithinMillis>60000</emitWithinMillis> @@ -32,15 +31,12 @@ <arg>-Dlog4j.configurationFile={LOG4J_PROPERTIES_FILE}</arg> </forkedJvmArgs> <timeoutMillis>60000</timeoutMillis> - </params> - <pipesReporter class="org.apache.tika.pipes.reporters.jdbc.JDBCPipesReporter"> - <params> + <pipesReporter class="org.apache.tika.pipes.reporters.jdbc.JDBCPipesReporter"> <connection>CONNECTION_STRING</connection> <includes> <include>PARSE_SUCCESS</include> <include>PARSE_SUCCESS_WITH_EXCEPTION</include> </includes> - </params> - </pipesReporter> -</async> + </pipesReporter> + </async> </properties> diff --git a/tika-server/tika-server-core/src/main/resources/tika-server-config-default.xml b/tika-server/tika-server-core/src/main/resources/tika-server-config-default.xml index 5593d7d2d..007a1be5a 100644 --- a/tika-server/tika-server-core/src/main/resources/tika-server-config-default.xml +++ b/tika-server/tika-server-core/src/main/resources/tika-server-config-default.xml @@ -17,6 +17,7 @@ --> <properties> <server> + <!-- as of Tika 2.7.0, we do not require the params element here --> <params> <!-- which port to start the server on. If you specify a range, e.g. 9995-9998, TikaServerCli will start four forked servers, diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java index b1b73a896..78ab70f40 100644 --- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java +++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaPipesTest.java @@ -107,15 +107,15 @@ public class TikaPipesTest extends CXFTestBase { TIKA_CONFIG_XML = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "<properties>" + "<fetchers>" + "<fetcher class=\"org.apache.tika.pipes.fetcher.fs.FileSystemFetcher\">" + - "<params>" + "<name>fsf</name>" + + "<name>fsf</name>" + "<basePath>" + inputDir.toAbsolutePath() + - "</basePath>" + "</params>" + "</fetcher>" + "</fetchers>" + "<emitters>" + + "</basePath>" + "</fetcher>" + "</fetchers>" + "<emitters>" + "<emitter class=\"org.apache.tika.pipes.emitter.fs.FileSystemEmitter\">" + - "<params>" + "<name>fse</name>" + + "<name>fse</name>" + "<basePath>" + - TMP_OUTPUT_DIR.toAbsolutePath() + "</basePath>" + "</params>" + + TMP_OUTPUT_DIR.toAbsolutePath() + "</basePath>" + "</emitter>" + - "</emitters>" + "<pipes><params><tikaConfig>" + + "</emitters>" + "<pipes><tikaConfig>" + ProcessUtils.escapeCommandLine(TIKA_CONFIG_PATH.toAbsolutePath().toString()) + "</tikaConfig><numClients>10</numClients>" + "<forkedJvmArgs>" + @@ -123,7 +123,7 @@ public class TikaPipesTest extends CXFTestBase { "<arg>-Dlog4j.configurationFile=file:" + ProcessUtils.escapeCommandLine(TIKA_PIPES_LOG4j2_PATH.toAbsolutePath().toString()) + "</arg>" + "</forkedJvmArgs>" + - "</params></pipes>" + "</properties>"; + "</pipes>" + "</properties>"; Files.write(TIKA_CONFIG_PATH, TIKA_CONFIG_XML.getBytes(StandardCharsets.UTF_8)); } diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerAsyncIntegrationTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerAsyncIntegrationTest.java index eeaf09449..33c1bc530 100644 --- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerAsyncIntegrationTest.java +++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerAsyncIntegrationTest.java @@ -102,25 +102,22 @@ public class TikaServerAsyncIntegrationTest extends IntegrationTestBase { TIKA_CONFIG_XML = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "<properties>" + "<fetchers>" + "<fetcher class=\"org.apache.tika.pipes.fetcher.fs.FileSystemFetcher\">" + - "<params>" + "<name>" + FETCHER_NAME + - "</name>" + "<basePath>" + - inputDir.toAbsolutePath() + "</basePath>" + "</params>" + "</fetcher>" + + "<name>" + FETCHER_NAME + "</name>" + + "<basePath>" + inputDir.toAbsolutePath() + "</basePath>" + "</fetcher>" + "</fetchers>" + "<emitters>" + "<emitter class=\"org.apache.tika.pipes.emitter.fs.FileSystemEmitter\">" + - "<params>" + "<name>" + EMITTER_NAME + - "</name>" + - + "<name>" + EMITTER_NAME + "</name>" + "<basePath>" + - TMP_OUTPUT_DIR.toAbsolutePath() + "</basePath>" + "</params>" + + TMP_OUTPUT_DIR.toAbsolutePath() + "</basePath>" + "</emitter>" + "</emitters>" + - "<server><params><endpoints><endpoint>async</endpoint></endpoints>" + - "<enableUnsecureFeatures>true</enableUnsecureFeatures></params></server>" + - "<async><params><tikaConfig>" + + "<server><endpoints><endpoint>async</endpoint></endpoints>" + + "<enableUnsecureFeatures>true</enableUnsecureFeatures></server>" + + "<async><tikaConfig>" + ProcessUtils.escapeCommandLine(TIKA_CONFIG.toAbsolutePath().toString()) + "</tikaConfig><numClients>10</numClients><forkedJvmArgs><arg>-Xmx256m" + "</arg></forkedJvmArgs><timeoutMillis>5000</timeoutMillis>" + - "</params></async>" + + "</async>" + "</properties>"; FileUtils.write(TIKA_CONFIG.toFile(), TIKA_CONFIG_XML, UTF_8); diff --git a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerPipesIntegrationTest.java b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerPipesIntegrationTest.java index c59eaea6d..36937e0c6 100644 --- a/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerPipesIntegrationTest.java +++ b/tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaServerPipesIntegrationTest.java @@ -80,24 +80,24 @@ public class TikaServerPipesIntegrationTest extends IntegrationTestBase { TIKA_CONFIG_TIMEOUT = TEMP_WORKING_DIR.resolve("tika-config-timeout.xml"); //TODO -- clean this up so that port is sufficient and we don't need portString String xml1 = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "<properties>" + "<fetchers>" + - "<fetcher class=\"org.apache.tika.pipes.fetcher.fs.FileSystemFetcher\">" + "<params>" + + "<fetcher class=\"org.apache.tika.pipes.fetcher.fs.FileSystemFetcher\">" + "<name>" + FETCHER_NAME + "</name>" + "<basePath>" + inputDir.toAbsolutePath() + - "</basePath>" + "</params>" + "</fetcher>" + "</fetchers>" + "<emitters>" + + "</basePath>" + "</fetcher>" + "</fetchers>" + "<emitters>" + "<emitter class=\"org.apache.tika.pipes.emitter.fs.FileSystemEmitter\">" + - "<params>" + "<name>" + EMITTER_NAME + "</name>" + + "<name>" + EMITTER_NAME + "</name>" + "<basePath>" + TEMP_OUTPUT_DIR.toAbsolutePath() + - "</basePath>" + "</params>" + "</emitter>" + "</emitters>" + "<server><params>" + + "</basePath>" + "</emitter>" + "</emitters>" + "<server>" + "<enableUnsecureFeatures>true</enableUnsecureFeatures>" + "<port>9999</port>" + "<endpoints>" + "<endpoint>pipes</endpoint>" + "<endpoint>status</endpoint>" + "</endpoints>"; - String xml2 = "</params></server>" + - "<pipes><params><tikaConfig>" + + String xml2 = "</server>" + + "<pipes><tikaConfig>" + ProcessUtils.escapeCommandLine(TIKA_CONFIG.toAbsolutePath().toString()) + "</tikaConfig><numClients>10</numClients><forkedJvmArgs><arg>-Xmx256m" + "</arg>" + //TODO: need to add logging config here "</forkedJvmArgs><timeoutMillis>5000</timeoutMillis>" + - "</params></pipes>" + "</properties>"; + "</pipes>" + "</properties>"; String tikaConfigXML = xml1 + xml2; diff --git a/tika-server/tika-server-core/src/test/resources/configs/metadata-filter-include.xml b/tika-server/tika-server-core/src/test/resources/configs/metadata-filter-include.xml index 96ce8e5c8..6dd088e74 100644 --- a/tika-server/tika-server-core/src/test/resources/configs/metadata-filter-include.xml +++ b/tika-server/tika-server-core/src/test/resources/configs/metadata-filter-include.xml @@ -16,15 +16,13 @@ limitations under the License. --> <properties> - <metadataFilters> - <metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter"> - <params> - <include> - <field>X-TIKA:content</field> - <field>extended-properties:Application</field> - <field>Content-Type</field> - </include> - </params> - </metadataFilter> - </metadataFilters> + <metadataFilters> + <metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter"> + <include> + <field>X-TIKA:content</field> + <field>extended-properties:Application</field> + <field>Content-Type</field> + </include> + </metadataFilter> + </metadataFilters> </properties> diff --git a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-badjvmargs.xml b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-badjvmargs.xml index 9ec9cc737..57e198fbc 100644 --- a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-badjvmargs.xml +++ b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-badjvmargs.xml @@ -17,17 +17,15 @@ --> <properties> <server> - <params> - <port>9999</port> - <taskTimeoutMillis>54321</taskTimeoutMillis> - <enableUnsecureFeatures>true</enableUnsecureFeatures> - <forkedJvmArgs> - <arg>-Xms20m</arg> - <arg>-Xmx10m</arg> - </forkedJvmArgs> - <endpoints> - <endpoint>rmeta</endpoint> - </endpoints> - </params> + <port>9999</port> + <taskTimeoutMillis>54321</taskTimeoutMillis> + <enableUnsecureFeatures>true</enableUnsecureFeatures> + <forkedJvmArgs> + <arg>-Xms20m</arg> + <arg>-Xmx10m</arg> + </forkedJvmArgs> + <endpoints> + <endpoint>rmeta</endpoint> + </endpoints> </server> </properties> diff --git a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-basic.xml b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-basic.xml index 7937a2b56..082561278 100644 --- a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-basic.xml +++ b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-basic.xml @@ -17,18 +17,16 @@ --> <properties> <server> - <params> - <taskTimeoutMillis>120000</taskTimeoutMillis> - <minimumTimeoutMillis>10</minimumTimeoutMillis> - <port>9999</port> - <maxFiles>1000</maxFiles> - <forkedJvmArgs> - <arg>-Xmx512m</arg> - </forkedJvmArgs> - <endpoints> - <endpoint>rmeta</endpoint> - <endpoint>status</endpoint> - </endpoints> - </params> + <taskTimeoutMillis>120000</taskTimeoutMillis> + <minimumTimeoutMillis>10</minimumTimeoutMillis> + <port>9999</port> + <maxFiles>1000</maxFiles> + <forkedJvmArgs> + <arg>-Xmx512m</arg> + </forkedJvmArgs> + <endpoints> + <endpoint>rmeta</endpoint> + <endpoint>status</endpoint> + </endpoints> </server> </properties> diff --git a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-emitter.xml b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-emitter.xml index 29c8eec5e..201f552f1 100644 --- a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-emitter.xml +++ b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-emitter.xml @@ -17,19 +17,17 @@ --> <properties> <server> - <params> - <taskTimeoutMillis>120000</taskTimeoutMillis> - <port>9999</port> - <maxFiles>1000</maxFiles> - <forkedJvmArgs> - <arg>-Xmx512m</arg> - </forkedJvmArgs> - <enableUnsecure>true</enableUnsecure> - <endpoints> - <endpoint>emit</endpoint> - <endpoint>async</endpoint> - <endpoint>status</endpoint> - </endpoints> - </params> + <taskTimeoutMillis>120000</taskTimeoutMillis> + <port>9999</port> + <maxFiles>1000</maxFiles> + <forkedJvmArgs> + <arg>-Xmx512m</arg> + </forkedJvmArgs> + <enableUnsecure>true</enableUnsecure> + <endpoints> + <endpoint>emit</endpoint> + <endpoint>async</endpoint> + <endpoint>status</endpoint> + </endpoints> </server> </properties> diff --git a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-fetcher-template.xml b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-fetcher-template.xml index 11616ceee..1f859c504 100644 --- a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-fetcher-template.xml +++ b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-fetcher-template.xml @@ -18,21 +18,17 @@ <properties> <fetchers> <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher"> - <params> - <name>fsf</name> - <basePath>{FETCHER_BASE_PATH}</basePath> - </params> + <name>fsf</name> + <basePath>{FETCHER_BASE_PATH}</basePath> </fetcher> </fetchers> <server> - <params> - <port>{PORT}</port> - <taskTimeoutMillis>54321</taskTimeoutMillis> - <enableUnsecureFeatures>true</enableUnsecureFeatures> - <noFork>true</noFork> - <endpoints> - <endpoint>tika</endpoint> - </endpoints> - </params> + <port>{PORT}</port> + <taskTimeoutMillis>54321</taskTimeoutMillis> + <enableUnsecureFeatures>true</enableUnsecureFeatures> + <noFork>true</noFork> + <endpoints> + <endpoint>tika</endpoint> + </endpoints> </server> </properties> diff --git a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-fetchers-emitters.xml b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-fetchers-emitters.xml index 360c9e144..4ed9ab03c 100644 --- a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-fetchers-emitters.xml +++ b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-fetchers-emitters.xml @@ -18,32 +18,26 @@ <properties> <fetchers> <fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher"> - <params> - <name>fsf</name> - <basePath>/somePathOrOther</basePath> - </params> + <name>fsf</name> + <basePath>/somePathOrOther</basePath> </fetcher> </fetchers> <emitters> <emitter class="org.apache.tika.pipes.emitter.fs.FileSystemEmitter"> - <params> - <name>fse</name> - <basePath>/path/or/other/extracts</basePath> - </params> + <name>fse</name> + <basePath>/path/or/other/extracts</basePath> </emitter> </emitters> <server> - <params> - <port>9999</port> - <taskTimeoutMillis>54321</taskTimeoutMillis> - <enableUnsecureFeatures>true</enableUnsecureFeatures> - <maxFiles>20</maxFiles> - <forkedJvmArgs> - <arg>-Xmx2g</arg> - </forkedJvmArgs> - <endpoints> - <endpoint>rmeta</endpoint> - </endpoints> - </params> + <port>9999</port> + <taskTimeoutMillis>54321</taskTimeoutMillis> + <enableUnsecureFeatures>true</enableUnsecureFeatures> + <maxFiles>20</maxFiles> + <forkedJvmArgs> + <arg>-Xmx2g</arg> + </forkedJvmArgs> + <endpoints> + <endpoint>rmeta</endpoint> + </endpoints> </server> </properties> diff --git a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-timeout-10000.xml b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-timeout-10000.xml index 01d887d81..bc87efe11 100644 --- a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-timeout-10000.xml +++ b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-timeout-10000.xml @@ -17,14 +17,12 @@ --> <properties> <server> - <params> - <port>9999</port> - <taskTimeoutMillis>10000</taskTimeoutMillis> - <taskPulseMillis>100</taskPulseMillis> - <maxFiles>20</maxFiles> - <forkedJvmArgs> - <arg>-Xmx512m</arg> - </forkedJvmArgs> - </params> + <port>9999</port> + <taskTimeoutMillis>10000</taskTimeoutMillis> + <taskPulseMillis>100</taskPulseMillis> + <maxFiles>20</maxFiles> + <forkedJvmArgs> + <arg>-Xmx512m</arg> + </forkedJvmArgs> </server> </properties> diff --git a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-tls-one-way-template.xml b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-tls-one-way-template.xml index 04090f7fd..67290118d 100644 --- a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-tls-one-way-template.xml +++ b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-tls-one-way-template.xml @@ -17,27 +17,23 @@ --> <properties> <server> - <params> - <port>9999</port> - <taskTimeoutMillis>1000000</taskTimeoutMillis> - <minimumTimeoutMillis>10000</minimumTimeoutMillis> - <maxFiles>10000</maxFiles> - <forkedJvmArgs> - <arg>-Xmx1g</arg> - </forkedJvmArgs> - <endpoints> - <endpoint>rmeta</endpoint> - </endpoints> - </params> + <port>9999</port> + <taskTimeoutMillis>1000000</taskTimeoutMillis> + <minimumTimeoutMillis>10000</minimumTimeoutMillis> + <maxFiles>10000</maxFiles> + <forkedJvmArgs> + <arg>-Xmx1g</arg> + </forkedJvmArgs> + <endpoints> + <endpoint>rmeta</endpoint> + </endpoints> <tlsConfig> - <params> - <active>true</active> - <keyStoreType>PKCS12</keyStoreType> - <keyStorePassword>tika-secret</keyStorePassword> - <keyStoreFile>{SSL_KEYS}/tika-server-keystore.p12</keyStoreFile> - <clientAuthenticationWanted>false</clientAuthenticationWanted> - <clientAuthenticationRequired>false</clientAuthenticationRequired> - </params> + <active>true</active> + <keyStoreType>PKCS12</keyStoreType> + <keyStorePassword>tika-secret</keyStorePassword> + <keyStoreFile>{SSL_KEYS}/tika-server-keystore.p12</keyStoreFile> + <clientAuthenticationWanted>false</clientAuthenticationWanted> + <clientAuthenticationRequired>false</clientAuthenticationRequired> </tlsConfig> </server> </properties> diff --git a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-tls-two-way-template.xml b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-tls-two-way-template.xml index a99579a61..5dcd8e3fc 100644 --- a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-tls-two-way-template.xml +++ b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-tls-two-way-template.xml @@ -17,30 +17,26 @@ --> <properties> <server> - <params> - <port>9999</port> - <taskTimeoutMillis>1000000</taskTimeoutMillis> - <minimumTimeoutMillis>10000</minimumTimeoutMillis> - <maxFiles>10000</maxFiles> - <forkedJvmArgs> - <arg>-Xmx1g</arg> - </forkedJvmArgs> - <endpoints> - <endpoint>rmeta</endpoint> - </endpoints> - </params> + <port>9999</port> + <taskTimeoutMillis>1000000</taskTimeoutMillis> + <minimumTimeoutMillis>10000</minimumTimeoutMillis> + <maxFiles>10000</maxFiles> + <forkedJvmArgs> + <arg>-Xmx1g</arg> + </forkedJvmArgs> + <endpoints> + <endpoint>rmeta</endpoint> + </endpoints> <tlsConfig> - <params> - <active>true</active> - <keyStoreType>PKCS12</keyStoreType> - <keyStorePassword>tika-secret</keyStorePassword> - <keyStoreFile>{SSL_KEYS}/tika-server-keystore.p12</keyStoreFile> - <trustStoreType>PKCS12</trustStoreType> - <trustStorePassword>tika-secret</trustStorePassword> - <trustStoreFile>{SSL_KEYS}/tika-server-truststore.p12</trustStoreFile> - <clientAuthenticationWanted>true</clientAuthenticationWanted> - <clientAuthenticationRequired>true</clientAuthenticationRequired> - </params> + <active>true</active> + <keyStoreType>PKCS12</keyStoreType> + <keyStorePassword>tika-secret</keyStorePassword> + <keyStoreFile>{SSL_KEYS}/tika-server-keystore.p12</keyStoreFile> + <trustStoreType>PKCS12</trustStoreType> + <trustStorePassword>tika-secret</trustStorePassword> + <trustStoreFile>{SSL_KEYS}/tika-server-truststore.p12</trustStoreFile> + <clientAuthenticationWanted>true</clientAuthenticationWanted> + <clientAuthenticationRequired>true</clientAuthenticationRequired> </tlsConfig> </server> </properties> diff --git a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-tls.xml b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-tls.xml index a5186eec3..5c3fef7bc 100644 --- a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-tls.xml +++ b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server-tls.xml @@ -17,29 +17,25 @@ --> <properties> <server> - <params> - <port>9999</port> - <taskTimeoutMillis>54321</taskTimeoutMillis> - <minimumTimeoutMillis>10</minimumTimeoutMillis> - <enableUnsecureFeatures>true</enableUnsecureFeatures> - <maxFiles>20</maxFiles> - <forkedJvmArgs> - <arg>-Xmx2g</arg> - </forkedJvmArgs> - <endpoints> - <endpoint>rmeta</endpoint> - </endpoints> - </params> + <port>9999</port> + <taskTimeoutMillis>54321</taskTimeoutMillis> + <minimumTimeoutMillis>10</minimumTimeoutMillis> + <enableUnsecureFeatures>true</enableUnsecureFeatures> + <maxFiles>20</maxFiles> + <forkedJvmArgs> + <arg>-Xmx2g</arg> + </forkedJvmArgs> + <endpoints> + <endpoint>rmeta</endpoint> + </endpoints> <tlsConfig> - <params> - <active>true</active> - <keyStoreType>myType</keyStoreType> - <keyStorePassword>pass</keyStorePassword> - <keyStoreFile>/something/or/other</keyStoreFile> - <trustStoreType>myType2</trustStoreType> - <trustStorePassword>pass2</trustStorePassword> - <trustStoreFile>/something/or/other2</trustStoreFile> - </params> + <active>true</active> + <keyStoreType>myType</keyStoreType> + <keyStorePassword>pass</keyStorePassword> + <keyStoreFile>/something/or/other</keyStoreFile> + <trustStoreType>myType2</trustStoreType> + <trustStorePassword>pass2</trustStorePassword> + <trustStoreFile>/something/or/other2</trustStoreFile> </tlsConfig> </server> </properties> diff --git a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server.xml b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server.xml index fcfa69ff2..09f62afc2 100644 --- a/tika-server/tika-server-core/src/test/resources/configs/tika-config-server.xml +++ b/tika-server/tika-server-core/src/test/resources/configs/tika-config-server.xml @@ -17,7 +17,6 @@ --> <properties> <server> - <params> <port>9999</port> <taskTimeoutMillis>54321</taskTimeoutMillis> <minimumTimeoutMillis>10</minimumTimeoutMillis> @@ -29,6 +28,5 @@ <endpoints> <endpoint>rmeta</endpoint> </endpoints> - </params> </server> </properties>
