This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 47d54f6 TIKA-3393 -- refactor metadata filters in 2.x
47d54f6 is described below
commit 47d54f6604fea7d8f14dcebd57d421b75d78a6cf
Author: tallison <[email protected]>
AuthorDate: Tue May 11 13:29:03 2021 -0400
TIKA-3393 -- refactor metadata filters in 2.x
---
.../java/org/apache/tika/config/ConfigBase.java | 16 ++-
.../java/org/apache/tika/config/TikaConfig.java | 105 +------------------
.../metadata/filter/ClearByMimeMetadataFilter.java | 3 +-
.../metadata/filter/CompositeMetadataFilter.java | 2 +-
.../filter/ExcludeFieldMetadataFilter.java | 2 +-
.../metadata/filter/FieldNameMappingFilter.java | 41 ++------
.../filter/IncludeFieldMetadataFilter.java | 2 +-
.../tika/metadata/filter/MetadataFilter.java | 31 +++++-
.../apache/tika/metadata/filter/NoOpFilter.java | 2 +-
.../tika/metadata/filter/MockUpperCaseFilter.java | 2 +-
.../tika/metadata/filter/TestMetadataFilter.java | 18 +++-
.../org/apache/tika/config/TIKA-3137-exclude.xml | 8 +-
...137-exclude.xml => TIKA-3137-field-mapping.xml} | 14 ++-
.../apache/tika/config/TIKA-3137-include-uc.xml | 8 +-
.../org/apache/tika/config/TIKA-3137-include.xml | 8 +-
.../org/apache/tika/config/TIKA-3137-mimes-uc.xml | 8 +-
.../eval/core/metadata/TikaEvalMetadataFilter.java | 2 +-
.../tika/parser/RecursiveParserWrapperTest.java | 1 +
.../org/apache/tika/parser/TIKA-3137-include.xml | 18 ++--
.../apache/tika/pipes/emitter/solr/TestBasic.java | 2 +-
.../test/resources/tika-config-simple-emitter.xml | 64 ++++++------
.../test/resources/config/TIKA-3137-include.xml | 18 ++--
.../resources/tika-config-simple-fs-emitter.xml | 114 ++++++++++-----------
.../resources/configs/metadata-filter-include.xml | 10 +-
24 files changed, 219 insertions(+), 280 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java
b/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java
index e332807..7b1b436 100644
--- a/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java
+++ b/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java
@@ -23,13 +23,14 @@ import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.HashMap;
import java.util.HashSet;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
+import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
@@ -101,7 +102,7 @@ public abstract class ConfigBase {
protected static <P, T> P buildComposite(String compositeElementName,
Class<P> compositeClass,
String itemName, Class<T>
itemClass, InputStream is)
throws TikaConfigException, IOException {
- Node properties = null;
+ Element properties = null;
try {
properties = XMLReaderUtils.buildDOM(is).getDocumentElement();
} catch (SAXException e) {
@@ -109,6 +110,14 @@ public abstract class ConfigBase {
} catch (TikaException e) {
throw new TikaConfigException("problem loading xml to dom", e);
}
+ return buildComposite(compositeElementName, compositeClass, itemName,
itemClass,
+ properties);
+ }
+
+ protected static <P, T> P buildComposite(String compositeElementName,
Class<P> compositeClass,
+ String itemName, Class<T> itemClass, Element properties)
throws TikaConfigException,
+ IOException {
+
if (!properties.getLocalName().equals("properties")) {
throw new TikaConfigException("expect properties as root node");
}
@@ -264,7 +273,8 @@ public abstract class ConfigBase {
private static void tryToSetMap(Object object, Node param) throws
TikaConfigException {
String name = param.getLocalName();
//only supports string, string at this point
- Map<String, String> map = new HashMap<>();
+ //use LinkedHashMap to keep insertion order!
+ Map<String, String> map = new LinkedHashMap<>();
NodeList nodeList = param.getChildNodes();
for (int i = 0; i < nodeList.getLength(); i++) {
Node n = nodeList.item(i);
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index a561ca3..800b8e0 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -58,9 +58,8 @@ import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.language.translate.DefaultTranslator;
import org.apache.tika.language.translate.Translator;
-import org.apache.tika.metadata.filter.CompositeMetadataFilter;
-import org.apache.tika.metadata.filter.DefaultMetadataFilter;
import org.apache.tika.metadata.filter.MetadataFilter;
+import org.apache.tika.metadata.filter.NoOpFilter;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
import org.apache.tika.mime.MimeTypeException;
@@ -149,7 +148,6 @@ public class TikaConfig {
TranslatorXmlLoader translatorLoader = new TranslatorXmlLoader();
ExecutorServiceXmlLoader executorLoader = new
ExecutorServiceXmlLoader();
EncodingDetectorXmlLoader encodingDetectorXmlLoader = new
EncodingDetectorXmlLoader();
- MetadataFilterXmlLoader metadataFilterXmlLoader = new
MetadataFilterXmlLoader();
updateXMLReaderUtils(element);
this.mimeTypes = typesFromDomElement(element);
this.detector = detectorLoader.loadOverall(element, mimeTypes, loader);
@@ -159,7 +157,7 @@ public class TikaConfig {
this.parser = parserLoader.loadOverall(element, mimeTypes, loader);
this.translator = translatorLoader.loadOverall(element, mimeTypes,
loader);
this.executorService = executorLoader.loadOverall(element, mimeTypes,
loader);
- this.metadataFilter = metadataFilterXmlLoader.loadOverall(element,
mimeTypes, loader);
+ this.metadataFilter = MetadataFilter.load(element, true);
this.serviceLoader = loader;
TIMES_INSTANTIATED.incrementAndGet();
}
@@ -184,7 +182,7 @@ public class TikaConfig {
this.parser = getDefaultParser(mimeTypes, serviceLoader,
encodingDetector);
this.translator = getDefaultTranslator(serviceLoader);
this.executorService = getDefaultExecutorService();
- this.metadataFilter = getDefaultMetadataFilter(serviceLoader);
+ this.metadataFilter = new NoOpFilter();
TIMES_INSTANTIATED.incrementAndGet();
}
@@ -220,7 +218,7 @@ public class TikaConfig {
this.detector = getDefaultDetector(mimeTypes, serviceLoader);
this.translator = getDefaultTranslator(serviceLoader);
this.executorService = getDefaultExecutorService();
- this.metadataFilter = getDefaultMetadataFilter(serviceLoader);
+ this.metadataFilter = new NoOpFilter();
} else {
ServiceLoader tmpServiceLoader = new ServiceLoader();
try (InputStream stream = getConfigInputStream(config,
tmpServiceLoader)) {
@@ -231,7 +229,6 @@ public class TikaConfig {
EncodingDetectorXmlLoader encodingDetectorLoader = new
EncodingDetectorXmlLoader();
TranslatorXmlLoader translatorLoader = new
TranslatorXmlLoader();
ExecutorServiceXmlLoader executorLoader = new
ExecutorServiceXmlLoader();
- MetadataFilterXmlLoader metadataFilterXmlLoader = new
MetadataFilterXmlLoader();
this.mimeTypes = typesFromDomElement(element);
this.encodingDetector =
@@ -244,8 +241,7 @@ public class TikaConfig {
this.translator = translatorLoader.loadOverall(element,
mimeTypes, serviceLoader);
this.executorService =
executorLoader.loadOverall(element, mimeTypes,
serviceLoader);
- this.metadataFilter =
- metadataFilterXmlLoader.loadOverall(element,
mimeTypes, serviceLoader);
+ this.metadataFilter = MetadataFilter.load(element, true);
} catch (SAXException e) {
throw new TikaException("Specified Tika configuration has
syntax errors: " + config,
e);
@@ -279,10 +275,6 @@ public class TikaConfig {
return new SimpleThreadPoolExecutor();
}
- private static MetadataFilter getDefaultMetadataFilter(ServiceLoader
loader) {
- return new DefaultMetadataFilter(loader);
- }
-
private static InputStream getConfigInputStream(String config,
ServiceLoader serviceLoader)
throws TikaException, IOException {
InputStream stream = null;
@@ -1310,91 +1302,4 @@ public class TikaConfig {
}
}
- private static class MetadataFilterXmlLoader extends
XmlLoader<MetadataFilter, MetadataFilter> {
-
- boolean supportsComposite() {
- return true;
- }
-
- String getParentTagName() {
- return "metadataFilters";
- }
-
- String getLoaderTagName() {
- return "metadataFilter";
- }
-
- @Override
- Class<? extends MetadataFilter> getLoaderClass() {
- return MetadataFilter.class;
- }
-
-
- @Override
- boolean isComposite(MetadataFilter loaded) {
- return loaded instanceof CompositeMetadataFilter;
- }
-
- @Override
- boolean isComposite(Class<? extends MetadataFilter> loadedClass) {
- return CompositeMetadataFilter.class.isAssignableFrom(loadedClass);
- }
-
- @Override
- MetadataFilter preLoadOne(Class<? extends MetadataFilter> loadedClass,
String classname,
- MimeTypes mimeTypes) throws TikaException {
- // Check for classes which can't be set in config
- // Continue with normal loading
- return null;
- }
-
- @Override
- MetadataFilter createDefault(MimeTypes mimeTypes, ServiceLoader
loader) {
- return getDefaultMetadataFilter(loader);
- }
-
- //this ignores the service loader
- @Override
- MetadataFilter createComposite(List<MetadataFilter> loaded, MimeTypes
mimeTypes,
- ServiceLoader loader) {
- return new DefaultMetadataFilter(loaded);
- }
-
- @Override
- MetadataFilter createComposite(Class<? extends MetadataFilter>
metadataFilterClass,
- List<MetadataFilter>
childMetadataFilters,
- Set<Class<? extends MetadataFilter>>
excludeFilters,
- Map<String, Param> params, MimeTypes
mimeTypes,
- ServiceLoader loader)
- throws InvocationTargetException, IllegalAccessException,
InstantiationException {
- MetadataFilter metadataFilter = null;
- Constructor<? extends MetadataFilter> c;
-
- // Try the possible default and composite detector constructors
- if (metadataFilter == null) {
- try {
- c =
metadataFilterClass.getConstructor(ServiceLoader.class, Collection.class);
- metadataFilter = c.newInstance(loader, excludeFilters);
- } catch (NoSuchMethodException me) {
- me.printStackTrace();
- }
- }
- if (metadataFilter == null) {
- try {
- c = metadataFilterClass.getConstructor(List.class);
- metadataFilter = c.newInstance(childMetadataFilters);
- } catch (NoSuchMethodException me) {
- me.printStackTrace();
- }
- }
-
- return metadataFilter;
- }
-
- @Override
- MetadataFilter decorate(MetadataFilter created, Element element) {
- return created; // No decoration of MetadataFilters
- }
- }
-
}
diff --git
a/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java
b/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java
index 3998994..f196436 100644
---
a/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java
+++
b/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java
@@ -30,7 +30,7 @@ import org.apache.tika.mime.MediaType;
* mime matches the mime filter. The idea is that you might not want
* to store/transmit metadata for images or specific file types.
*/
-public class ClearByMimeMetadataFilter implements MetadataFilter {
+public class ClearByMimeMetadataFilter extends MetadataFilter {
private final Set<String> mimes;
public ClearByMimeMetadataFilter() {
@@ -55,7 +55,6 @@ public class ClearByMimeMetadataFilter implements
MetadataFilter {
for (String n : metadata.names()) {
metadata.remove(n);
}
-
}
}
diff --git
a/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java
b/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java
index a058163..2c7d976 100644
---
a/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java
+++
b/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java
@@ -21,7 +21,7 @@ import java.util.List;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
-public class CompositeMetadataFilter implements MetadataFilter {
+public class CompositeMetadataFilter extends MetadataFilter {
private final List<MetadataFilter> filters;
diff --git
a/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java
b/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java
index 2aa4167..59d10d9 100644
---
a/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java
+++
b/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java
@@ -24,7 +24,7 @@ import org.apache.tika.config.Field;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
-public class ExcludeFieldMetadataFilter implements MetadataFilter {
+public class ExcludeFieldMetadataFilter extends MetadataFilter {
private final Set<String> excludeSet;
public ExcludeFieldMetadataFilter() {
diff --git
a/tika-core/src/main/java/org/apache/tika/metadata/filter/FieldNameMappingFilter.java
b/tika-core/src/main/java/org/apache/tika/metadata/filter/FieldNameMappingFilter.java
index 891f8e8..db16f5d 100644
---
a/tika-core/src/main/java/org/apache/tika/metadata/filter/FieldNameMappingFilter.java
+++
b/tika-core/src/main/java/org/apache/tika/metadata/filter/FieldNameMappingFilter.java
@@ -16,18 +16,16 @@
*/
package org.apache.tika.metadata.filter;
-import java.util.HashMap;
-import java.util.List;
+import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.tika.config.Field;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
-public class FieldNameMappingFilter implements MetadataFilter {
- private static final String MAPPING_OPERATOR = "->";
+public class FieldNameMappingFilter extends MetadataFilter {
- Map<String, String> mapping = new HashMap<>();
+ Map<String, String> mappings = new LinkedHashMap<>();
boolean excludeUnmapped = true;
@@ -35,23 +33,23 @@ public class FieldNameMappingFilter implements
MetadataFilter {
public void filter(Metadata metadata) throws TikaException {
if (excludeUnmapped) {
for (String n : metadata.names()) {
- if (mapping.containsKey(n)) {
+ if (mappings.containsKey(n)) {
String[] vals = metadata.getValues(n);
metadata.remove(n);
for (String val : vals) {
- metadata.add(mapping.get(n), val);
+ metadata.add(mappings.get(n), val);
}
} else {
- mapping.remove(n);
+ metadata.remove(n);
}
}
} else {
for (String n : metadata.names()) {
- if (mapping.containsKey(n)) {
+ if (mappings.containsKey(n)) {
String[] vals = metadata.getValues(n);
metadata.remove(n);
for (String val : vals) {
- metadata.add(mapping.get(n), val);
+ metadata.add(mappings.get(n), val);
}
}
}
@@ -72,26 +70,9 @@ public class FieldNameMappingFilter implements
MetadataFilter {
}
@Field
- public void setMappings(List<String> mappings) {
- for (String m : mappings) {
- String[] args = m.split(MAPPING_OPERATOR);
- if (args.length == 0 || args.length == 1) {
- throw new IllegalArgumentException("Can't find mapping
operator '->' in: " + m);
- } else if (args.length > 2) {
- throw new IllegalArgumentException(
- "Must have only one mapping operator. I found more
than one: " + m);
- }
- String from = args[0].trim();
- if (from.length() == 0) {
- throw new IllegalArgumentException(
- "Must contain content before the " + "mapping operator
'->'");
- }
- String to = args[1].trim();
- if (to.length() == 0) {
- throw new IllegalArgumentException(
- "Must contain content after the " + "mapping operator
'->'");
- }
- mapping.put(from, to);
+ public void setMappings(Map<String, String> mappings) {
+ for (Map.Entry<String, String> e : mappings.entrySet()) {
+ this.mappings.put(e.getKey(), e.getValue());
}
}
}
diff --git
a/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java
b/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java
index 0a82590..b75de6a 100644
---
a/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java
+++
b/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java
@@ -24,7 +24,7 @@ import org.apache.tika.config.Field;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
-public class IncludeFieldMetadataFilter implements MetadataFilter {
+public class IncludeFieldMetadataFilter extends MetadataFilter {
private final Set<String> includeSet;
public IncludeFieldMetadataFilter() {
diff --git
a/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java
b/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java
index 59c5f3b..21eb3ec 100644
---
a/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java
+++
b/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java
@@ -17,17 +17,42 @@
package org.apache.tika.metadata.filter;
+import java.io.IOException;
import java.io.Serializable;
+import org.w3c.dom.Element;
+
+import org.apache.tika.config.ConfigBase;
+import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
/**
- * Filters the metadata in place
+ * Filters the metadata in place after the parse
*
* @since Apache Tika 1.25
*/
-public interface MetadataFilter extends Serializable {
+public abstract class MetadataFilter extends ConfigBase implements
Serializable {
+
+ /**
+ * Loads the metadata filter from the config file if it exists, otherwise
returns NoOpFilter
+ * @param root
+ * @return
+ * @throws TikaConfigException
+ * @throws IOException
+ */
+ public static MetadataFilter load(Element root, boolean allowMissing)
throws TikaConfigException,
+ IOException {
+ try {
+ return buildComposite("metadataFilters",
CompositeMetadataFilter.class,
+ "metadataFilter", MetadataFilter.class, root);
+ } catch (TikaConfigException e) {
+ if (allowMissing && e.getMessage().contains("could not find
metadataFilters")) {
+ return new NoOpFilter();
+ }
+ throw e;
+ }
+ }
- void filter(Metadata metadata) throws TikaException;
+ public abstract void filter(Metadata metadata) throws TikaException;
}
diff --git
a/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java
b/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java
index 9cd1ec3..f4e1090 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java
@@ -23,7 +23,7 @@ import org.apache.tika.metadata.Metadata;
* This filter performs no operations on the metadata
* and leaves it untouched.
*/
-public class NoOpFilter implements MetadataFilter {
+public class NoOpFilter extends MetadataFilter {
public static NoOpFilter NOOP_FILTER = new NoOpFilter();
diff --git
a/tika-core/src/test/java/org/apache/tika/metadata/filter/MockUpperCaseFilter.java
b/tika-core/src/test/java/org/apache/tika/metadata/filter/MockUpperCaseFilter.java
index fe12b82..ac64734 100644
---
a/tika-core/src/test/java/org/apache/tika/metadata/filter/MockUpperCaseFilter.java
+++
b/tika-core/src/test/java/org/apache/tika/metadata/filter/MockUpperCaseFilter.java
@@ -24,7 +24,7 @@ import org.apache.tika.metadata.Metadata;
/**
* Mock Filter for testing uppercasing of all values
*/
-public class MockUpperCaseFilter implements MetadataFilter {
+public class MockUpperCaseFilter extends MetadataFilter {
@Override
public void filter(Metadata metadata) throws TikaException {
diff --git
a/tika-core/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java
b/tika-core/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java
index d12d7ed..d77e373 100644
---
a/tika-core/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java
+++
b/tika-core/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java
@@ -30,6 +30,7 @@ import org.junit.Test;
import org.apache.tika.config.AbstractTikaConfigTest;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
public class TestMetadataFilter extends AbstractTikaConfigTest {
@@ -153,7 +154,6 @@ public class TestMetadataFilter extends
AbstractTikaConfigTest {
MetadataFilter filter = config.getMetadataFilter();
filter.filter(metadata);
- debug(metadata);
assertEquals(0, metadata.size());
metadata.set(Metadata.CONTENT_TYPE,
MediaType.text("plain").toString());
@@ -161,6 +161,22 @@ public class TestMetadataFilter extends
AbstractTikaConfigTest {
filter.filter(metadata);
assertEquals(2, metadata.size());
assertEquals("AUTHOR", metadata.get("author"));
+ }
+
+ @Test
+ public void testFieldNameMapping() throws Exception {
+ TikaConfig config = getConfig("TIKA-3137-field-mapping.xml");
+ Metadata metadata = new Metadata();
+ metadata.set(TikaCoreProperties.TIKA_CONTENT, "quick brown fox");
+ metadata.set("author", "author");
+ metadata.set("a", "a-value");
+
+ MetadataFilter filter = config.getMetadataFilter();
+ filter.filter(metadata);
+ assertEquals("quick brown fox", metadata.get("content"));
+ assertEquals("a-value", metadata.get("b"));
+ assertNull(metadata.get("author"));
+ assertNull(metadata.get("a"));
}
}
diff --git
a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml
b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml
index 96dac44..95ba73b 100644
--- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml
@@ -19,10 +19,10 @@
<metadataFilters>
<metadataFilter
class="org.apache.tika.metadata.filter.ExcludeFieldMetadataFilter">
<params>
- <param name="exclude" type="list">
- <string>title</string>
- <string>author</string>
- </param>
+ <exclude>
+ <field>title</field>
+ <field>author</field>
+ </exclude>
</params>
</metadataFilter>
</metadataFilters>
diff --git
a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml
b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-field-mapping.xml
similarity index 69%
copy from
tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml
copy to
tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-field-mapping.xml
index 96dac44..e5118b9 100644
--- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml
+++
b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-field-mapping.xml
@@ -17,12 +17,16 @@
-->
<properties>
<metadataFilters>
- <metadataFilter
class="org.apache.tika.metadata.filter.ExcludeFieldMetadataFilter">
+ <metadataFilter
class="org.apache.tika.metadata.filter.FieldNameMappingFilter">
<params>
- <param name="exclude" type="list">
- <string>title</string>
- <string>author</string>
- </param>
+ <excludeUnmapped>true</excludeUnmapped>
+ <mappings>
+ <mapping from="X-TIKA:content" to="content"/>
+ <mapping from="a" to="b"/>
+ <!-- note that the mapping only works once...not recursively -->
+ <mapping from="b" to="c"/>
+ <mapping from="c" to="d"/>
+ </mappings>
</params>
</metadataFilter>
</metadataFilters>
diff --git
a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml
b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml
index f960e94..15eb62c 100644
---
a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml
+++
b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml
@@ -19,10 +19,10 @@
<metadataFilters>
<metadataFilter
class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
<params>
- <param name="include" type="list">
- <string>title</string>
- <string>author</string>
- </param>
+ <include>
+ <field>title</field>
+ <field>author</field>
+ </include>
</params>
</metadataFilter>
<metadataFilter
class="org.apache.tika.metadata.filter.MockUpperCaseFilter"/>
diff --git
a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml
b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml
index 8832915..f8ffa90 100644
--- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml
@@ -19,10 +19,10 @@
<metadataFilters>
<metadataFilter
class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
<params>
- <param name="include" type="list">
- <string>title</string>
- <string>author</string>
- </param>
+ <include>
+ <field>title</field>
+ <field>author</field>
+ </include>
</params>
</metadataFilter>
</metadataFilters>
diff --git
a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml
b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml
index a151665..6278421 100644
--- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml
@@ -19,10 +19,10 @@
<metadataFilters>
<metadataFilter
class="org.apache.tika.metadata.filter.ClearByMimeMetadataFilter">
<params>
- <param name="mimes" type="list">
- <string>image/jpeg</string>
- <string>application/pdf</string>
- </param>
+ <mimes>
+ <mime>image/jpeg</mime>
+ <mime>application/pdf</mime>
+ </mimes>
</params>
</metadataFilter>
<metadataFilter
class="org.apache.tika.metadata.filter.MockUpperCaseFilter"/>
diff --git
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
index bb5a848..0ac65d2 100644
---
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
+++
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
@@ -36,7 +36,7 @@ import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.filter.MetadataFilter;
-public class TikaEvalMetadataFilter implements MetadataFilter {
+public class TikaEvalMetadataFilter extends MetadataFilter {
public static String TIKA_EVAL_NS = "tika-eval" +
TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
diff --git
a/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
b/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
index 0222f65..86915c5 100644
---
a/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
+++
b/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
@@ -318,6 +318,7 @@ public class RecursiveParserWrapperTest extends TikaTest {
Metadata metadata = new Metadata();
TikaConfig tikaConfig = null;
Parser p = null;
+
System.out.println(getResourceAsFile("TIKA-3137-include.xml").getAbsolutePath());
try (InputStream is = getResourceAsStream("TIKA-3137-include.xml")) {
tikaConfig = new TikaConfig(is);
p = new AutoDetectParser(tikaConfig);
diff --git
a/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml
b/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml
index b99af0b..056e64c 100644
---
a/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml
+++
b/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml
@@ -19,19 +19,19 @@
<metadataFilters>
<metadataFilter
class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
<params>
- <param name="include" type="list">
- <string>X-TIKA:content</string>
- <string>extended-properties:Application</string>
- <string>Content-Type</string>
- </param>
+ <include>
+ <field>X-TIKA:content</field>
+ <field>extended-properties:Application</field>
+ <field>Content-Type</field>
+ </include>
</params>
</metadataFilter>
<metadataFilter
class="org.apache.tika.metadata.filter.ClearByMimeMetadataFilter">
<params>
- <param name="mimes" type="list">
- <string>image/emf</string>
- <string>text/plain</string>
- </param>
+ <mimes>
+ <mime>image/emf</mime>
+ <mime>text/plain</mime>
+ </mimes>
</params>
</metadataFilter>
</metadataFilters>
diff --git
a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/java/org/apache/tika/pipes/emitter/solr/TestBasic.java
b/tika-pipes/tika-emitters/tika-emitter-solr/src/test/java/org/apache/tika/pipes/emitter/solr/TestBasic.java
index 9afcc51..d88947e 100644
---
a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/java/org/apache/tika/pipes/emitter/solr/TestBasic.java
+++
b/tika-pipes/tika-emitters/tika-emitter-solr/src/test/java/org/apache/tika/pipes/emitter/solr/TestBasic.java
@@ -34,7 +34,7 @@ import org.apache.tika.pipes.emitter.EmitKey;
import org.apache.tika.pipes.emitter.Emitter;
import org.apache.tika.pipes.emitter.EmitterManager;
-@Ignore("requires solr to be up and running")
+@Ignore("requires solr to be up and running; please dockerize some tests,
please, please")
public class TestBasic {
@Test
diff --git
a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/tika-config-simple-emitter.xml
b/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/tika-config-simple-emitter.xml
index ba68b7f..c52da5e 100644
---
a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/tika-config-simple-emitter.xml
+++
b/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/tika-config-simple-emitter.xml
@@ -18,37 +18,35 @@
under the License.
-->
<properties>
- <metadataFilters>
- <metadataFilter
class="org.apache.tika.metadata.filter.FieldNameMappingFilter">
- <params>
- <param name="mappings" type="list">
- <string>X-TIKA:content->content</string>
-
<string>X-TIKA:embedded_resource_path->embedded_path</string>
- <string>Content-Length->length</string>
- <string>dc:creator->creators</string>
- <string>dc:title->title</string>
- </param>
- </params>
- </metadataFilter>
- </metadataFilters>
- <emitters>
- <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter">
- <params>
- <name>solr1</name>
- <url>http://localhost:8983/solr/tika-test</url>
- <attachmentStrategy>concatenate-content</attachmentStrategy>
- <contentField>content</contentField>
- <commitWithin>10</commitWithin>
- </params>
- </emitter>
- <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter">
- <params>
- <name>solr2</name>
- <url>http://localhost:8983/solr/tika-test</url>
- <attachmentStrategy>parent-child</attachmentStrategy>
- <contentField>content</contentField>
- <commitWithin>10</commitWithin>
- </params>
- </emitter>
- </emitters>
+ <metadataFilters>
+ <metadataFilter
class="org.apache.tika.metadata.filter.FieldNameMappingFilter">
+ <mappings>
+ <mapping from="X-TIKA:content" to="content"/>
+ <mapping from="X-TIKA:embedded_resource_path" to="embedded_path"/>
+ <mapping from="Content-Length" to="length"/>
+ <mapping from="dc:creator" to="creators"/>
+ <mapping from="dc:title" to="title"/>
+ </mappings>
+ </metadataFilter>
+ </metadataFilters>
+ <emitters>
+ <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter">
+ <params>
+ <name>solr1</name>
+ <url>http://localhost:8983/solr/tika-test</url>
+ <attachmentStrategy>concatenate-content</attachmentStrategy>
+ <contentField>content</contentField>
+ <commitWithin>10</commitWithin>
+ </params>
+ </emitter>
+ <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter">
+ <params>
+ <name>solr2</name>
+ <url>http://localhost:8983/solr/tika-test</url>
+ <attachmentStrategy>parent-child</attachmentStrategy>
+ <contentField>content</contentField>
+ <commitWithin>10</commitWithin>
+ </params>
+ </emitter>
+ </emitters>
</properties>
\ No newline at end of file
diff --git
a/tika-server/tika-server-classic/src/test/resources/config/TIKA-3137-include.xml
b/tika-server/tika-server-classic/src/test/resources/config/TIKA-3137-include.xml
index 5e563bf..da1182a 100644
---
a/tika-server/tika-server-classic/src/test/resources/config/TIKA-3137-include.xml
+++
b/tika-server/tika-server-classic/src/test/resources/config/TIKA-3137-include.xml
@@ -19,19 +19,19 @@
<metadataFilters>
<metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
<params>
- <param name="include" type="list">
- <string>X-TIKA:content</string>
- <string>extended-properties:Application</string>
- <string>Content-Type</string>
- </param>
+ <include>
+ <field>X-TIKA:content</field>
+ <field>extended-properties:Application</field>
+ <field>Content-Type</field>
+ </include>
</params>
</metadataFilter>
<metadataFilter class="org.apache.tika.metadata.filter.ClearByMimeMetadataFilter">
<params>
- <param name="mimes" type="list">
- <string>image/emf</string>
- <string>text/plain</string>
- </param>
+ <mimes>
+ <mime>image/emf</mime>
+ <mime>text/plain</mime>
+ </mimes>
</params>
</metadataFilter>
</metadataFilters>
diff --git a/tika-server/tika-server-client/src/test/resources/tika-config-simple-fs-emitter.xml b/tika-server/tika-server-client/src/test/resources/tika-config-simple-fs-emitter.xml
index 0b029d8..62e692f 100644
--- a/tika-server/tika-server-client/src/test/resources/tika-config-simple-fs-emitter.xml
+++ b/tika-server/tika-server-client/src/test/resources/tika-config-simple-fs-emitter.xml
@@ -18,61 +18,61 @@
under the License.
-->
<properties>
- <service-loader initializableProblemHandler="throw"/>
- <pipesIterators>
- <pipesIterator class="org.apache.tika.pipes.pipesiterator.FileSystemPipesIterator">
- <params>
- <fetcherName>fs</fetcherName>
- <basePath>fix</basePath>
- </params>
- </pipesIterator>
- </pipesIterators>
- <fetchers>
- <fetcher class="org.apache.tika.pipes.fetcher.FileSystemFetcher">
- <params>
- <name>fs</name>
- <basePath>fix</basePath>
- </params>
- </fetcher>
- </fetchers>
- <metadataFilters>
- <metadataFilter class="org.apache.tika.metadata.filter.FieldNameMappingFilter">
- <params>
- <param name="mappings" type="list">
- <string>X-TIKA:content->content</string>
- <string>X-TIKA:embedded_resource_path->embedded_path</string>
- <string>Content-Length->length</string>
- <string>dc:creator->creators</string>
- <string>dc:title->title</string>
- </param>
- </params>
- </metadataFilter>
- </metadataFilters>
- <emitters>
- <emitter class="org.apache.tika.pipes.emitter.fs.FileSystemEmitter">
- <params>
- <name>fs</name>
- <basePath>fix</basePath>
- </params>
- </emitter>
- <!--
- <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter">
- <params>
- <param name="name" type="string">solr1</param>
- <param name="url" type="string">http://localhost:8983/solr/tika-test</param>
- <param name="attachmentStrategy" type="string">concatenate-content</param>
- <param name="contentField" type="string">content</param>
- <param name="commitWithin" type="int">10</param>
- </params>
- </emitter>
- <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter">
- <params>
- <param name="name" type="string">solr2</param>
- <param name="url" type="string">http://localhost:8983/solr/tika-test</param>
- <param name="attachmentStrategy" type="string">parent-child</param>
- <param name="contentField" type="string">content</param>
- <param name="commitWithin" type="int">10</param>
- </params>
- </emitter>-->
- </emitters>
+ <service-loader initializableProblemHandler="throw"/>
+ <pipesIterators>
+ <pipesIterator class="org.apache.tika.pipes.pipesiterator.FileSystemPipesIterator">
+ <params>
+ <fetcherName>fs</fetcherName>
+ <basePath>fix</basePath>
+ </params>
+ </pipesIterator>
+ </pipesIterators>
+ <fetchers>
+ <fetcher class="org.apache.tika.pipes.fetcher.FileSystemFetcher">
+ <params>
+ <name>fs</name>
+ <basePath>fix</basePath>
+ </params>
+ </fetcher>
+ </fetchers>
+ <metadataFilters>
+ <metadataFilter class="org.apache.tika.metadata.filter.FieldNameMappingFilter">
+ <params>
+ <mappings>
+ <mapping from="X-TIKA:content" to="content"/>
+ <mapping from="X-TIKA:embedded_resource_path" to="embedded_path"/>
+ <mapping from="Content-Length" to="length"/>
+ <mapping from="dc:creator" to="creators"/>
+ <mapping from="dc:title" to="title"/>
+ </mappings>
+ </params>
+ </metadataFilter>
+ </metadataFilters>
+ <emitters>
+ <emitter class="org.apache.tika.pipes.emitter.fs.FileSystemEmitter">
+ <params>
+ <name>fs</name>
+ <basePath>fix</basePath>
+ </params>
+ </emitter>
+ <!--
+ <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter">
+ <params>
+ <param name="name" type="string">solr1</param>
+ <param name="url" type="string">http://localhost:8983/solr/tika-test</param>
+ <param name="attachmentStrategy" type="string">concatenate-content</param>
+ <param name="contentField" type="string">content</param>
+ <param name="commitWithin" type="int">10</param>
+ </params>
+ </emitter>
+ <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter">
+ <params>
+ <param name="name" type="string">solr2</param>
+ <param name="url" type="string">http://localhost:8983/solr/tika-test</param>
+ <param name="attachmentStrategy" type="string">parent-child</param>
+ <param name="contentField" type="string">content</param>
+ <param name="commitWithin" type="int">10</param>
+ </params>
+ </emitter>-->
+ </emitters>
</properties>
\ No newline at end of file
diff --git a/tika-server/tika-server-core/src/test/resources/configs/metadata-filter-include.xml b/tika-server/tika-server-core/src/test/resources/configs/metadata-filter-include.xml
index 3a7a7c1..96ce8e5 100644
--- a/tika-server/tika-server-core/src/test/resources/configs/metadata-filter-include.xml
+++ b/tika-server/tika-server-core/src/test/resources/configs/metadata-filter-include.xml
@@ -19,11 +19,11 @@
<metadataFilters>
<metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
<params>
- <param name="include" type="list">
- <string>X-TIKA:content</string>
- <string>extended-properties:Application</string>
- <string>Content-Type</string>
- </param>
+ <include>
+ <field>X-TIKA:content</field>
+ <field>extended-properties:Application</field>
+ <field>Content-Type</field>
+ </include>
</params>
</metadataFilter>
</metadataFilters>