This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 47d54f6  TIKA-3393 -- refactor metadata filters in 2.x
47d54f6 is described below

commit 47d54f6604fea7d8f14dcebd57d421b75d78a6cf
Author: tallison <[email protected]>
AuthorDate: Tue May 11 13:29:03 2021 -0400

    TIKA-3393 -- refactor metadata filters in 2.x
---
 .../java/org/apache/tika/config/ConfigBase.java    |  16 ++-
 .../java/org/apache/tika/config/TikaConfig.java    | 105 +------------------
 .../metadata/filter/ClearByMimeMetadataFilter.java |   3 +-
 .../metadata/filter/CompositeMetadataFilter.java   |   2 +-
 .../filter/ExcludeFieldMetadataFilter.java         |   2 +-
 .../metadata/filter/FieldNameMappingFilter.java    |  41 ++------
 .../filter/IncludeFieldMetadataFilter.java         |   2 +-
 .../tika/metadata/filter/MetadataFilter.java       |  31 +++++-
 .../apache/tika/metadata/filter/NoOpFilter.java    |   2 +-
 .../tika/metadata/filter/MockUpperCaseFilter.java  |   2 +-
 .../tika/metadata/filter/TestMetadataFilter.java   |  18 +++-
 .../org/apache/tika/config/TIKA-3137-exclude.xml   |   8 +-
 ...137-exclude.xml => TIKA-3137-field-mapping.xml} |  14 ++-
 .../apache/tika/config/TIKA-3137-include-uc.xml    |   8 +-
 .../org/apache/tika/config/TIKA-3137-include.xml   |   8 +-
 .../org/apache/tika/config/TIKA-3137-mimes-uc.xml  |   8 +-
 .../eval/core/metadata/TikaEvalMetadataFilter.java |   2 +-
 .../tika/parser/RecursiveParserWrapperTest.java    |   1 +
 .../org/apache/tika/parser/TIKA-3137-include.xml   |  18 ++--
 .../apache/tika/pipes/emitter/solr/TestBasic.java  |   2 +-
 .../test/resources/tika-config-simple-emitter.xml  |  64 ++++++------
 .../test/resources/config/TIKA-3137-include.xml    |  18 ++--
 .../resources/tika-config-simple-fs-emitter.xml    | 114 ++++++++++-----------
 .../resources/configs/metadata-filter-include.xml  |  10 +-
 24 files changed, 219 insertions(+), 280 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java 
b/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java
index e332807..7b1b436 100644
--- a/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java
+++ b/tika-core/src/main/java/org/apache/tika/config/ConfigBase.java
@@ -23,13 +23,14 @@ import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
 import java.util.ArrayList;
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.HashSet;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 
+import org.w3c.dom.Element;
 import org.w3c.dom.NamedNodeMap;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
@@ -101,7 +102,7 @@ public abstract class ConfigBase {
     protected static <P, T> P buildComposite(String compositeElementName, 
Class<P> compositeClass,
                                              String itemName, Class<T> 
itemClass, InputStream is)
             throws TikaConfigException, IOException {
-        Node properties = null;
+        Element properties = null;
         try {
             properties = XMLReaderUtils.buildDOM(is).getDocumentElement();
         } catch (SAXException e) {
@@ -109,6 +110,14 @@ public abstract class ConfigBase {
         } catch (TikaException e) {
             throw new TikaConfigException("problem loading xml to dom", e);
         }
+        return buildComposite(compositeElementName, compositeClass, itemName, 
itemClass,
+                properties);
+    }
+
+    protected static <P, T> P buildComposite(String compositeElementName, 
Class<P> compositeClass,
+                String itemName, Class<T> itemClass, Element properties) 
throws TikaConfigException,
+            IOException {
+
         if (!properties.getLocalName().equals("properties")) {
             throw new TikaConfigException("expect properties as root node");
         }
@@ -264,7 +273,8 @@ public abstract class ConfigBase {
     private static void tryToSetMap(Object object, Node param) throws 
TikaConfigException {
         String name = param.getLocalName();
         //only supports string, string at this point
-        Map<String, String> map = new HashMap<>();
+        //use LinkedHashMap to keep insertion order!
+        Map<String, String> map = new LinkedHashMap<>();
         NodeList nodeList = param.getChildNodes();
         for (int i = 0; i < nodeList.getLength(); i++) {
             Node n = nodeList.item(i);
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java 
b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index a561ca3..800b8e0 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -58,9 +58,8 @@ import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.language.translate.DefaultTranslator;
 import org.apache.tika.language.translate.Translator;
-import org.apache.tika.metadata.filter.CompositeMetadataFilter;
-import org.apache.tika.metadata.filter.DefaultMetadataFilter;
 import org.apache.tika.metadata.filter.MetadataFilter;
+import org.apache.tika.metadata.filter.NoOpFilter;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MediaTypeRegistry;
 import org.apache.tika.mime.MimeTypeException;
@@ -149,7 +148,6 @@ public class TikaConfig {
         TranslatorXmlLoader translatorLoader = new TranslatorXmlLoader();
         ExecutorServiceXmlLoader executorLoader = new 
ExecutorServiceXmlLoader();
         EncodingDetectorXmlLoader encodingDetectorXmlLoader = new 
EncodingDetectorXmlLoader();
-        MetadataFilterXmlLoader metadataFilterXmlLoader = new 
MetadataFilterXmlLoader();
         updateXMLReaderUtils(element);
         this.mimeTypes = typesFromDomElement(element);
         this.detector = detectorLoader.loadOverall(element, mimeTypes, loader);
@@ -159,7 +157,7 @@ public class TikaConfig {
         this.parser = parserLoader.loadOverall(element, mimeTypes, loader);
         this.translator = translatorLoader.loadOverall(element, mimeTypes, 
loader);
         this.executorService = executorLoader.loadOverall(element, mimeTypes, 
loader);
-        this.metadataFilter = metadataFilterXmlLoader.loadOverall(element, 
mimeTypes, loader);
+        this.metadataFilter = MetadataFilter.load(element, true);
         this.serviceLoader = loader;
         TIMES_INSTANTIATED.incrementAndGet();
     }
@@ -184,7 +182,7 @@ public class TikaConfig {
         this.parser = getDefaultParser(mimeTypes, serviceLoader, 
encodingDetector);
         this.translator = getDefaultTranslator(serviceLoader);
         this.executorService = getDefaultExecutorService();
-        this.metadataFilter = getDefaultMetadataFilter(serviceLoader);
+        this.metadataFilter = new NoOpFilter();
         TIMES_INSTANTIATED.incrementAndGet();
     }
 
@@ -220,7 +218,7 @@ public class TikaConfig {
             this.detector = getDefaultDetector(mimeTypes, serviceLoader);
             this.translator = getDefaultTranslator(serviceLoader);
             this.executorService = getDefaultExecutorService();
-            this.metadataFilter = getDefaultMetadataFilter(serviceLoader);
+            this.metadataFilter = new NoOpFilter();
         } else {
             ServiceLoader tmpServiceLoader = new ServiceLoader();
             try (InputStream stream = getConfigInputStream(config, 
tmpServiceLoader)) {
@@ -231,7 +229,6 @@ public class TikaConfig {
                 EncodingDetectorXmlLoader encodingDetectorLoader = new 
EncodingDetectorXmlLoader();
                 TranslatorXmlLoader translatorLoader = new 
TranslatorXmlLoader();
                 ExecutorServiceXmlLoader executorLoader = new 
ExecutorServiceXmlLoader();
-                MetadataFilterXmlLoader metadataFilterXmlLoader = new 
MetadataFilterXmlLoader();
 
                 this.mimeTypes = typesFromDomElement(element);
                 this.encodingDetector =
@@ -244,8 +241,7 @@ public class TikaConfig {
                 this.translator = translatorLoader.loadOverall(element, 
mimeTypes, serviceLoader);
                 this.executorService =
                         executorLoader.loadOverall(element, mimeTypes, 
serviceLoader);
-                this.metadataFilter =
-                        metadataFilterXmlLoader.loadOverall(element, 
mimeTypes, serviceLoader);
+                this.metadataFilter = MetadataFilter.load(element, true);
             } catch (SAXException e) {
                 throw new TikaException("Specified Tika configuration has 
syntax errors: " + config,
                         e);
@@ -279,10 +275,6 @@ public class TikaConfig {
         return new SimpleThreadPoolExecutor();
     }
 
-    private static MetadataFilter getDefaultMetadataFilter(ServiceLoader 
loader) {
-        return new DefaultMetadataFilter(loader);
-    }
-
     private static InputStream getConfigInputStream(String config, 
ServiceLoader serviceLoader)
             throws TikaException, IOException {
         InputStream stream = null;
@@ -1310,91 +1302,4 @@ public class TikaConfig {
         }
     }
 
-    private static class MetadataFilterXmlLoader extends 
XmlLoader<MetadataFilter, MetadataFilter> {
-
-        boolean supportsComposite() {
-            return true;
-        }
-
-        String getParentTagName() {
-            return "metadataFilters";
-        }
-
-        String getLoaderTagName() {
-            return "metadataFilter";
-        }
-
-        @Override
-        Class<? extends MetadataFilter> getLoaderClass() {
-            return MetadataFilter.class;
-        }
-
-
-        @Override
-        boolean isComposite(MetadataFilter loaded) {
-            return loaded instanceof CompositeMetadataFilter;
-        }
-
-        @Override
-        boolean isComposite(Class<? extends MetadataFilter> loadedClass) {
-            return CompositeMetadataFilter.class.isAssignableFrom(loadedClass);
-        }
-
-        @Override
-        MetadataFilter preLoadOne(Class<? extends MetadataFilter> loadedClass, 
String classname,
-                                  MimeTypes mimeTypes) throws TikaException {
-            // Check for classes which can't be set in config
-            // Continue with normal loading
-            return null;
-        }
-
-        @Override
-        MetadataFilter createDefault(MimeTypes mimeTypes, ServiceLoader 
loader) {
-            return getDefaultMetadataFilter(loader);
-        }
-
-        //this ignores the service loader
-        @Override
-        MetadataFilter createComposite(List<MetadataFilter> loaded, MimeTypes 
mimeTypes,
-                                       ServiceLoader loader) {
-            return new DefaultMetadataFilter(loaded);
-        }
-
-        @Override
-        MetadataFilter createComposite(Class<? extends MetadataFilter> 
metadataFilterClass,
-                                       List<MetadataFilter> 
childMetadataFilters,
-                                       Set<Class<? extends MetadataFilter>> 
excludeFilters,
-                                       Map<String, Param> params, MimeTypes 
mimeTypes,
-                                       ServiceLoader loader)
-                throws InvocationTargetException, IllegalAccessException, 
InstantiationException {
-            MetadataFilter metadataFilter = null;
-            Constructor<? extends MetadataFilter> c;
-
-            // Try the possible default and composite detector constructors
-            if (metadataFilter == null) {
-                try {
-                    c = 
metadataFilterClass.getConstructor(ServiceLoader.class, Collection.class);
-                    metadataFilter = c.newInstance(loader, excludeFilters);
-                } catch (NoSuchMethodException me) {
-                    me.printStackTrace();
-                }
-            }
-            if (metadataFilter == null) {
-                try {
-                    c = metadataFilterClass.getConstructor(List.class);
-                    metadataFilter = c.newInstance(childMetadataFilters);
-                } catch (NoSuchMethodException me) {
-                    me.printStackTrace();
-                }
-            }
-
-            return metadataFilter;
-        }
-
-        @Override
-        MetadataFilter decorate(MetadataFilter created, Element element) {
-            return created; // No decoration of MetadataFilters
-        }
-    }
-
 }
diff --git 
a/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java
 
b/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java
index 3998994..f196436 100644
--- 
a/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java
+++ 
b/tika-core/src/main/java/org/apache/tika/metadata/filter/ClearByMimeMetadataFilter.java
@@ -30,7 +30,7 @@ import org.apache.tika.mime.MediaType;
  * mime matches the mime filter.  The idea is that you might not want
  * to store/transmit metadata for images or specific file types.
  */
-public class ClearByMimeMetadataFilter implements MetadataFilter {
+public class ClearByMimeMetadataFilter extends MetadataFilter {
     private final Set<String> mimes;
 
     public ClearByMimeMetadataFilter() {
@@ -55,7 +55,6 @@ public class ClearByMimeMetadataFilter implements 
MetadataFilter {
             for (String n : metadata.names()) {
                 metadata.remove(n);
             }
-
         }
     }
 
diff --git 
a/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java
 
b/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java
index a058163..2c7d976 100644
--- 
a/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java
+++ 
b/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java
@@ -21,7 +21,7 @@ import java.util.List;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 
-public class CompositeMetadataFilter implements MetadataFilter {
+public class CompositeMetadataFilter extends MetadataFilter {
 
     private final List<MetadataFilter> filters;
 
diff --git 
a/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java
 
b/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java
index 2aa4167..59d10d9 100644
--- 
a/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java
+++ 
b/tika-core/src/main/java/org/apache/tika/metadata/filter/ExcludeFieldMetadataFilter.java
@@ -24,7 +24,7 @@ import org.apache.tika.config.Field;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 
-public class ExcludeFieldMetadataFilter implements MetadataFilter {
+public class ExcludeFieldMetadataFilter extends MetadataFilter {
     private final Set<String> excludeSet;
 
     public ExcludeFieldMetadataFilter() {
diff --git 
a/tika-core/src/main/java/org/apache/tika/metadata/filter/FieldNameMappingFilter.java
 
b/tika-core/src/main/java/org/apache/tika/metadata/filter/FieldNameMappingFilter.java
index 891f8e8..db16f5d 100644
--- 
a/tika-core/src/main/java/org/apache/tika/metadata/filter/FieldNameMappingFilter.java
+++ 
b/tika-core/src/main/java/org/apache/tika/metadata/filter/FieldNameMappingFilter.java
@@ -16,18 +16,16 @@
  */
 package org.apache.tika.metadata.filter;
 
-import java.util.HashMap;
-import java.util.List;
+import java.util.LinkedHashMap;
 import java.util.Map;
 
 import org.apache.tika.config.Field;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 
-public class FieldNameMappingFilter implements MetadataFilter {
-    private static final String MAPPING_OPERATOR = "->";
+public class FieldNameMappingFilter extends MetadataFilter {
 
-    Map<String, String> mapping = new HashMap<>();
+    Map<String, String> mappings = new LinkedHashMap<>();
 
     boolean excludeUnmapped = true;
 
@@ -35,23 +33,23 @@ public class FieldNameMappingFilter implements 
MetadataFilter {
     public void filter(Metadata metadata) throws TikaException {
         if (excludeUnmapped) {
             for (String n : metadata.names()) {
-                if (mapping.containsKey(n)) {
+                if (mappings.containsKey(n)) {
                     String[] vals = metadata.getValues(n);
                     metadata.remove(n);
                     for (String val : vals) {
-                        metadata.add(mapping.get(n), val);
+                        metadata.add(mappings.get(n), val);
                     }
                 } else {
-                    mapping.remove(n);
+                    metadata.remove(n);
                 }
             }
         } else {
             for (String n : metadata.names()) {
-                if (mapping.containsKey(n)) {
+                if (mappings.containsKey(n)) {
                     String[] vals = metadata.getValues(n);
                     metadata.remove(n);
                     for (String val : vals) {
-                        metadata.add(mapping.get(n), val);
+                        metadata.add(mappings.get(n), val);
                     }
                 }
             }
@@ -72,26 +70,9 @@ public class FieldNameMappingFilter implements 
MetadataFilter {
     }
 
     @Field
-    public void setMappings(List<String> mappings) {
-        for (String m : mappings) {
-            String[] args = m.split(MAPPING_OPERATOR);
-            if (args.length == 0 || args.length == 1) {
-                throw new IllegalArgumentException("Can't find mapping 
operator '->' in: " + m);
-            } else if (args.length > 2) {
-                throw new IllegalArgumentException(
-                        "Must have only one mapping operator. I found more 
than one: " + m);
-            }
-            String from = args[0].trim();
-            if (from.length() == 0) {
-                throw new IllegalArgumentException(
-                        "Must contain content before the " + "mapping operator 
'->'");
-            }
-            String to = args[1].trim();
-            if (to.length() == 0) {
-                throw new IllegalArgumentException(
-                        "Must contain content after the " + "mapping operator 
'->'");
-            }
-            mapping.put(from, to);
+    public void setMappings(Map<String, String> mappings) {
+        for (Map.Entry<String, String> e : mappings.entrySet()) {
+            this.mappings.put(e.getKey(), e.getValue());
         }
     }
 }
diff --git 
a/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java
 
b/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java
index 0a82590..b75de6a 100644
--- 
a/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java
+++ 
b/tika-core/src/main/java/org/apache/tika/metadata/filter/IncludeFieldMetadataFilter.java
@@ -24,7 +24,7 @@ import org.apache.tika.config.Field;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 
-public class IncludeFieldMetadataFilter implements MetadataFilter {
+public class IncludeFieldMetadataFilter extends MetadataFilter {
     private final Set<String> includeSet;
 
     public IncludeFieldMetadataFilter() {
diff --git 
a/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java 
b/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java
index 59c5f3b..21eb3ec 100644
--- 
a/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java
+++ 
b/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java
@@ -17,17 +17,42 @@
 
 package org.apache.tika.metadata.filter;
 
+import java.io.IOException;
 import java.io.Serializable;
 
+import org.w3c.dom.Element;
+
+import org.apache.tika.config.ConfigBase;
+import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 
 /**
- * Filters the metadata in place
+ * Filters the metadata in place after the parse
  *
  * @since Apache Tika 1.25
  */
-public interface MetadataFilter extends Serializable {
+public abstract class MetadataFilter extends ConfigBase implements 
Serializable {
+
+    /**
+     * Loads the metadata filter from the config file if it exists, otherwise 
returns NoOpFilter
+     * @param root
+     * @return
+     * @throws TikaConfigException
+     * @throws IOException
+     */
+    public static MetadataFilter load(Element root, boolean allowMissing) 
throws TikaConfigException,
+            IOException {
+        try {
+            return buildComposite("metadataFilters", 
CompositeMetadataFilter.class,
+                    "metadataFilter", MetadataFilter.class, root);
+        } catch (TikaConfigException e) {
+            if (allowMissing && e.getMessage().contains("could not find 
metadataFilters")) {
+                return new NoOpFilter();
+            }
+            throw e;
+        }
+    }
 
-    void filter(Metadata metadata) throws TikaException;
+    public abstract void filter(Metadata metadata) throws TikaException;
 }
diff --git 
a/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java 
b/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java
index 9cd1ec3..f4e1090 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java
@@ -23,7 +23,7 @@ import org.apache.tika.metadata.Metadata;
  * This filter performs no operations on the metadata
  * and leaves it untouched.
  */
-public class NoOpFilter implements MetadataFilter {
+public class NoOpFilter extends MetadataFilter {
 
     public static NoOpFilter NOOP_FILTER = new NoOpFilter();
 
diff --git 
a/tika-core/src/test/java/org/apache/tika/metadata/filter/MockUpperCaseFilter.java
 
b/tika-core/src/test/java/org/apache/tika/metadata/filter/MockUpperCaseFilter.java
index fe12b82..ac64734 100644
--- 
a/tika-core/src/test/java/org/apache/tika/metadata/filter/MockUpperCaseFilter.java
+++ 
b/tika-core/src/test/java/org/apache/tika/metadata/filter/MockUpperCaseFilter.java
@@ -24,7 +24,7 @@ import org.apache.tika.metadata.Metadata;
 /**
  * Mock Filter for testing uppercasing of all values
  */
-public class MockUpperCaseFilter implements MetadataFilter {
+public class MockUpperCaseFilter extends MetadataFilter {
 
     @Override
     public void filter(Metadata metadata) throws TikaException {
diff --git 
a/tika-core/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java
 
b/tika-core/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java
index d12d7ed..d77e373 100644
--- 
a/tika-core/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java
+++ 
b/tika-core/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java
@@ -30,6 +30,7 @@ import org.junit.Test;
 import org.apache.tika.config.AbstractTikaConfigTest;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
 
 public class TestMetadataFilter extends AbstractTikaConfigTest {
@@ -153,7 +154,6 @@ public class TestMetadataFilter extends 
AbstractTikaConfigTest {
 
         MetadataFilter filter = config.getMetadataFilter();
         filter.filter(metadata);
-        debug(metadata);
         assertEquals(0, metadata.size());
 
         metadata.set(Metadata.CONTENT_TYPE, 
MediaType.text("plain").toString());
@@ -161,6 +161,22 @@ public class TestMetadataFilter extends 
AbstractTikaConfigTest {
         filter.filter(metadata);
         assertEquals(2, metadata.size());
         assertEquals("AUTHOR", metadata.get("author"));
+    }
+
+    @Test
+    public void testFieldNameMapping() throws Exception {
+        TikaConfig config = getConfig("TIKA-3137-field-mapping.xml");
 
+        Metadata metadata = new Metadata();
+        metadata.set(TikaCoreProperties.TIKA_CONTENT, "quick brown fox");
+        metadata.set("author", "author");
+        metadata.set("a", "a-value");
+
+        MetadataFilter filter = config.getMetadataFilter();
+        filter.filter(metadata);
+        assertEquals("quick brown fox", metadata.get("content"));
+        assertEquals("a-value", metadata.get("b"));
+        assertNull(metadata.get("author"));
+        assertNull(metadata.get("a"));
     }
 }
diff --git 
a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml 
b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml
index 96dac44..95ba73b 100644
--- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml
@@ -19,10 +19,10 @@
   <metadataFilters>
     <metadataFilter 
class="org.apache.tika.metadata.filter.ExcludeFieldMetadataFilter">
       <params>
-        <param name="exclude" type="list">
-          <string>title</string>
-          <string>author</string>
-        </param>
+        <exclude>
+          <field>title</field>
+          <field>author</field>
+        </exclude>
       </params>
     </metadataFilter>
   </metadataFilters>
diff --git 
a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml 
b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-field-mapping.xml
similarity index 69%
copy from 
tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml
copy to 
tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-field-mapping.xml
index 96dac44..e5118b9 100644
--- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-exclude.xml
+++ 
b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-field-mapping.xml
@@ -17,12 +17,16 @@
 -->
 <properties>
   <metadataFilters>
-    <metadataFilter 
class="org.apache.tika.metadata.filter.ExcludeFieldMetadataFilter">
+    <metadataFilter 
class="org.apache.tika.metadata.filter.FieldNameMappingFilter">
       <params>
-        <param name="exclude" type="list">
-          <string>title</string>
-          <string>author</string>
-        </param>
+        <excludeUnmapped>true</excludeUnmapped>
+        <mappings>
+          <mapping from="X-TIKA:content" to="content"/>
+          <mapping from="a" to="b"/>
+          <!-- note that the mapping only works once...not recursively -->
+          <mapping from="b" to="c"/>
+          <mapping from="c" to="d"/>
+        </mappings>
       </params>
     </metadataFilter>
   </metadataFilters>
diff --git 
a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml 
b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml
index f960e94..15eb62c 100644
--- 
a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml
+++ 
b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include-uc.xml
@@ -19,10 +19,10 @@
   <metadataFilters>
     <metadataFilter 
class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
       <params>
-        <param name="include" type="list">
-          <string>title</string>
-          <string>author</string>
-        </param>
+        <include>
+          <field>title</field>
+          <field>author</field>
+        </include>
       </params>
     </metadataFilter>
     <metadataFilter 
class="org.apache.tika.metadata.filter.MockUpperCaseFilter"/>
diff --git 
a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml 
b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml
index 8832915..f8ffa90 100644
--- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-include.xml
@@ -19,10 +19,10 @@
   <metadataFilters>
     <metadataFilter 
class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
       <params>
-        <param name="include" type="list">
-          <string>title</string>
-          <string>author</string>
-        </param>
+        <include>
+          <field>title</field>
+          <field>author</field>
+        </include>
       </params>
     </metadataFilter>
   </metadataFilters>
diff --git 
a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml 
b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml
index a151665..6278421 100644
--- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-3137-mimes-uc.xml
@@ -19,10 +19,10 @@
   <metadataFilters>
     <metadataFilter 
class="org.apache.tika.metadata.filter.ClearByMimeMetadataFilter">
       <params>
-        <param name="mimes" type="list">
-          <string>image/jpeg</string>
-          <string>application/pdf</string>
-        </param>
+        <mimes>
+          <mime>image/jpeg</mime>
+          <mime>application/pdf</mime>
+        </mimes>
       </params>
     </metadataFilter>
     <metadataFilter 
class="org.apache.tika.metadata.filter.MockUpperCaseFilter"/>
diff --git 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
index bb5a848..0ac65d2 100644
--- 
a/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
+++ 
b/tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilter.java
@@ -36,7 +36,7 @@ import org.apache.tika.metadata.Property;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.metadata.filter.MetadataFilter;
 
-public class TikaEvalMetadataFilter implements MetadataFilter {
+public class TikaEvalMetadataFilter extends MetadataFilter {
 
     public static String TIKA_EVAL_NS = "tika-eval" + 
TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
 
diff --git 
a/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
 
b/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
index 0222f65..86915c5 100644
--- 
a/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
+++ 
b/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
@@ -318,6 +318,7 @@ public class RecursiveParserWrapperTest extends TikaTest {
         Metadata metadata = new Metadata();
         TikaConfig tikaConfig = null;
         Parser p = null;
+        
System.out.println(getResourceAsFile("TIKA-3137-include.xml").getAbsolutePath());
         try (InputStream is = getResourceAsStream("TIKA-3137-include.xml")) {
             tikaConfig = new TikaConfig(is);
             p = new AutoDetectParser(tikaConfig);
diff --git 
a/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml
 
b/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml
index b99af0b..056e64c 100644
--- 
a/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml
+++ 
b/tika-parsers/tika-parsers-classic/tika-parsers-classic-package/src/test/resources/org/apache/tika/parser/TIKA-3137-include.xml
@@ -19,19 +19,19 @@
   <metadataFilters>
     <metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
       <params>
-        <param name="include" type="list">
-          <string>X-TIKA:content</string>
-          <string>extended-properties:Application</string>
-          <string>Content-Type</string>
-        </param>
+        <include>
+          <field>X-TIKA:content</field>
+          <field>extended-properties:Application</field>
+          <field>Content-Type</field>
+        </include>
       </params>
     </metadataFilter>
     <metadataFilter class="org.apache.tika.metadata.filter.ClearByMimeMetadataFilter">
       <params>
-        <param name="mimes" type="list">
-          <string>image/emf</string>
-          <string>text/plain</string>
-        </param>
+        <mimes>
+          <mime>image/emf</mime>
+          <mime>text/plain</mime>
+        </mimes>
       </params>
     </metadataFilter>
   </metadataFilters>
diff --git a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/java/org/apache/tika/pipes/emitter/solr/TestBasic.java b/tika-pipes/tika-emitters/tika-emitter-solr/src/test/java/org/apache/tika/pipes/emitter/solr/TestBasic.java
index 9afcc51..d88947e 100644
--- a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/java/org/apache/tika/pipes/emitter/solr/TestBasic.java
+++ b/tika-pipes/tika-emitters/tika-emitter-solr/src/test/java/org/apache/tika/pipes/emitter/solr/TestBasic.java
@@ -34,7 +34,7 @@ import org.apache.tika.pipes.emitter.EmitKey;
 import org.apache.tika.pipes.emitter.Emitter;
 import org.apache.tika.pipes.emitter.EmitterManager;
 
-@Ignore("requires solr to be up and running")
+@Ignore("requires solr to be up and running; please dockerize some tests, please, please")
 public class TestBasic {
 
     @Test
diff --git a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/tika-config-simple-emitter.xml b/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/tika-config-simple-emitter.xml
index ba68b7f..c52da5e 100644
--- a/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/tika-config-simple-emitter.xml
+++ b/tika-pipes/tika-emitters/tika-emitter-solr/src/test/resources/tika-config-simple-emitter.xml
@@ -18,37 +18,35 @@
   under the License.
 -->
 <properties>
-    <metadataFilters>
-        <metadataFilter class="org.apache.tika.metadata.filter.FieldNameMappingFilter">
-            <params>
-                <param name="mappings" type="list">
-                    <string>X-TIKA:content->content</string>
-                    <string>X-TIKA:embedded_resource_path->embedded_path</string>
-                    <string>Content-Length->length</string>
-                    <string>dc:creator->creators</string>
-                    <string>dc:title->title</string>
-                </param>
-            </params>
-        </metadataFilter>
-    </metadataFilters>
-    <emitters>
-        <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter">
-            <params>
-                <name>solr1</name>
-                <url>http://localhost:8983/solr/tika-test</url>
-                <attachmentStrategy>concatenate-content</attachmentStrategy>
-                <contentField>content</contentField>
-                <commitWithin>10</commitWithin>
-            </params>
-        </emitter>
-        <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter">
-            <params>
-                <name>solr2</name>
-                <url>http://localhost:8983/solr/tika-test</url>
-                <attachmentStrategy>parent-child</attachmentStrategy>
-                <contentField>content</contentField>
-                <commitWithin>10</commitWithin>
-            </params>
-        </emitter>
-    </emitters>
+  <metadataFilters>
+    <metadataFilter class="org.apache.tika.metadata.filter.FieldNameMappingFilter">
+      <mappings>
+        <mapping from="X-TIKA:content" to="content"/>
+        <mapping from="X-TIKA:embedded_resource_path" to="embedded_path"/>
+        <mapping from="Content-Length" to="length"/>
+        <mapping from="dc:creator" to="creators"/>
+        <mapping from="dc:title" to="title"/>
+      </mappings>
+    </metadataFilter>
+  </metadataFilters>
+  <emitters>
+    <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter">
+      <params>
+        <name>solr1</name>
+        <url>http://localhost:8983/solr/tika-test</url>
+        <attachmentStrategy>concatenate-content</attachmentStrategy>
+        <contentField>content</contentField>
+        <commitWithin>10</commitWithin>
+      </params>
+    </emitter>
+    <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter">
+      <params>
+        <name>solr2</name>
+        <url>http://localhost:8983/solr/tika-test</url>
+        <attachmentStrategy>parent-child</attachmentStrategy>
+        <contentField>content</contentField>
+        <commitWithin>10</commitWithin>
+      </params>
+    </emitter>
+  </emitters>
 </properties>
\ No newline at end of file
diff --git a/tika-server/tika-server-classic/src/test/resources/config/TIKA-3137-include.xml b/tika-server/tika-server-classic/src/test/resources/config/TIKA-3137-include.xml
index 5e563bf..da1182a 100644
--- a/tika-server/tika-server-classic/src/test/resources/config/TIKA-3137-include.xml
+++ b/tika-server/tika-server-classic/src/test/resources/config/TIKA-3137-include.xml
@@ -19,19 +19,19 @@
     <metadataFilters>
         <metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
             <params>
-                <param name="include" type="list">
-                    <string>X-TIKA:content</string>
-                    <string>extended-properties:Application</string>
-                    <string>Content-Type</string>
-                </param>
+                <include>
+                    <field>X-TIKA:content</field>
+                    <field>extended-properties:Application</field>
+                    <field>Content-Type</field>
+                </include>
             </params>
         </metadataFilter>
         <metadataFilter class="org.apache.tika.metadata.filter.ClearByMimeMetadataFilter">
             <params>
-                <param name="mimes" type="list">
-                    <string>image/emf</string>
-                    <string>text/plain</string>
-                </param>
+                <mimes>
+                    <mime>image/emf</mime>
+                    <mime>text/plain</mime>
+                </mimes>
             </params>
         </metadataFilter>
     </metadataFilters>
diff --git a/tika-server/tika-server-client/src/test/resources/tika-config-simple-fs-emitter.xml b/tika-server/tika-server-client/src/test/resources/tika-config-simple-fs-emitter.xml
index 0b029d8..62e692f 100644
--- a/tika-server/tika-server-client/src/test/resources/tika-config-simple-fs-emitter.xml
+++ b/tika-server/tika-server-client/src/test/resources/tika-config-simple-fs-emitter.xml
@@ -18,61 +18,61 @@
   under the License.
 -->
 <properties>
-    <service-loader initializableProblemHandler="throw"/>
-    <pipesIterators>
-        <pipesIterator class="org.apache.tika.pipes.pipesiterator.FileSystemPipesIterator">
-            <params>
-                <fetcherName>fs</fetcherName>
-                <basePath>fix</basePath>
-            </params>
-        </pipesIterator>
-    </pipesIterators>
-    <fetchers>
-        <fetcher class="org.apache.tika.pipes.fetcher.FileSystemFetcher">
-            <params>
-                <name>fs</name>
-                <basePath>fix</basePath>
-            </params>
-        </fetcher>
-    </fetchers>
-    <metadataFilters>
-        <metadataFilter class="org.apache.tika.metadata.filter.FieldNameMappingFilter">
-            <params>
-                <param name="mappings" type="list">
-                    <string>X-TIKA:content->content</string>
-                    <string>X-TIKA:embedded_resource_path->embedded_path</string>
-                    <string>Content-Length->length</string>
-                    <string>dc:creator->creators</string>
-                    <string>dc:title->title</string>
-                </param>
-            </params>
-        </metadataFilter>
-    </metadataFilters>
-    <emitters>
-        <emitter class="org.apache.tika.pipes.emitter.fs.FileSystemEmitter">
-            <params>
-                <name>fs</name>
-                <basePath>fix</basePath>
-            </params>
-        </emitter>
-        <!--
-        <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter">
-            <params>
-                <param name="name" type="string">solr1</param>
-                <param name="url" type="string">http://localhost:8983/solr/tika-test</param>
-                <param name="attachmentStrategy" type="string">concatenate-content</param>
-                <param name="contentField" type="string">content</param>
-                <param name="commitWithin" type="int">10</param>
-            </params>
-        </emitter>
-        <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter">
-            <params>
-                <param name="name" type="string">solr2</param>
-                <param name="url" type="string">http://localhost:8983/solr/tika-test</param>
-                <param name="attachmentStrategy" type="string">parent-child</param>
-                <param name="contentField" type="string">content</param>
-                <param name="commitWithin" type="int">10</param>
-            </params>
-        </emitter>-->
-    </emitters>
+  <service-loader initializableProblemHandler="throw"/>
+  <pipesIterators>
+    <pipesIterator class="org.apache.tika.pipes.pipesiterator.FileSystemPipesIterator">
+      <params>
+        <fetcherName>fs</fetcherName>
+        <basePath>fix</basePath>
+      </params>
+    </pipesIterator>
+  </pipesIterators>
+  <fetchers>
+    <fetcher class="org.apache.tika.pipes.fetcher.FileSystemFetcher">
+      <params>
+        <name>fs</name>
+        <basePath>fix</basePath>
+      </params>
+    </fetcher>
+  </fetchers>
+  <metadataFilters>
+    <metadataFilter class="org.apache.tika.metadata.filter.FieldNameMappingFilter">
+      <params>
+        <mappings>
+          <mapping from="X-TIKA:content" to="content"/>
+          <mapping from="X-TIKA:embedded_resource_path" to="embedded_path"/>
+          <mapping from="Content-Length" to="length"/>
+          <mapping from="dc:creator" to="creators"/>
+          <mapping from="dc:title" to="title"/>
+        </mappings>
+      </params>
+    </metadataFilter>
+  </metadataFilters>
+  <emitters>
+    <emitter class="org.apache.tika.pipes.emitter.fs.FileSystemEmitter">
+      <params>
+        <name>fs</name>
+        <basePath>fix</basePath>
+      </params>
+    </emitter>
+    <!--
+    <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter">
+        <params>
+            <param name="name" type="string">solr1</param>
+            <param name="url" type="string">http://localhost:8983/solr/tika-test</param>
+            <param name="attachmentStrategy" type="string">concatenate-content</param>
+            <param name="contentField" type="string">content</param>
+            <param name="commitWithin" type="int">10</param>
+        </params>
+    </emitter>
+    <emitter class="org.apache.tika.pipes.emitter.solr.SolrEmitter">
+        <params>
+            <param name="name" type="string">solr2</param>
+            <param name="url" type="string">http://localhost:8983/solr/tika-test</param>
+            <param name="attachmentStrategy" type="string">parent-child</param>
+            <param name="contentField" type="string">content</param>
+            <param name="commitWithin" type="int">10</param>
+        </params>
+    </emitter>-->
+  </emitters>
 </properties>
\ No newline at end of file
diff --git a/tika-server/tika-server-core/src/test/resources/configs/metadata-filter-include.xml b/tika-server/tika-server-core/src/test/resources/configs/metadata-filter-include.xml
index 3a7a7c1..96ce8e5 100644
--- a/tika-server/tika-server-core/src/test/resources/configs/metadata-filter-include.xml
+++ b/tika-server/tika-server-core/src/test/resources/configs/metadata-filter-include.xml
@@ -19,11 +19,11 @@
     <metadataFilters>
         <metadataFilter class="org.apache.tika.metadata.filter.IncludeFieldMetadataFilter">
             <params>
-                <param name="include" type="list">
-                    <string>X-TIKA:content</string>
-                    <string>extended-properties:Application</string>
-                    <string>Content-Type</string>
-                </param>
+                <include>
+                    <field>X-TIKA:content</field>
+                    <field>extended-properties:Application</field>
+                    <field>Content-Type</field>
+                </include>
             </params>
         </metadataFilter>
     </metadataFilters>

Reply via email to