This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 7109dcf01e TIKA-4608 -- clean up metadata filter api (#2507)
7109dcf01e is described below
commit 7109dcf01e55c280bfd061ca9f04e5018f2ffe9b
Author: Tim Allison <[email protected]>
AuthorDate: Tue Dec 30 19:23:52 2025 -0500
TIKA-4608 -- clean up metadata filter api (#2507)
---
.../src/main/java/org/apache/tika/cli/TikaCLI.java | 3 +-
.../src/main/java/org/apache/tika/gui/TikaGUI.java | 2 +-
.../metadata/filter/CompositeMetadataFilter.java | 5 ++-
.../tika/metadata/filter/MetadataFilter.java | 13 ++++----
.../tika/metadata/filter/MetadataFilterBase.java | 3 +-
.../apache/tika/metadata/filter/NoOpFilter.java | 4 +--
.../filter/RemoveByMimeMetadataFilter.java | 6 ++--
.../tika/sax/RecursiveParserWrapperHandler.java | 2 +-
.../core/metadata/TikaEvalMetadataFilterTest.java | 6 +++-
.../apache/tika/parser/image/JpegParserTest.java | 2 +-
.../apache/tika/pipes/core/server/EmitHandler.java | 7 ++--
.../core/server/MetadataListAndEmbeddedBytes.java | 2 +-
.../filter/AttachmentCountingListFilter.java | 5 ++-
.../apache/tika/pipes/core/PassbackFilterTest.java | 15 ++++-----
.../pipes/emitter/solr/SolrEmitterDevTest.java | 6 +++-
.../filter/AttachmentCountingListFilter.java | 5 ++-
.../tika/metadata/filter/TestMetadataFilter.java | 38 ++++++++++++++--------
.../CustomClassSerializationTest.java | 3 +-
.../core/resource/RecursiveMetadataResource.java | 10 +++---
.../tika/server/core/resource/TikaResource.java | 22 ++++++-------
20 files changed, 85 insertions(+), 74 deletions(-)
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index a0f0ddb574..cb25539678 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -560,8 +560,7 @@ public class TikaCLI {
JsonMetadataList.setPrettyPrinting(prettyPrint);
try (Writer writer = getOutputWriter(output, encoding)) {
List<Metadata> metadataList = handler.getMetadataList();
- metadataList = tikaLoader
- .loadMetadataFilters().filter(metadataList);
+ tikaLoader.loadMetadataFilters().filter(metadataList);
JsonMetadataList.toJson(metadataList, writer);
}
}
diff --git a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
index 6075ab82a2..84977bdd90 100644
--- a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
+++ b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
@@ -386,7 +386,7 @@ public class TikaGUI extends JFrame implements
ActionListener, HyperlinkListener
StringWriter jsonBuffer = new StringWriter();
JsonMetadataList.setPrettyPrinting(true);
List<Metadata> metadataList =
recursiveParserWrapperHandler.getMetadataList();
- metadataList =
tikaConfig.loadMetadataFilters().filter(metadataList);
+ tikaConfig.loadMetadataFilters().filter(metadataList);
JsonMetadataList.toJson(metadataList, jsonBuffer);
setText(json, jsonBuffer.toString());
}
diff --git
a/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java
b/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java
index 3b306cd580..bd2e52ba48 100644
---
a/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java
+++
b/tika-core/src/main/java/org/apache/tika/metadata/filter/CompositeMetadataFilter.java
@@ -44,11 +44,10 @@ public class CompositeMetadataFilter extends MetadataFilter
{
}
@Override
- public List<Metadata> filter(List<Metadata> metadataList) throws
TikaException {
+ public void filter(List<Metadata> metadataList) throws TikaException {
for (MetadataFilter filter : filters) {
- metadataList = filter.filter(metadataList);
+ filter.filter(metadataList);
}
- return metadataList;
}
@Override
diff --git
a/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java
b/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java
index 6bd8fae632..b3f81c1919 100644
---
a/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java
+++
b/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilter.java
@@ -25,13 +25,12 @@ import org.apache.tika.metadata.Metadata;
public abstract class MetadataFilter implements Serializable {
/**
- * For efficiency's sake, the original metadata list and data therein may be modified.
- * Users are responsible for doing a defensive copy before calling filter if mutability
- * would be problematic.
+ * Filters the metadata list in place. The list and the metadata objects within it
+ * may be modified. Callers must pass a mutable list and should make a defensive
+ * copy before calling if the original data must be preserved.
*
- * @param metadataList
- * @return
- * @throws TikaException
+ * @param metadataList the list to filter (must be mutable)
+ * @throws TikaException if filtering fails
*/
- public abstract List<Metadata> filter(List<Metadata> metadataList) throws
TikaException;
+ public abstract void filter(List<Metadata> metadataList) throws
TikaException;
}
diff --git
a/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilterBase.java
b/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilterBase.java
index dc5959ed2b..50df92a451 100644
---
a/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilterBase.java
+++
b/tika-core/src/main/java/org/apache/tika/metadata/filter/MetadataFilterBase.java
@@ -29,11 +29,10 @@ import org.apache.tika.metadata.Metadata;
public abstract class MetadataFilterBase extends MetadataFilter {
@Override
- public List<Metadata> filter(List<Metadata> metadataList) throws
TikaException {
+ public void filter(List<Metadata> metadataList) throws TikaException {
for (Metadata m : metadataList) {
filter(m);
}
- return metadataList;
}
protected abstract void filter(Metadata metadata);
diff --git
a/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java
b/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java
index 8a380edcee..9ccc8f4d64 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/filter/NoOpFilter.java
@@ -33,7 +33,7 @@ public class NoOpFilter extends MetadataFilter {
@Override
- public List<Metadata> filter(List<Metadata> metadataList) throws
TikaException {
- return metadataList;
+ public void filter(List<Metadata> metadataList) throws TikaException {
+ // no-op
}
}
diff --git
a/tika-core/src/main/java/org/apache/tika/metadata/filter/RemoveByMimeMetadataFilter.java
b/tika-core/src/main/java/org/apache/tika/metadata/filter/RemoveByMimeMetadataFilter.java
index a7a4c79bd3..fa774d8cda 100644
---
a/tika-core/src/main/java/org/apache/tika/metadata/filter/RemoveByMimeMetadataFilter.java
+++
b/tika-core/src/main/java/org/apache/tika/metadata/filter/RemoveByMimeMetadataFilter.java
@@ -73,10 +73,8 @@ public class RemoveByMimeMetadataFilter extends
MetadataFilter {
}
@Override
- public List<Metadata> filter(List<Metadata> metadataList) throws
TikaException {
- List<Metadata> result = new ArrayList<>(metadataList);
- result.removeIf(this::shouldRemove);
- return result;
+ public void filter(List<Metadata> metadataList) throws TikaException {
+ metadataList.removeIf(this::shouldRemove);
}
private boolean shouldRemove(Metadata metadata) {
diff --git
a/tika-core/src/main/java/org/apache/tika/sax/RecursiveParserWrapperHandler.java
b/tika-core/src/main/java/org/apache/tika/sax/RecursiveParserWrapperHandler.java
index 154d5733ae..edb40b22f1 100644
---
a/tika-core/src/main/java/org/apache/tika/sax/RecursiveParserWrapperHandler.java
+++
b/tika-core/src/main/java/org/apache/tika/sax/RecursiveParserWrapperHandler.java
@@ -53,7 +53,7 @@ public class RecursiveParserWrapperHandler extends
AbstractRecursiveParserWrappe
* Create a handler with no limit on the number of embedded resources
*/
public RecursiveParserWrapperHandler(ContentHandlerFactory
contentHandlerFactory) {
- super(contentHandlerFactory, -1);
+ this(contentHandlerFactory, -1);
}
/**
diff --git
a/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilterTest.java
b/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilterTest.java
index 1374ac0eb9..31a842ba76 100644
---
a/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilterTest.java
+++
b/tika-eval/tika-eval-core/src/test/java/org/apache/tika/eval/core/metadata/TikaEvalMetadataFilterTest.java
@@ -18,6 +18,7 @@ package org.apache.tika.eval.core.metadata;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import java.util.ArrayList;
import java.util.List;
import org.junit.jupiter.api.Test;
@@ -38,7 +39,10 @@ public class TikaEvalMetadataFilterTest {
String content = "the quick brown fox, Zothro 1234 1235, jumped
over the lazy dog";
metadata.set(TikaCoreProperties.TIKA_CONTENT, content);
- metadata = filter.filter(List.of(metadata)).get(0);
+ List<Metadata> metadataList = new ArrayList<>();
+ metadataList.add(metadata);
+ filter.filter(metadataList);
+ metadata = metadataList.get(0);
assertEquals("eng", metadata.get(TikaEvalMetadataFilter.LANGUAGE));
assertEquals(12, (int)
metadata.getInt(TikaEvalMetadataFilter.NUM_TOKENS));
assertEquals(11, (int)
metadata.getInt(TikaEvalMetadataFilter.NUM_UNIQUE_TOKENS));
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/image/JpegParserTest.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/image/JpegParserTest.java
index 11af0ccef7..79ea679d1a 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/image/JpegParserTest.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/image/JpegParserTest.java
@@ -50,7 +50,7 @@ public class JpegParserTest extends TikaTest {
wrapper.parse(TikaInputStream.get(is), handler, new Metadata(),
new ParseContext());
}
List<Metadata> metadataList = handler.getMetadataList();
- metadataList = metadataFilter.filter(metadataList);
+ metadataFilter.filter(metadataList);
Metadata metadata = metadataList.get(0);
// Geo tags should be there with 5dp, and not rounded
assertEquals("51.575762", metadata.get(Metadata.LATITUDE));
diff --git
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/EmitHandler.java
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/EmitHandler.java
index 5215b5c59f..32e97ebfa7 100644
---
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/EmitHandler.java
+++
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/EmitHandler.java
@@ -124,16 +124,15 @@ class EmitHandler {
}
PassbackFilter passbackFilter = parseContext.get(PassbackFilter.class);
if (passbackFilter != null) {
- List<Metadata> filtered = null;
try {
- filtered = passbackFilter.filter(parseData.metadataList);
+ passbackFilter.filter(parseData.metadataList);
} catch (TikaException e) {
LOG.warn("problem filtering for pass back", e);
}
if (StringUtils.isBlank(parseExceptionStack)) {
- return new
PipesResult(PipesResult.RESULT_STATUS.EMIT_SUCCESS_PASSBACK, new
EmitDataImpl(emitKey.getEmitKey(), filtered));
+ return new
PipesResult(PipesResult.RESULT_STATUS.EMIT_SUCCESS_PASSBACK, new
EmitDataImpl(emitKey.getEmitKey(), parseData.metadataList));
} else {
- return new
PipesResult(PipesResult.RESULT_STATUS.EMIT_SUCCESS_PARSE_EXCEPTION, new
EmitDataImpl(emitKey.getEmitKey(), filtered), parseExceptionStack);
+ return new
PipesResult(PipesResult.RESULT_STATUS.EMIT_SUCCESS_PARSE_EXCEPTION, new
EmitDataImpl(emitKey.getEmitKey(), parseData.metadataList),
parseExceptionStack);
}
}
diff --git
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/MetadataListAndEmbeddedBytes.java
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/MetadataListAndEmbeddedBytes.java
index fd2386ff09..1c9ea38ce2 100644
---
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/MetadataListAndEmbeddedBytes.java
+++
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/MetadataListAndEmbeddedBytes.java
@@ -40,7 +40,7 @@ class MetadataListAndEmbeddedBytes {
}
public void filter(MetadataFilter filter) throws TikaException {
- metadataList = filter.filter(metadataList);
+ filter.filter(metadataList);
}
public EmbeddedDocumentBytesHandler getEmbeddedDocumentBytesHandler() {
diff --git
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java
index fdf4f1c228..730981a6a0 100644
---
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java
+++
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java
@@ -28,13 +28,12 @@ public class AttachmentCountingListFilter extends
MetadataFilter {
private Integer count = 0;
@Override
- public List<Metadata> filter(List<Metadata> metadataList) throws
TikaException {
+ public void filter(List<Metadata> metadataList) throws TikaException {
if (metadataList == null || metadataList.isEmpty()) {
- return metadataList;
+ return;
}
metadataList.get(0).set("X-TIKA:attachment_count",
Integer.toString(metadataList.size() - 1));
count += metadataList.size();
- return metadataList;
}
public Integer getCount() {
diff --git
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PassbackFilterTest.java
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PassbackFilterTest.java
index c00989a1a5..a3a55bb372 100644
---
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PassbackFilterTest.java
+++
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PassbackFilterTest.java
@@ -22,7 +22,6 @@ import static org.junit.jupiter.api.Assertions.assertNull;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
@@ -100,17 +99,17 @@ public class PassbackFilterTest {
private static class MyPassbackFilter extends PassbackFilter {
@Override
- public List<Metadata> filter(List<Metadata> metadataList) throws
TikaException {
- List<Metadata> ret = new ArrayList<>();
+ public void filter(List<Metadata> metadataList) throws TikaException {
+ // Remove items without RESOURCE_NAME_KEY and transform remaining ones
+ metadataList.removeIf(m ->
StringUtils.isBlank(m.get(TikaCoreProperties.RESOURCE_NAME_KEY)));
for (Metadata m : metadataList) {
String val = m.get(TikaCoreProperties.RESOURCE_NAME_KEY);
- if (!StringUtils.isBlank(val)) {
- Metadata retM = new Metadata();
- retM.add(TikaCoreProperties.RESOURCE_NAME_KEY,
val.toUpperCase(Locale.ROOT));
- ret.add(retM);
+ // Clear all fields and only keep RESOURCE_NAME_KEY (uppercased)
+ for (String name : m.names()) {
+ m.remove(name);
}
+ m.set(TikaCoreProperties.RESOURCE_NAME_KEY,
val.toUpperCase(Locale.ROOT));
}
- return ret;
}
}
diff --git
a/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/java/org/apache/tika/pipes/emitter/solr/SolrEmitterDevTest.java
b/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/java/org/apache/tika/pipes/emitter/solr/SolrEmitterDevTest.java
index d3090a36fb..04c3106386 100644
---
a/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/java/org/apache/tika/pipes/emitter/solr/SolrEmitterDevTest.java
+++
b/tika-pipes/tika-pipes-plugins/tika-pipes-solr/src/test/java/org/apache/tika/pipes/emitter/solr/SolrEmitterDevTest.java
@@ -16,6 +16,7 @@
*/
package org.apache.tika.pipes.emitter.solr;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
@@ -67,7 +68,10 @@ public class SolrEmitterDevTest {
mappings.put(TikaCoreProperties.CREATED.getName(), "created");
mappings.put(TikaCoreProperties.TIKA_CONTENT.getName(), "content");
filter.setMappings(mappings);
- metadata = filter.filter(List.of(metadata)).get(0);
+ List<Metadata> metadataList = new ArrayList<>();
+ metadataList.add(metadata);
+ filter.filter(metadataList);
+ metadata = metadataList.get(0);
solrEmitter.emit(emitKey, Collections.singletonList(metadata), new
ParseContext());
}
diff --git
a/tika-serialization/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java
b/tika-serialization/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java
index e33390a7c5..798356c186 100644
---
a/tika-serialization/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java
+++
b/tika-serialization/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java
@@ -27,13 +27,12 @@ public class AttachmentCountingListFilter extends
MetadataFilter {
private Integer count = 0;
@Override
- public List<Metadata> filter(List<Metadata> metadataList) throws
TikaException {
+ public void filter(List<Metadata> metadataList) throws TikaException {
if (metadataList == null || metadataList.isEmpty()) {
- return metadataList;
+ return;
}
metadataList.get(0).set("X-TIKA:attachment_count",
Integer.toString(metadataList.size() - 1));
count += metadataList.size();
- return metadataList;
}
public Integer getCount() {
diff --git
a/tika-serialization/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java
b/tika-serialization/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java
index ecbdef42c6..197a116702 100644
---
a/tika-serialization/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java
+++
b/tika-serialization/src/test/java/org/apache/tika/metadata/filter/TestMetadataFilter.java
@@ -20,6 +20,7 @@ import static
org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
@@ -139,14 +140,18 @@ public class TestMetadataFilter extends TikaTest {
MetadataFilter filter = new
RemoveByMimeMetadataFilter(set("image/jpeg", "application/pdf"));
// jpeg should be removed
- List<Metadata> result = filter.filter(List.of(jpegMetadata));
- assertEquals(0, result.size());
+ List<Metadata> jpegList = new ArrayList<>();
+ jpegList.add(jpegMetadata);
+ filter.filter(jpegList);
+ assertEquals(0, jpegList.size());
// text/plain should be kept
- result = filter.filter(List.of(plainMetadata));
- assertEquals(1, result.size());
- assertEquals(2, result.get(0).size());
- assertEquals("author", result.get(0).get("author"));
+ List<Metadata> plainList = new ArrayList<>();
+ plainList.add(plainMetadata);
+ filter.filter(plainList);
+ assertEquals(1, plainList.size());
+ assertEquals(2, plainList.get(0).size());
+ assertEquals("author", plainList.get(0).get("author"));
}
@Test
@@ -164,14 +169,18 @@ public class TestMetadataFilter extends TikaTest {
MetadataFilter filter = loader.get(MetadataFilter.class);
// jpeg should be removed
- List<Metadata> result = filter.filter(List.of(jpegMetadata));
- assertEquals(0, result.size());
+ List<Metadata> jpegList = new ArrayList<>();
+ jpegList.add(jpegMetadata);
+ filter.filter(jpegList);
+ assertEquals(0, jpegList.size());
// text/plain should be kept and upper-cased by mock-upper-case-filter
- result = filter.filter(List.of(plainMetadata));
- assertEquals(1, result.size());
- assertEquals(2, result.get(0).size());
- assertEquals("AUTHOR", result.get(0).get("author"));
+ List<Metadata> plainList = new ArrayList<>();
+ plainList.add(plainMetadata);
+ filter.filter(plainList);
+ assertEquals(1, plainList.size());
+ assertEquals(2, plainList.get(0).size());
+ assertEquals("AUTHOR", plainList.get(0).get("author"));
}
@Test
@@ -275,6 +284,9 @@ public class TestMetadataFilter extends TikaTest {
}
private static Metadata filterOne(MetadataFilter filter, Metadata
singleMetadata) throws TikaException {
- return filter.filter(List.of(singleMetadata)).get(0);
+ List<Metadata> list = new ArrayList<>();
+ list.add(singleMetadata);
+ filter.filter(list);
+ return list.get(0);
}
}
diff --git
a/tika-serialization/src/test/java/org/apache/tika/serialization/CustomClassSerializationTest.java
b/tika-serialization/src/test/java/org/apache/tika/serialization/CustomClassSerializationTest.java
index ad70cb521b..dabb6671ee 100644
---
a/tika-serialization/src/test/java/org/apache/tika/serialization/CustomClassSerializationTest.java
+++
b/tika-serialization/src/test/java/org/apache/tika/serialization/CustomClassSerializationTest.java
@@ -64,7 +64,7 @@ public class CustomClassSerializationTest {
}
@Override
- public java.util.List<Metadata> filter(java.util.List<Metadata>
metadataList) {
+ public void filter(java.util.List<Metadata> metadataList) {
for (Metadata metadata : metadataList) {
for (String name : metadata.names()) {
String[] values = metadata.getValues(name);
@@ -74,7 +74,6 @@ public class CustomClassSerializationTest {
}
}
}
- return metadataList;
}
@Override
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
index 61d6f14403..d215552db1 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/RecursiveMetadataResource.java
@@ -85,9 +85,9 @@ public class RecursiveMetadataResource {
LOG.error("something went seriously wrong", e);
}
MetadataFilter metadataFilter = context.get(MetadataFilter.class,
getTikaLoader().loadMetadataFilters());
- //note that the filter may modify the contents of handler's metadata list.
- //do a deep copy if that's problematic.
- return metadataFilter.filter(handler.getMetadataList());
+ List<Metadata> metadataList = handler.getMetadataList();
+ metadataFilter.filter(metadataList);
+ return metadataList;
}
static HandlerConfig buildHandlerConfig(MultivaluedMap<String, String>
httpHeaders, String handlerTypeName, HandlerConfig.PARSE_MODE parseMode) {
@@ -188,7 +188,9 @@ public class RecursiveMetadataResource {
LOG.error("something went seriously wrong", e);
}
MetadataFilter metadataFilter = context.get(MetadataFilter.class,
getTikaLoader().loadMetadataFilters());
- return new
MetadataList(metadataFilter.filter(handler.getMetadataList()));
+ List<Metadata> metadataList = handler.getMetadataList();
+ metadataFilter.filter(metadataList);
+ return new MetadataList(metadataList);
}
/**
diff --git
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
index 4729e66a47..9b312606b2 100644
---
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
+++
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
@@ -27,6 +27,7 @@ import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
@@ -501,14 +502,13 @@ public class TikaResource {
throws IOException, TikaException {
Metadata metadata = new Metadata();
parseToMetadata(getInputStream(att.getObject(InputStream.class),
metadata, httpHeaders, info), metadata, preparePostHeaderMap(att, httpHeaders),
info, handlerTypeName);
- List<Metadata> ret = TikaResource
- .getTikaLoader()
- .loadMetadataFilters()
- .filter(List.of(metadata));
- if (ret == null || ret.isEmpty()) {
+ List<Metadata> metadataList = new ArrayList<>();
+ metadataList.add(metadata);
+
TikaResource.getTikaLoader().loadMetadataFilters().filter(metadataList);
+ if (metadataList.isEmpty()) {
return new Metadata();
}
- return ret.get(0);
+ return metadataList.get(0);
}
@PUT
@@ -519,13 +519,13 @@ public class TikaResource {
throws IOException, TikaException {
Metadata metadata = new Metadata();
parseToMetadata(getInputStream(is, metadata, httpHeaders, info),
metadata, httpHeaders.getRequestHeaders(), info, handlerTypeName);
- List<Metadata> ret = TikaResource
- .getTikaLoader().loadMetadataFilters()
- .filter(List.of(metadata));
- if (ret == null || ret.isEmpty()) {
+ List<Metadata> metadataList = new ArrayList<>();
+ metadataList.add(metadata);
+
TikaResource.getTikaLoader().loadMetadataFilters().filter(metadataList);
+ if (metadataList.isEmpty()) {
return new Metadata();
}
- return metadata;
+ return metadataList.get(0);
}
private void parseToMetadata(TikaInputStream tis, Metadata metadata,
MultivaluedMap<String, String> httpHeaders, UriInfo info, String
handlerTypeName)