This is an automated email from the ASF dual-hosted git repository. nick pushed a commit to branch multiple-parsers in repository https://gitbox.apache.org/repos/asf/tika.git
commit 12a98b63babc8515177d6f0e3df17ae8912142ee Author: Nick Burch <n...@gagravarr.org> AuthorDate: Wed Mar 14 17:35:01 2018 +0000 Keep all implemented and unit test --- .../parser/multiple/AbstractMultipleParser.java | 16 ++++++++++++++-- .../tika/parser/multiple/MultipleParserTest.java | 22 +++++++++++++++++++++- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java index ece2b8d..8f896b2 100644 --- a/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java +++ b/tika-core/src/main/java/org/apache/tika/parser/multiple/AbstractMultipleParser.java @@ -22,6 +22,7 @@ import static org.apache.tika.utils.ParserUtils.recordParserFailure; import java.io.IOException; import java.io.InputStream; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.List; @@ -326,8 +327,19 @@ public abstract class AbstractMultipleParser extends AbstractParser { // Most recent (last) parser has already won continue; case KEEP_ALL: - // TODO Find unique values to add - // TODO Implement + // Start with old list, then add any new unique values + List<String> vals = new ArrayList<>(Arrays.asList(oldVals)); + newMetadata.remove(n); + for (String oldVal : oldVals) { + newMetadata.add(n, oldVal); + } + for (String newVal : newVals) { + if (! vals.contains(newVal)) { + newMetadata.add(n, newVal); + vals.add(newVal); + } + } + continue; } } diff --git a/tika-core/src/test/java/org/apache/tika/parser/multiple/MultipleParserTest.java b/tika-core/src/test/java/org/apache/tika/parser/multiple/MultipleParserTest.java index 3d77e9d..590c95d 100644 --- a/tika-core/src/test/java/org/apache/tika/parser/multiple/MultipleParserTest.java +++ b/tika-core/src/test/java/org/apache/tika/parser/multiple/MultipleParserTest.java @@ -220,7 +220,27 @@ public class MultipleParserTest { assertEquals(EmptyParser.class.getName(), usedParsers[3]); - // TODO Implement then check the Merge policies + // Merge + p = new SupplementingParser(null, MetadataPolicy.KEEP_ALL, pFail, + pContent1, pContent2, pNothing); + + metadata = new Metadata(); + handler = new BodyContentHandler(); + p.parse(new ByteArrayInputStream(new byte[] {0,1,2,3,4}), handler, metadata, context); + assertEquals("Fell back 1!Fell back 2!", handler.toString()); + + assertEquals("Test1", metadata.get("T1")); + assertEquals("Test2", metadata.get("T2")); + assertEquals(2, metadata.getValues("TBoth").length); + assertEquals("Test1", metadata.getValues("TBoth")[0]); + assertEquals("Test2", metadata.getValues("TBoth")[1]); + + usedParsers = metadata.getValues("X-Parsed-By"); + assertEquals(4, usedParsers.length); + assertEquals(ErrorParser.class.getName(), usedParsers[0]); + assertEquals(DummyParser.class.getName(), usedParsers[1]); + assertEquals(DummyParser.class.getName(), usedParsers[2]); + assertEquals(EmptyParser.class.getName(), usedParsers[3]); // Check the error details always come through, no matter the policy -- To stop receiving notification emails like this one, please contact n...@apache.org.