This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4433 in repository https://gitbox.apache.org/repos/asf/tika.git
commit ff889c894e7846af58a17f64d72c453776b43271 Author: tallison <[email protected]> AuthorDate: Thu Jun 5 10:24:02 2025 -0400 TIKA-4433 -- improve handling of null values --- .../metadata/writefilter/StandardWriteFilter.java | 11 ++++ .../writefilter/StandardWriteFilterTest.java | 74 ++++++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java index 38763d079..8e11b9805 100644 --- a/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java +++ b/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java @@ -173,6 +173,11 @@ public class StandardWriteFilter implements MetadataWriteFilter, Serializable { @Override public void set(String field, String value, Map<String, String[]> data) { + //legacy behavior is that setting(null) removes the key + if (value == null) { + data.remove(field); + return; + } if (! include(field, value)) { return; } @@ -435,6 +440,9 @@ public class StandardWriteFilter implements MetadataWriteFilter, Serializable { } private boolean includeField(String name) { + if (name == null) { + throw new NullPointerException("property name must not be null"); + } if (ALWAYS_SET_FIELDS.contains(name)) { return true; } @@ -445,6 +453,9 @@ public class StandardWriteFilter implements MetadataWriteFilter, Serializable { } private static int estimateSize(String s) { + if (s == null) { + return 0; + } return 2 * s.length(); } diff --git a/tika-core/src/test/java/org/apache/tika/metadata/writefilter/StandardWriteFilterTest.java b/tika-core/src/test/java/org/apache/tika/metadata/writefilter/StandardWriteFilterTest.java index 7c3369bfd..933d5e6e1 100644 --- a/tika-core/src/test/java/org/apache/tika/metadata/writefilter/StandardWriteFilterTest.java +++ b/tika-core/src/test/java/org/apache/tika/metadata/writefilter/StandardWriteFilterTest.java @@ -16,8 +16,10 @@ */ package org.apache.tika.metadata.writefilter; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; import java.io.ByteArrayInputStream; import java.nio.charset.StandardCharsets; @@ -32,6 +34,7 @@ import org.apache.tika.config.TikaConfig; import org.apache.tika.config.TikaConfigTest; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.OfficeOpenXMLExtended; +import org.apache.tika.metadata.Property; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.mime.MediaType; import org.apache.tika.mime.MediaTypeRegistry; @@ -203,6 +206,77 @@ public class StandardWriteFilterTest extends TikaTest { } @Test + public void testAddOrder() throws Exception { + StandardWriteFilter standardWriteFilter = new StandardWriteFilter(100, 1000, 100000, 10, Set.of(), Set.of(), true); + Metadata m = new Metadata(); + m.setMetadataWriteFilter(standardWriteFilter); + m.add("test", "foo"); + m.add("test", "bar"); + m.add("test", "baz"); + + assertArrayEquals(new String[]{"foo", "bar", "baz"}, m.getValues("test")); + } + + @Test + public void testNullValues() throws Exception { + StandardWriteFilter standardWriteFilter = new StandardWriteFilter(100, 1000, 100000, 10, Set.of(), Set.of(), true); + Metadata m = new Metadata(); + m.set("test", "foo"); + m.setMetadataWriteFilter(standardWriteFilter); + m.set("test", null); + + assertEquals(0, m.names().length); + assertNull(m.get("test")); + + //now test adding + m = new Metadata(); + m.add("test", "foo"); + m.add("test", null); + //Not sure this is the behavior we want, but it is what we're currently doing. + assertArrayEquals(new String[]{"foo"}, m.getValues("test")); + + //now check when empty not allowed + standardWriteFilter = new StandardWriteFilter(100, 1000, 100000, 10, Set.of(), Set.of(), false); + m = new Metadata(); + m.set("test", "foo"); + m.setMetadataWriteFilter(standardWriteFilter); + assertEquals(1, m.names().length); + assertEquals("foo", m.get("test")); + + m.set("test", null); + assertEquals(0, m.names().length); + assertNull(m.get("test")); + + m.add("test", "foo"); + m.add("test", null); + + assertEquals(1, m.names().length); + assertEquals(1, m.getValues("test").length); + } + + @Test + public void testNullKeys() { + StandardWriteFilter standardWriteFilter = new StandardWriteFilter(100, 1000, 100000, 10, Set.of(), Set.of(), true); + Metadata m = new Metadata(); + m.setMetadataWriteFilter(standardWriteFilter); + Exception ex = assertThrows(NullPointerException.class, () -> { + m.set((String) null, "foo"); + }); + ex = assertThrows(NullPointerException.class, () -> { + m.set((Property) null, "foo"); + }); + + ex = assertThrows(NullPointerException.class, () -> { + m.add((Property) null, "foo"); + }); + + ex = assertThrows(NullPointerException.class, () -> { + m.add((Property) null, "foo"); + }); + + } + + @Test public void testExclude() throws Exception { TikaConfig tikaConfig = new TikaConfig(TikaConfigTest.class.getResourceAsStream("TIKA-3695-exclude.xml"));
