This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4433
in repository https://gitbox.apache.org/repos/asf/tika.git

commit ff889c894e7846af58a17f64d72c453776b43271
Author: tallison <[email protected]>
AuthorDate: Thu Jun 5 10:24:02 2025 -0400

    TIKA-4433 -- improve handling of null values
---
 .../metadata/writefilter/StandardWriteFilter.java  | 11 ++++
 .../writefilter/StandardWriteFilterTest.java       | 74 ++++++++++++++++++++++
 2 files changed, 85 insertions(+)

diff --git a/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java
index 38763d079..8e11b9805 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java
@@ -173,6 +173,11 @@ public class StandardWriteFilter implements MetadataWriteFilter, Serializable {
 
     @Override
     public void set(String field, String value, Map<String, String[]> data) {
+        //legacy behavior: setting a null value removes the key
+        if (value == null) {
+            data.remove(field);
+            return;
+        }
         if (! include(field, value)) {
             return;
         }
@@ -435,6 +440,9 @@ public class StandardWriteFilter implements MetadataWriteFilter, Serializable {
     }
 
     private boolean includeField(String name) {
+        if (name == null) {
+            throw new NullPointerException("property name must not be null");
+        }
         if (ALWAYS_SET_FIELDS.contains(name)) {
             return true;
         }
@@ -445,6 +453,9 @@ public class StandardWriteFilter implements MetadataWriteFilter, Serializable {
     }
 
     private static int estimateSize(String s) {
+        if (s == null) { //a null string contributes nothing to the size estimate
+            return 0;
+        }
         return 2 * s.length();
     }
 
diff --git a/tika-core/src/test/java/org/apache/tika/metadata/writefilter/StandardWriteFilterTest.java b/tika-core/src/test/java/org/apache/tika/metadata/writefilter/StandardWriteFilterTest.java
index 7c3369bfd..933d5e6e1 100644
--- a/tika-core/src/test/java/org/apache/tika/metadata/writefilter/StandardWriteFilterTest.java
+++ b/tika-core/src/test/java/org/apache/tika/metadata/writefilter/StandardWriteFilterTest.java
@@ -16,8 +16,10 @@
  */
 package org.apache.tika.metadata.writefilter;
 
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
 
 import java.io.ByteArrayInputStream;
 import java.nio.charset.StandardCharsets;
@@ -32,6 +34,7 @@ import org.apache.tika.config.TikaConfig;
 import org.apache.tika.config.TikaConfigTest;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.OfficeOpenXMLExtended;
+import org.apache.tika.metadata.Property;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MediaTypeRegistry;
@@ -203,6 +206,77 @@ public class StandardWriteFilterTest extends TikaTest {
     }
 
     @Test
+    public void testAddOrder() throws Exception {
+        //values added under a single key must come back in insertion order
+        String[] expected = {"foo", "bar", "baz"};
+        Metadata metadata = new Metadata();
+        metadata.setMetadataWriteFilter(new StandardWriteFilter(100, 1000, 100000, 10, Set.of(), Set.of(), true));
+        for (String value : expected) {
+            metadata.add("test", value);
+        }
+        assertArrayEquals(expected, metadata.getValues("test"));
+    }
+
+    @Test
+    public void testNullValues() throws Exception {
+        StandardWriteFilter standardWriteFilter = new StandardWriteFilter(100, 
1000, 100000, 10, Set.of(), Set.of(), true);
+        Metadata m = new Metadata();
+        m.set("test", "foo");
+        m.setMetadataWriteFilter(standardWriteFilter);
+        m.set("test", null);
+        //setting null must remove the key that was stored before the filter was attached
+        assertEquals(0, m.names().length);
+        assertNull(m.get("test"));
+
+        //now test adding. NOTE(review): no write filter is set on this new Metadata -- confirm that is intended
+        m = new Metadata();
+        m.add("test", "foo");
+        m.add("test", null);
+        //Not sure this is the behavior we want, but it is what we're currently doing.
+        assertArrayEquals(new String[]{"foo"}, m.getValues("test"));
+
+        //now check when empty not allowed
+        standardWriteFilter = new StandardWriteFilter(100, 1000, 100000, 10, 
Set.of(), Set.of(), false);
+        m = new Metadata();
+        m.set("test", "foo");
+        m.setMetadataWriteFilter(standardWriteFilter);
+        assertEquals(1, m.names().length);
+        assertEquals("foo", m.get("test"));
+        //set(null) must still remove the key with this filter attached
+        m.set("test", null);
+        assertEquals(0, m.names().length);
+        assertNull(m.get("test"));
+
+        m.add("test", "foo");
+        m.add("test", null);
+        //adding null must leave exactly the one existing value
+        assertEquals(1, m.names().length);
+        assertEquals(1, m.getValues("test").length);
+    }
+
+    @Test
+    public void testNullKeys() {
+        StandardWriteFilter standardWriteFilter = new StandardWriteFilter(100, 1000, 100000, 10, Set.of(), Set.of(), true);
+        Metadata m = new Metadata();
+        m.setMetadataWriteFilter(standardWriteFilter);
+        //a null key must be rejected with an NPE by both the String and Property overloads of set...
+        assertThrows(NullPointerException.class, () -> {
+            m.set((String) null, "foo");
+        });
+        assertThrows(NullPointerException.class, () -> {
+            m.set((Property) null, "foo");
+        });
+        //...and by both overloads of add
+        assertThrows(NullPointerException.class, () -> {
+            m.add((String) null, "foo");
+        });
+
+        assertThrows(NullPointerException.class, () -> {
+            m.add((Property) null, "foo");
+        });
+    }
+
+        @Test
     public void testExclude() throws Exception {
         TikaConfig tikaConfig =
                 new 
TikaConfig(TikaConfigTest.class.getResourceAsStream("TIKA-3695-exclude.xml"));

Reply via email to