This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_3x by this push:
new d1c2cd8ab TIKA-4433 -- improve handling of null values (#2242)
d1c2cd8ab is described below
commit d1c2cd8ab00c577bb82c2dc26127cf5aaf8638fb
Author: Tim Allison <[email protected]>
AuthorDate: Thu Jun 5 10:42:12 2025 -0400
TIKA-4433 -- improve handling of null values (#2242)
* TIKA-4433 -- improve handling of null values
(cherry picked from commit 4703898144477d57ab37c2fad49cd95163551b86)
---
.../metadata/writefilter/StandardWriteFilter.java | 11 ++++
.../writefilter/StandardWriteFilterTest.java | 74 ++++++++++++++++++++++
2 files changed, 85 insertions(+)
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java b/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java
index a245e8d2c..d5a3e8315 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilter.java
@@ -173,6 +173,11 @@ public class StandardWriteFilter implements MetadataWriteFilter, Serializable {
@Override
public void set(String field, String value, Map<String, String[]> data) {
+ //legacy behavior is that setting(null) removes the key
+ if (value == null) {
+ data.remove(field);
+ return;
+ }
if (! include(field, value)) {
return;
}
@@ -435,6 +440,9 @@ public class StandardWriteFilter implements MetadataWriteFilter, Serializable {
}
private boolean includeField(String name) {
+ if (name == null) {
+ throw new NullPointerException("property name must not be null");
+ }
if (ALWAYS_SET_FIELDS.contains(name)) {
return true;
}
@@ -445,6 +453,9 @@ public class StandardWriteFilter implements MetadataWriteFilter, Serializable {
}
private static int estimateSize(String s) {
+ if (s == null) {
+ return 0;
+ }
return 2 * s.length();
}
diff --git a/tika-core/src/test/java/org/apache/tika/metadata/writefilter/StandardWriteFilterTest.java b/tika-core/src/test/java/org/apache/tika/metadata/writefilter/StandardWriteFilterTest.java
index 7c3369bfd..4fb98cffa 100644
--- a/tika-core/src/test/java/org/apache/tika/metadata/writefilter/StandardWriteFilterTest.java
+++ b/tika-core/src/test/java/org/apache/tika/metadata/writefilter/StandardWriteFilterTest.java
@@ -16,8 +16,10 @@
*/
package org.apache.tika.metadata.writefilter;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
@@ -32,6 +34,7 @@ import org.apache.tika.config.TikaConfig;
import org.apache.tika.config.TikaConfigTest;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.OfficeOpenXMLExtended;
+import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
@@ -202,6 +205,77 @@ public class StandardWriteFilterTest extends TikaTest {
assertEquals(3, metadata.getValues(TikaCoreProperties.SUBJECT).length);
}
+    @Test
+    public void testAddOrder() throws Exception {
+        StandardWriteFilter standardWriteFilter = new StandardWriteFilter(100, 1000, 100000, 10, Set.of(), Set.of(), true);
+        Metadata m = new Metadata();
+        m.setMetadataWriteFilter(standardWriteFilter);
+        m.add("test", "foo");
+        m.add("test", "bar");
+        m.add("test", "baz");
+
+        assertArrayEquals(new String[]{"foo", "bar", "baz"}, m.getValues("test"));
+    }
+
+    @Test
+    public void testNullValues() throws Exception {
+        StandardWriteFilter standardWriteFilter = new StandardWriteFilter(100, 1000, 100000, 10, Set.of(), Set.of(), true);
+        Metadata m = new Metadata();
+        m.set("test", "foo");
+        m.setMetadataWriteFilter(standardWriteFilter);
+        m.set("test", null);
+
+        assertEquals(0, m.names().length);
+        assertNull(m.get("test"));
+
+        //now test adding
+        m = new Metadata();
+        m.add("test", "foo");
+        m.add("test", null);
+        //Not sure this is the behavior we want, but it is what we're currently doing.
+        assertArrayEquals(new String[]{"foo"}, m.getValues("test"));
+
+        //now check when empty not allowed
+        standardWriteFilter = new StandardWriteFilter(100, 1000, 100000, 10, Set.of(), Set.of(), false);
+        m = new Metadata();
+        m.set("test", "foo");
+        m.setMetadataWriteFilter(standardWriteFilter);
+        assertEquals(1, m.names().length);
+        assertEquals("foo", m.get("test"));
+
+        m.set("test", null);
+        assertEquals(0, m.names().length);
+        assertNull(m.get("test"));
+
+        m.add("test", "foo");
+        m.add("test", null);
+
+        assertEquals(1, m.names().length);
+        assertEquals(1, m.getValues("test").length);
+    }
+
+    @Test
+    public void testNullKeys() {
+        StandardWriteFilter standardWriteFilter = new StandardWriteFilter(100, 1000, 100000, 10, Set.of(), Set.of(), true);
+        Metadata m = new Metadata();
+        m.setMetadataWriteFilter(standardWriteFilter);
+        Exception ex = assertThrows(NullPointerException.class, () -> {
+            m.set((String) null, "foo");
+        });
+        ex = assertThrows(NullPointerException.class, () -> {
+            m.set((Property) null, "foo");
+        });
+
+        ex = assertThrows(NullPointerException.class, () -> {
+            m.add((String) null, "foo"); //String-keyed overload, mirroring the set(String) case
+        });
+
+        ex = assertThrows(NullPointerException.class, () -> {
+            m.add((Property) null, "foo");
+        });
+
+    }
+
@Test
public void testExclude() throws Exception {
TikaConfig tikaConfig =