This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4545-jsonify-all-the-things in repository https://gitbox.apache.org/repos/asf/tika.git
commit d7f1b6d5bf0c5f1c2447bbcd7423b0bcb086e93d Author: tallison <[email protected]> AuthorDate: Fri Nov 28 14:44:33 2025 -0500 TIKA-4545 - simplify parsecontext serialization --- .../org/apache/tika/config/ParseContextConfig.java | 161 ++++++++ .../java/org/apache/tika/parser/ParseContext.java | 20 +- .../apache/tika/config/ParseContextConfigTest.java | 127 +++++++ .../tika/serialization/ConfigDeserializer.java | 131 +++++++ .../serialization/ParseContextDeserializer.java | 76 ++-- .../tika/serialization/ParseContextSerializer.java | 87 ++++- .../tika/serialization/TikaJsonDeserializer.java | 412 --------------------- .../tika/serialization/TikaJsonSerializer.java | 271 -------------- .../tika/serialization/ConfigDeserializerTest.java | 305 +++++++++++++++ .../TestParseContextSerialization.java | 229 +++++++++++- .../serialization/TikaJsonSerializationTest.java | 49 --- .../apache/tika/serialization/mocks/ClassA.java | 156 -------- .../apache/tika/serialization/mocks/ClassB.java | 96 ----- .../apache/tika/serialization/mocks/ClassC.java | 50 --- 14 files changed, 1075 insertions(+), 1095 deletions(-) diff --git a/tika-core/src/main/java/org/apache/tika/config/ParseContextConfig.java b/tika-core/src/main/java/org/apache/tika/config/ParseContextConfig.java new file mode 100644 index 000000000..31459cb9f --- /dev/null +++ b/tika-core/src/main/java/org/apache/tika/config/ParseContextConfig.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.config; + +import java.io.IOException; +import java.lang.reflect.Method; +import java.util.Locale; + +import org.apache.tika.exception.TikaConfigException; +import org.apache.tika.parser.ParseContext; + +/** + * Facade for accessing runtime configuration from ParseContext's ConfigContainer. + * <p> + * This wrapper provides a safe way for parsers to access runtime configuration + * without directly depending on tika-serialization. It performs these critical checks: + * <ul> + * <li>If ConfigContainer has config for the requested key but ConfigDeserializer + * is not on the classpath, throws TikaConfigException with a clear error message</li> + * <li>If ConfigDeserializer is available, delegates to it for deserialization</li> + * <li>If no config is present, returns the default config</li> + * </ul> + * <p> + * Usage in parsers: + * <pre> + * PDFParserConfig localConfig = ParseContextConfig.getConfig( + * context, "pdf-parser", PDFParserConfig.class, defaultConfig); + * </pre> + * + * @since Apache Tika 4.0 + */ +public class ParseContextConfig { + + private static final Class<?> CONFIG_DESERIALIZER_CLASS; + private static final Method GET_CONFIG_METHOD; + private static final Method HAS_CONFIG_METHOD; + + static { + Class<?> clazz = null; + Method getMethod = null; + Method hasMethod = null; + try { + clazz = Class.forName("org.apache.tika.serialization.ConfigDeserializer"); + getMethod = clazz.getMethod("getConfig", + ParseContext.class, String.class, Class.class, Object.class); + hasMethod = 
clazz.getMethod("hasConfig", ParseContext.class, String.class); + } catch (ClassNotFoundException | NoSuchMethodException e) { + // ConfigDeserializer not on classpath - will check at runtime if needed + } + CONFIG_DESERIALIZER_CLASS = clazz; + GET_CONFIG_METHOD = getMethod; + HAS_CONFIG_METHOD = hasMethod; + } + + /** + * Retrieves runtime configuration from ParseContext's ConfigContainer. + * <p> + * This method performs defensive checking: if the ConfigContainer has configuration + * for the requested key but the ConfigDeserializer is not available on the classpath, + * it throws TikaConfigException. This prevents silent failures where users expect + * their runtime config to be used but it's silently ignored. + * + * @param context the parse context (may be null) + * @param configKey the configuration key (e.g., "pdf-parser", "html-parser") + * @param configClass the configuration class + * @param defaultConfig the default configuration to use if no runtime config exists + * @param <T> the configuration type + * @return the runtime config merged with defaults, or the default config if no runtime config + * @throws TikaConfigException if ConfigContainer has config but ConfigDeserializer is not on classpath + * @throws IOException if deserialization fails + */ + public static <T> T getConfig(ParseContext context, String configKey, + Class<T> configClass, T defaultConfig) + throws TikaConfigException, IOException { + if (context == null) { + return defaultConfig; + } + + ConfigContainer configContainer = context.get(ConfigContainer.class); + if (configContainer == null) { + return defaultConfig; + } + + // Check if there's config for this specific key + boolean hasConfigForKey = configContainer.get(configKey).isPresent(); + if (!hasConfigForKey) { + return defaultConfig; + } + + // Config exists for this key - ConfigDeserializer MUST be available + if (CONFIG_DESERIALIZER_CLASS == null) { + throw new TikaConfigException(String.format(Locale.ROOT, + "ParseContext 
contains ConfigContainer with configuration for '%s' " + + "but org.apache.tika.serialization.ConfigDeserializer is not on the classpath. " + + "This means your runtime configuration will be ignored. " + + "To fix: add tika-serialization as a dependency, or remove the ConfigContainer " + + "from ParseContext if runtime configuration via ConfigContainer is not needed.", + configKey)); + } + + // ConfigDeserializer is available - delegate to it + try { + @SuppressWarnings("unchecked") + T result = (T) GET_CONFIG_METHOD.invoke(null, context, configKey, configClass, defaultConfig); + return result; + } catch (Exception e) { + Throwable cause = e.getCause() != null ? e.getCause() : e; + if (cause instanceof IOException) { + throw (IOException) cause; + } + throw new IOException("Failed to deserialize config for '" + configKey + "': " + + cause.getMessage(), cause); + } + } + + /** + * Checks if runtime configuration exists for the given key. + * <p> + * Unlike {@link #getConfig}, this method does NOT throw if ConfigDeserializer + * is missing - it only checks for the presence of config. + * + * @param context the parse context + * @param configKey the configuration key + * @return true if config exists for this key + */ + public static boolean hasConfig(ParseContext context, String configKey) { + if (context == null) { + return false; + } + + ConfigContainer configContainer = context.get(ConfigContainer.class); + if (configContainer == null) { + return false; + } + + return configContainer.get(configKey).isPresent(); + } + + /** + * Checks if ConfigDeserializer is available on the classpath. 
+ * + * @return true if tika-serialization is available + */ + public static boolean isConfigDeserializerAvailable() { + return CONFIG_DESERIALIZER_CLASS != null; + } +} diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java index dd925aa81..0393bec52 100644 --- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java +++ b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java @@ -20,7 +20,6 @@ import java.io.Serializable; import java.util.Collections; import java.util.HashMap; import java.util.Map; -import java.util.Set; /** * Parse context. Used to pass context information to Tika parsers. @@ -86,13 +85,22 @@ public class ParseContext implements Serializable { } public boolean isEmpty() { - return context.size() == 0; + return context.isEmpty(); } - //this should really only be used for serialization - public Set<String> keySet() { - return Collections - .unmodifiableSet(context.keySet()); + + /** + * Returns the internal context map for serialization purposes. + * The returned map is unmodifiable. + * <p> + * This method is intended for use by serialization frameworks only. + * Keys are fully-qualified class names, values are the objects stored in the context. + * + * @return an unmodifiable view of the context map + * @since Apache Tika 4.0 + */ + public Map<String, Object> getContextMap() { + return Collections.unmodifiableMap(context); } @Override diff --git a/tika-core/src/test/java/org/apache/tika/config/ParseContextConfigTest.java b/tika-core/src/test/java/org/apache/tika/config/ParseContextConfigTest.java new file mode 100644 index 000000000..2ee968765 --- /dev/null +++ b/tika-core/src/test/java/org/apache/tika/config/ParseContextConfigTest.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.config; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.jupiter.api.Test; + +import org.apache.tika.parser.ParseContext; + +/** + * Tests for ParseContextConfig wrapper. + * <p> + * Note: These tests assume tika-serialization is NOT on the classpath (typical for tika-core tests). + * Additional integration tests in tika-serialization verify behavior when ConfigDeserializer IS available. 
+ */ +public class ParseContextConfigTest { + + public static class TestConfig { + private String name = "default"; + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + } + + @Test + public void testNoConfigContainer() throws Exception { + ParseContext context = new ParseContext(); + TestConfig defaultConfig = new TestConfig(); + defaultConfig.setName("my-default"); + + TestConfig result = ParseContextConfig.getConfig(context, "test-parser", TestConfig.class, defaultConfig); + + assertEquals(defaultConfig, result); + assertEquals("my-default", result.getName()); + } + + @Test + public void testNullContext() throws Exception { + TestConfig defaultConfig = new TestConfig(); + defaultConfig.setName("my-default"); + + TestConfig result = ParseContextConfig.getConfig(null, "test-parser", TestConfig.class, defaultConfig); + + assertEquals(defaultConfig, result); + } + + @Test + public void testConfigContainerWithoutMatchingKey() throws Exception { + ParseContext context = new ParseContext(); + ConfigContainer configContainer = new ConfigContainer(); + configContainer.set("other-parser", "{\"name\":\"other\"}"); + context.set(ConfigContainer.class, configContainer); + + TestConfig defaultConfig = new TestConfig(); + defaultConfig.setName("my-default"); + + // No config for "test-parser", should return default + TestConfig result = ParseContextConfig.getConfig(context, "test-parser", TestConfig.class, defaultConfig); + + assertEquals(defaultConfig, result); + assertEquals("my-default", result.getName()); + } + + @Test + public void testHasConfigTrue() { + ParseContext context = new ParseContext(); + ConfigContainer configContainer = new ConfigContainer(); + configContainer.set("test-parser", "{\"name\":\"test\"}"); + context.set(ConfigContainer.class, configContainer); + + assertTrue(ParseContextConfig.hasConfig(context, "test-parser")); + } + + @Test + public void testHasConfigFalse() { + ParseContext context = 
new ParseContext(); + ConfigContainer configContainer = new ConfigContainer(); + configContainer.set("other-parser", "{\"name\":\"test\"}"); + context.set(ConfigContainer.class, configContainer); + + assertFalse(ParseContextConfig.hasConfig(context, "test-parser")); + } + + @Test + public void testHasConfigNoContainer() { + ParseContext context = new ParseContext(); + + assertFalse(ParseContextConfig.hasConfig(context, "test-parser")); + } + + @Test + public void testHasConfigNullContext() { + assertFalse(ParseContextConfig.hasConfig(null, "test-parser")); + } + + @Test + public void testIsConfigDeserializerAvailable() { + // This will be false in tika-core tests, true in tika-serialization tests + // Just verify the method works + boolean available = ParseContextConfig.isConfigDeserializerAvailable(); + assertNotNull(available); // Just checking it doesn't throw + } + +} diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/ConfigDeserializer.java b/tika-serialization/src/main/java/org/apache/tika/serialization/ConfigDeserializer.java new file mode 100644 index 000000000..ba5199b97 --- /dev/null +++ b/tika-serialization/src/main/java/org/apache/tika/serialization/ConfigDeserializer.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.serialization; + +import java.io.IOException; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import org.apache.tika.config.ConfigContainer; +import org.apache.tika.parser.ParseContext; + +/** + * Helper utility for parsers to deserialize their configuration from ConfigContainer. + * <p> + * <strong>Note for Parser Developers:</strong> Instead of calling this class directly, + * use {@link org.apache.tika.config.ParseContextConfig} which provides the same functionality + * but with better error handling. ParseContextConfig will throw a clear exception if + * ConfigContainer has config but tika-serialization is not on the classpath. + * <p> + * This allows parsers to retrieve their configuration using the same friendly names + * as in tika-config.json (e.g., "pdf-parser", "html-parser") from per-request + * configurations sent via FetchEmitTuple or other serialization mechanisms. + * <p> + * The helper automatically merges user configuration with parser defaults, eliminating + * the need for config-specific cloneAndUpdate methods. + * <p> + * Example usage in a parser: + * <pre> + * // Recommended: Use ParseContextConfig wrapper (in tika-core) + * PDFParserConfig localConfig = ParseContextConfig.getConfig( + * context, "pdf-parser", PDFParserConfig.class, defaultConfig); + * </pre> + * + * @see org.apache.tika.config.ParseContextConfig + */ +public class ConfigDeserializer { + + private static final ObjectMapper MAPPER = new ObjectMapper(); + + /** + * Retrieves and deserializes a parser configuration from the ConfigContainer in ParseContext. + * If a default config is provided, the user config will be merged on top of it. 
+ * + * @param context the parse context containing the ConfigContainer + * @param configKey the configuration key (e.g., "pdf-parser", "html-parser") + * @param configClass the configuration class to deserialize into + * @param defaultConfig optional default config to merge with user config (can be null) + * @param <T> the configuration type + * @return the merged configuration, the default config if no user config found, or null if neither exists + * @throws IOException if deserialization fails + */ + public static <T> T getConfig(ParseContext context, String configKey, Class<T> configClass, T defaultConfig) + throws IOException { + if (context == null) { + return defaultConfig; + } + + ConfigContainer configContainer = context.get(ConfigContainer.class); + if (configContainer == null) { + return defaultConfig; + } + + String configJson = configContainer.get(configKey).orElse(null); + if (configJson == null) { + return defaultConfig; + } + + // If there's a default config, merge the user config on top of it + if (defaultConfig != null) { + // IMPORTANT: Clone the default config first to preserve immutability + // Never modify the original defaultConfig as it may be reused across requests + T configCopy = MAPPER.convertValue(defaultConfig, configClass); + + // Now update the copy with user config + return MAPPER.readerForUpdating(configCopy).readValue(configJson); + } else { + // No default config, just deserialize the user config + return MAPPER.readValue(configJson, configClass); + } + } + + /** + * Retrieves and deserializes a parser configuration from the ConfigContainer in ParseContext. + * This version does not merge with any default config. 
+ * + * @param context the parse context containing the ConfigContainer + * @param configKey the configuration key (e.g., "pdf-parser", "html-parser") + * @param configClass the configuration class to deserialize into + * @param <T> the configuration type + * @return the deserialized configuration, or null if not found + * @throws IOException if deserialization fails + */ + public static <T> T getConfig(ParseContext context, String configKey, Class<T> configClass) + throws IOException { + return getConfig(context, configKey, configClass, null); + } + + /** + * Checks if a configuration exists in the ConfigContainer. + * + * @param context the parse context + * @param configKey the configuration key to check + * @return true if the configuration exists + */ + public static boolean hasConfig(ParseContext context, String configKey) { + if (context == null) { + return false; + } + + ConfigContainer configContainer = context.get(ConfigContainer.class); + if (configContainer == null) { + return false; + } + + return configContainer.get(configKey).isPresent(); + } +} diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextDeserializer.java b/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextDeserializer.java index ff43fc1a2..ff1742ec3 100644 --- a/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextDeserializer.java +++ b/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextDeserializer.java @@ -27,6 +27,7 @@ import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.JsonDeserializer; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.tika.config.ConfigContainer; import org.apache.tika.parser.ParseContext; @@ -34,61 +35,74 @@ import org.apache.tika.parser.ParseContext; public class ParseContextDeserializer extends 
JsonDeserializer<ParseContext> { @Override - public ParseContext deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException, JacksonException { - JsonNode root = jsonParser.getCodec().readTree(jsonParser); - return readParseContext(root); + public ParseContext deserialize(JsonParser jsonParser, + DeserializationContext deserializationContext) + throws IOException, JacksonException { + ObjectMapper mapper = (ObjectMapper) jsonParser.getCodec(); + JsonNode root = mapper.readTree(jsonParser); + return readParseContext(root, mapper); } + /** + * Backwards-compatible version that creates its own ObjectMapper. + * Prefer {@link #readParseContext(JsonNode, ObjectMapper)} when possible. + */ public static ParseContext readParseContext(JsonNode jsonNode) throws IOException { - //some use cases include the wrapper node, e.g. { "parseContext": {}} - //some include the contents only. - //Try to find "parseContext" to start. If that doesn't exist, assume the jsonNode is the contents. - JsonNode contextNode = jsonNode.get(PARSE_CONTEXT); + return readParseContext(jsonNode, ParseContextSerializer.createMapper()); + } + public static ParseContext readParseContext(JsonNode jsonNode, ObjectMapper mapper) + throws IOException { + // Some use cases include the wrapper node, e.g. { "parseContext": {}} + // Some include the contents only. + // Try to find "parseContext" to start. If that doesn't exist, assume jsonNode is the contents. 
+ JsonNode contextNode = jsonNode.get(PARSE_CONTEXT); if (contextNode == null) { contextNode = jsonNode; } + ParseContext parseContext = new ParseContext(); + + // Handle legacy "objects" field - deserialize directly into ParseContext if (contextNode.has("objects")) { - for (Map.Entry<String, JsonNode> e : contextNode - .get("objects") - .properties()) { - String superClassName = e.getKey(); - JsonNode obj = e.getValue(); - String className = readVal(TikaJsonSerializer.INSTANTIATED_CLASS_KEY, obj, null, true); + JsonNode objectsNode = contextNode.get("objects"); + for (Map.Entry<String, JsonNode> entry : objectsNode.properties()) { + String superClassName = entry.getKey(); + JsonNode objectNode = entry.getValue(); + try { - Class clazz = Class.forName(className); - Class superClazz = className.equals(superClassName) ? clazz : Class.forName(superClassName); - parseContext.set(superClazz, TikaJsonDeserializer.deserialize(clazz, obj)); - } catch (ReflectiveOperationException ex) { - throw new IOException(ex); + Class<?> superClass = Class.forName(superClassName); + + // Let Jackson handle polymorphic deserialization with type info + // Security is enforced by the PolymorphicTypeValidator in the mapper + Object deserializedObject = mapper.treeToValue(objectNode, Object.class); + + parseContext.set((Class) superClass, deserializedObject); + } catch (ClassNotFoundException ex) { + throw new IOException("Class not found: " + superClassName, ex); } } } + + // Store all non-"objects" fields as named configurations in ConfigContainer + // This allows parsers to look up their config by friendly name (e.g., "pdf-parser") + // matching the same format used in tika-config.json ConfigContainer configContainer = null; for (Iterator<String> it = contextNode.fieldNames(); it.hasNext(); ) { - String nodeName = it.next(); - if (! 
"objects".equals(nodeName)) { + String fieldName = it.next(); + if (!"objects".equals(fieldName)) { if (configContainer == null) { configContainer = new ConfigContainer(); } - configContainer.set(nodeName, contextNode.get(nodeName).toString()); + configContainer.set(fieldName, contextNode.get(fieldName).toString()); } } + if (configContainer != null) { parseContext.set(ConfigContainer.class, configContainer); } + return parseContext; } - private static String readVal(String key, JsonNode jsonObj, String defaultRet, boolean isRequired) throws IOException { - JsonNode valNode = jsonObj.get(key); - if (valNode == null) { - if (isRequired) { - throw new IOException("Sorry, no value for key=" + key); - } - return defaultRet; - } - return valNode.asText(); - } } diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextSerializer.java b/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextSerializer.java index 64bb5c8cd..096710a45 100644 --- a/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextSerializer.java +++ b/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextSerializer.java @@ -17,11 +17,14 @@ package org.apache.tika.serialization; import java.io.IOException; -import java.util.Set; +import java.util.Map; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.jsontype.BasicPolymorphicTypeValidator; +import com.fasterxml.jackson.databind.jsontype.PolymorphicTypeValidator; import org.apache.tika.config.ConfigContainer; import org.apache.tika.parser.ParseContext; @@ -29,34 +32,84 @@ import org.apache.tika.parser.ParseContext; public class ParseContextSerializer extends JsonSerializer<ParseContext> { public static final String PARSE_CONTEXT = "parseContext"; + /** + * Creates 
an ObjectMapper for serialization with polymorphic type handling. + * Configures security validation to allow only org.apache.tika.* and java.util.* classes. + * Uses OBJECT_AND_NON_CONCRETE typing to add type info for Object and abstract types, + * which avoids interfering with custom serializers for concrete types like ParseContext. + */ + static ObjectMapper createMapper() { + ObjectMapper mapper = new ObjectMapper(); + + // Configure polymorphic type validator for security + // Use allowIfSubType to allow: + // - org.apache.tika.* classes (all Tika types) + // - java.util.* classes (collections, dates, etc.) + // This is needed because we deserialize with base type Object.class + PolymorphicTypeValidator typeValidator = BasicPolymorphicTypeValidator.builder() + .allowIfSubType("org.apache.tika.") + .allowIfSubType("java.util.") + .build(); + + // Use OBJECT_AND_NON_CONCRETE to add type info when static type is: + // - Object.class (for objects in the "objects" map) + // - Abstract classes or interfaces (for nested polymorphic objects like List<MetadataFilter>) + // This avoids adding type info to concrete types like ParseContext (which has a custom serializer) + mapper.activateDefaultTyping(typeValidator, ObjectMapper.DefaultTyping.OBJECT_AND_NON_CONCRETE); + + return mapper; + } + @Override - public void serialize(ParseContext parseContext, JsonGenerator jsonGenerator, SerializerProvider serializerProvider) throws IOException { + public void serialize(ParseContext parseContext, JsonGenerator jsonGenerator, + SerializerProvider serializerProvider) throws IOException { jsonGenerator.writeStartObject(); - Set<String> objectKeySet = parseContext.keySet(); - ConfigContainer p = parseContext.get(ConfigContainer.class); - if ((p != null && objectKeySet.size() > 1) || (p == null && ! 
objectKeySet.isEmpty())) { + + Map<String, Object> contextMap = parseContext.getContextMap(); + ConfigContainer configContainer = parseContext.get(ConfigContainer.class); + + // Serialize objects stored directly in ParseContext (legacy format) + // These are objects set via context.set(SomeClass.class, someObject) + boolean hasNonConfigObjects = contextMap.size() > (configContainer != null ? 1 : 0); + if (hasNonConfigObjects) { jsonGenerator.writeFieldName("objects"); jsonGenerator.writeStartObject(); - for (String className : parseContext.keySet()) { + + ObjectMapper mapper = (ObjectMapper) jsonGenerator.getCodec(); + if (mapper == null) { + mapper = createMapper(); + } + + for (Map.Entry<String, Object> entry : contextMap.entrySet()) { + String className = entry.getKey(); if (className.equals(ConfigContainer.class.getName())) { continue; } - try { - Class clazz = Class.forName(className); - TikaJsonSerializer.serialize(className, parseContext.get(clazz), jsonGenerator); - } catch (TikaSerializationException e) { - throw new IOException(e); - } catch (ClassNotFoundException e) { - throw new IllegalArgumentException(e); - } + + Object value = entry.getValue(); + + // Write the field name (superclass/interface name from key) + jsonGenerator.writeFieldName(className); + + // Let Jackson handle type information and serialization + // Use writerFor(Object.class) to ensure polymorphic type info is added + mapper.writerFor(Object.class).writeValue(jsonGenerator, value); } + jsonGenerator.writeEndObject(); } - if (p != null) { - for (String k : p.getKeys()) { - jsonGenerator.writeStringField(k, p.get(k).get()); + + // Write ConfigContainer fields as top-level properties (new friendly-name format) + // Each field contains a JSON string representing a parser/component configuration + // using the same friendly names as tika-config.json (e.g., "pdf-parser", "html-parser") + if (configContainer != null) { + for (String key : configContainer.getKeys()) { + 
jsonGenerator.writeFieldName(key); + // Write the JSON string as raw JSON (not as a quoted string) + jsonGenerator.writeRawValue(configContainer.get(key).get()); } } + jsonGenerator.writeEndObject(); } } diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonDeserializer.java b/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonDeserializer.java deleted file mode 100644 index ac0cd5e42..000000000 --- a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonDeserializer.java +++ /dev/null @@ -1,412 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.serialization; - -import java.lang.reflect.Array; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.lang.reflect.ParameterizedType; -import java.lang.reflect.Type; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; - -import com.fasterxml.jackson.databind.JsonNode; - -/** - * See the notes @link{TikaJsonSerializer}. - * <p> - * This currently requires a setString() option on objects that have enum parameters. 
- */ -public class TikaJsonDeserializer { - - public static Optional deserializeObject(JsonNode root) { - if (!root.isObject()) { - throw new IllegalArgumentException("root needs to be an object"); - } - if (!root.has(TikaJsonSerializer.INSTANTIATED_CLASS_KEY)) { - throw new IllegalArgumentException("need to specify: " + TikaJsonSerializer.INSTANTIATED_CLASS_KEY); - } - String className = root - .get(TikaJsonSerializer.INSTANTIATED_CLASS_KEY) - .asText(); - - try { - return Optional.of(deserialize(Class.forName(className), root)); - } catch (Exception e) { - throw new IllegalArgumentException(e); - } - } - - public static <T> T deserialize(Class<? extends T> clazz, JsonNode root) throws ReflectiveOperationException { - T obj = clazz - .getDeclaredConstructor() - .newInstance(); - Map<String, List<Method>> setters = getSetters(obj); - if (!root.isObject()) { - throw new IllegalArgumentException("must be object"); - } - for (Map.Entry<String, JsonNode> e : root.properties()) { - String name = e.getKey(); - JsonNode child = e.getValue(); - if (TikaJsonSerializer.INSTANTIATED_CLASS_KEY.equals(name)) { - continue; - } - setValue(name, child, obj, setters); - } - return obj; - } - - private static Map<String, List<Method>> getSetters(Object obj) { - Map<String, List<Method>> setters = new HashMap<>(); - for (Method m : obj - .getClass() - .getMethods()) { - String n = m.getName(); - if (n.startsWith(TikaJsonSerializer.SET) && n.length() > 3 && Character.isUpperCase(n.charAt(3))) { - if (m.getParameters().length == 1) { - String paramName = TikaJsonSerializer.getParam(TikaJsonSerializer.SET, n); - List<Method> methods = setters.get(paramName); - if (methods == null) { - methods = new ArrayList<>(); - setters.put(paramName, methods); - } - methods.add(m); - } - } - } - return setters; - } - - private static void setValue(String name, JsonNode node, Object obj, Map<String, List<Method>> setters) throws ReflectiveOperationException { - List<Method> mySetters = 
setters.get(name); - if (mySetters == null || mySetters.isEmpty()) { - throw new IllegalArgumentException("can't find any setter for " + name); - } - if (node.isNull()) { - setNull(name, node, obj, mySetters); - } else if (node.isNumber()) { - setNumericValue(name, node, obj, mySetters); - } else if (node.isTextual()) { - setStringValue(name, node.asText(), obj, mySetters); - } else if (node.isArray()) { - setArray(name, node, obj, mySetters); - } else if (node.isObject()) { - setObject(name, node, obj, mySetters); - } else if (node.isBoolean()) { - setBoolean(name, node, obj, mySetters); - } - } - - private static void setArray(String name, JsonNode node, Object obj, List<Method> mySetters) { - //there's much more to be done here. :( - for (Method setter : mySetters) { - try { - tryArray(name, node, obj, setter); - } catch (InvocationTargetException | IllegalAccessException e) { - throw new IllegalArgumentException("couldn't create array for " + name); - } - } - } - - private static void tryArray(String name, JsonNode node, Object obj, Method setter) throws InvocationTargetException, IllegalAccessException { - Class argClass = setter.getParameterTypes()[0]; - Class componentType = argClass.getComponentType(); - if (argClass.isArray()) { - int len = node.size(); - Object arrayObject = Array.newInstance(componentType, len); - for (int i = 0; i < len; i++) { - Array.set(arrayObject, i, getVal(componentType, node.get(i))); - } - setter.invoke(obj, arrayObject); - - } else if (List.class.isAssignableFrom(argClass)) { - Type listType = setter.getGenericParameterTypes()[0]; - Type elementType = null; - if (listType instanceof ParameterizedType) { - elementType = ((ParameterizedType) listType).getActualTypeArguments()[0]; - } - if (elementType == null) { - throw new IllegalArgumentException("Can't infer parameterized type for list in: " + node); - } - int len = node.size(); - List<Object> list = new ArrayList<>(); - for (int i = 0; i < len; i++) { - 
list.add(getVal(elementType, node.get(i))); - } - setter.invoke(obj, list); - } - } - - private static <T> T getVal(T clazz, JsonNode node) { - if (clazz.equals(String.class)) { - return (T) node.asText(); - } else if (clazz.equals(Integer.class) || clazz.equals(int.class)) { - return (T) Integer.valueOf(node.intValue()); - } else if (clazz.equals(Long.class) || clazz.equals(long.class)) { - return (T) Long.valueOf(node.longValue()); - } else if (clazz.equals(Float.class) || clazz.equals(float.class)) { - return (T) Float.valueOf(node.floatValue()); - } else if (clazz.equals(Double.class) || clazz.equals(double.class)) { - return (T) Double.valueOf(node.doubleValue()); - } else if (node.isObject()) { - if (node.has(TikaJsonSerializer.INSTANTIATED_CLASS_KEY)) { - Optional<T> optional = deserializeObject(node); - if (optional.isPresent()) { - return optional.get(); - } - } else { - throw new IllegalArgumentException("I see a json object, but I don't see " + - TikaJsonSerializer.INSTANTIATED_CLASS_KEY + ": " + node); - } - } - //add short, boolean - throw new IllegalArgumentException("I regret I don't yet support: " + clazz); - } - - private static void setObject(String name, JsonNode node, Object obj, List<Method> mySetters) { - if (!node.has(TikaJsonSerializer.INSTANTIATED_CLASS_KEY)) { - setMap(name, node, obj, mySetters); - return; - } - - Optional object = deserializeObject(node); - if (object.isEmpty()) { - //log, throw exception?! 
- return; - } - for (Method m : mySetters) { - Class argClass = m.getParameters()[0].getType(); - if (argClass.isAssignableFrom(object - .get() - .getClass())) { - try { - m.invoke(obj, object.get()); - return; - } catch (IllegalAccessException | InvocationTargetException e) { - //swallow - } - } - } - throw new IllegalArgumentException("can't set object on " + name); - } - - private static void setMap(String name, JsonNode node, Object obj, List<Method> setters) { - //TODO this should try to match the map setters with the data types - //for now, we're just doing <String,String> - Map<String, String> val = new HashMap<>(); - for (Map.Entry<String, JsonNode> e : node.properties()) { - val.put(e.getKey(), e - .getValue() - .textValue()); - } - for (Method m : setters) { - try { - m.invoke(obj, val); - return; - } catch (ReflectiveOperationException e) { - //swallow - } - } - throw new IllegalArgumentException("can't find map setter for: " + name); - } - - private static void setBoolean(String name, JsonNode node, Object obj, List<Method> setters) throws ReflectiveOperationException { - for (Method m : setters) { - Class argClass = m.getParameters()[0].getType(); - if (argClass.equals(Boolean.class) || argClass.equals(boolean.class)) { - m.invoke(obj, node.booleanValue()); - return; - } - } - //TODO -- maybe check for string? 
- throw new IllegalArgumentException("can't set boolean on " + name); - } - - private static void setNull(String name, JsonNode node, Object obj, List<Method> setters) { - for (Method m : setters) { - Class argClass = m.getParameters()[0].getType(); - if (!TikaJsonSerializer.PRIMITIVES.contains(argClass)) { - try { - - m.invoke(obj, argClass.cast(null)); - return; - } catch (Exception e) { - //swallow - } - } - } - throw new IllegalArgumentException("can't set null on " + name); - } - - private static void setStringValue(String name, String txt, Object obj, List<Method> setters) throws ReflectiveOperationException { - - //try for exact match first - for (Method m : setters) { - Class argClass = m.getParameters()[0].getType(); - if (argClass.equals(String.class)) { - m.invoke(obj, txt); - return; - } - } - Method intMethod = null; - Method longMethod = null; - Method doubleMethod = null; - Method floatMethod = null; - Method shortMethod = null; - Method boolMethod = null; - for (Method m : setters) { - Class argClass = m.getParameters()[0].getType(); - if (argClass.equals(Integer.class) || argClass.equals(int.class)) { - intMethod = m; - } else if (argClass.equals(Long.class) || argClass.equals(long.class)) { - longMethod = m; - } else if (argClass.equals(Float.class) || argClass.equals(float.class)) { - floatMethod = m; - } else if (argClass.equals(Double.class) || argClass.equals(double.class)) { - doubleMethod = m; - } else if (argClass.equals(Short.class) || argClass.equals(short.class)) { - shortMethod = m; - } else if (argClass.equals(Boolean.class) || argClass.equals(boolean.class)) { - boolMethod = m; - } - } - - if (shortMethod != null) { - try { - short val = Short.parseShort(txt); - shortMethod.invoke(obj, val); - return; - } catch (NumberFormatException e) { - //swallow - } - } else if (intMethod != null) { - try { - int val = Integer.parseInt(txt); - intMethod.invoke(obj, val); - return; - } catch (NumberFormatException e) { - //swallow - } - } else if 
(floatMethod != null) { - try { - float val = Float.parseFloat(txt); - floatMethod.invoke(obj, val); - return; - } catch (NumberFormatException e) { - //swallow - } - } else if (longMethod != null) { - try { - long val = Long.parseLong(txt); - longMethod.invoke(obj, val); - return; - } catch (NumberFormatException e) { - //swallow - } - } else if (doubleMethod != null) { - try { - double val = Double.parseDouble(txt); - doubleMethod.invoke(obj, val); - return; - } catch (NumberFormatException e) { - //swallow - } - } else if (boolMethod != null) { - if (txt.equalsIgnoreCase("true")) { - boolMethod.invoke(obj, true); - } else if (txt.equalsIgnoreCase("false")) { - boolMethod.invoke(obj, false); - } - } - throw new IllegalArgumentException("I regret I couldn't find a setter for: " + name); - - } - - private static void setNumericValue(String name, JsonNode node, Object obj, List<Method> setters) throws ReflectiveOperationException { - - //try numeric and equals first - for (Method m : setters) { - Class argClass = m.getParameters()[0].getType(); - if ((argClass.equals(int.class) || argClass.equals(Integer.class)) && node.isInt()) { - m.invoke(obj, node.intValue()); - return; - } else if ((argClass.equals(long.class) || argClass.equals(Long.class)) && node.isLong()) { - m.invoke(obj, node.asLong()); - return; - } else if ((argClass.equals(float.class) || argClass.equals(Float.class)) && node.isFloat()) { - m.invoke(obj, node.floatValue()); - return; - } else if ((argClass.equals(double.class) || argClass.equals(Double.class)) && node.isDouble()) { - m.invoke(obj, node.doubleValue()); - return; - } else if ((argClass.equals(short.class) || argClass.equals(Short.class)) && node.isShort()) { - m.invoke(obj, node.shortValue()); - return; - } - } - //try for higher precision setters - for (Method m : setters) { - Class argClass = m.getParameters()[0].getType(); - if ((argClass.equals(long.class) || argClass.equals(Long.class)) && node.isInt()) { - m.invoke(obj, 
node.asLong()); - return; - } else if ((argClass.equals(double.class) || argClass.equals(Double.class)) && node.isFloat()) { - m.invoke(obj, node.floatValue()); - return; - } - } - //try for lower precision setters - //we have to do this for node=double, type=float; should we do this for long->integer?! - for (Method m : setters) { - Class argClass = m.getParameters()[0].getType(); - if ((argClass.equals(int.class) || argClass.equals(Integer.class)) && node.isLong()) { - long val = node.longValue(); - if (val >= Integer.MAX_VALUE || val <= Integer.MIN_VALUE) { - //don't do this - } else { - m.invoke(obj, node.intValue()); - } - return; - } else if ((argClass.equals(float.class) || argClass.equals(Float.class)) && node.isDouble()) { - //TODO -- check for over/underflow - m.invoke(obj, node.floatValue()); - return; - } else if ((argClass.equals(short.class) || argClass.equals(Short.class)) && node.isInt()) { - int val = node.intValue(); - if (val > Short.MAX_VALUE || val < Short.MIN_VALUE) { - //don't do this - } else { - m.invoke(obj, node.shortValue()); - return; - } - } - } - //finally try for String - for (Method m : setters) { - Class argClass = m.getParameters()[0].getType(); - if (argClass.equals(String.class)) { - m.invoke(obj, node.asText()); - return; - } - } - throw new IllegalArgumentException("Couldn't find numeric setter for: " + name); - - } -} diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonSerializer.java b/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonSerializer.java deleted file mode 100644 index 77a2400e5..000000000 --- a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonSerializer.java +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.serialization; - -import java.io.IOException; -import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Set; - -import com.fasterxml.jackson.core.JsonGenerator; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -/** - * This is a basic serializer that requires that an object: - * a) have a no-arg constructor - * b) have both setters and getters for the same parameters with the same names, e.g. 
setXYZ and getXYZ - * c) setters and getters have to follow the pattern setX where x is a capital letter - * d) have maps as parameters where the keys are strings (and the values are strings for now) - * e) at deserialization time, objects that have setters for enums also have to have a setter for a string value of that enum - */ -public class TikaJsonSerializer { - - public static String INSTANTIATED_CLASS_KEY = "_class"; - static Set<Class> PRIMITIVES = Set.of(int.class, double.class, float.class, long.class, short.class, boolean.class, String.class, byte.class, char.class); - static Set<Class> BOXED = Set.of(Integer.class, Double.class, Float.class, Long.class, Short.class, Boolean.class, Byte.class, Character.class); - static String SET = "set"; - private static Logger LOG = LoggerFactory.getLogger(TikaJsonSerializer.class); - private static String GET = "get"; - private static String IS = "is"; - - public static void serialize(Object obj, JsonGenerator jsonGenerator) throws TikaSerializationException, IOException { - serialize(null, obj, jsonGenerator); - } - - public static void serialize(String fieldName, Object obj, JsonGenerator jsonGenerator) throws TikaSerializationException, IOException { - if (obj == null) { - if (fieldName == null) { - jsonGenerator.writeNull(); - } else { - jsonGenerator.writeNullField(fieldName); - } - } else if (PRIMITIVES.contains(obj.getClass()) || BOXED.contains(obj.getClass())) { - try { - serializePrimitiveAndBoxed(fieldName, obj, jsonGenerator); - } catch (IOException e) { - throw new TikaSerializationException("problem serializing", e); - } - } else if (isCollection(obj)) { - serializeCollection(fieldName, obj, jsonGenerator); - } else if (obj - .getClass() - .isEnum()) { - jsonGenerator.writeStringField(fieldName, ((Enum) obj).name()); - } else { - serializeObject(fieldName, obj, jsonGenerator); - } - } - - /** - * limited to array, list and map - * - * @param obj - * @return - */ - private static boolean 
isCollection(Object obj) { - Class clazz = obj.getClass(); - return clazz.isArray() || List.class.isAssignableFrom(clazz) || Map.class.isAssignableFrom(clazz); - } - - - /** - * @param fieldName can be null -- used only for logging and debugging - * @param obj - * @param jsonGenerator - * @throws TikaSerializationException - */ - public static void serializeObject(String fieldName, Object obj, JsonGenerator jsonGenerator) throws TikaSerializationException { - - try { - Constructor constructor = obj - .getClass() - .getConstructor(); - } catch (NoSuchMethodException e) { - throw new IllegalArgumentException("class (" + obj.getClass() + ") doesn't have a no-arg constructor. Respectfully not serializing."); - } - try { - if (fieldName != null) { - jsonGenerator.writeFieldName(fieldName); - } - jsonGenerator.writeStartObject(); - jsonGenerator.writeStringField(INSTANTIATED_CLASS_KEY, obj - .getClass() - .getName()); - Map<String, Method> matches = getGetters(obj - .getClass() - .getMethods()); - //iterate through the getters - for (Map.Entry<String, Method> e : matches.entrySet()) { - try { - Object methodVal = e - .getValue() - .invoke(obj); - serialize(e.getKey(), methodVal, jsonGenerator); - } catch (IllegalAccessException | InvocationTargetException ex) { - throw new TikaSerializationException("couldn't write paramName=" + e.getKey(), ex); - } - } - - jsonGenerator.writeEndObject(); - } catch (IOException e) { - throw new TikaSerializationException("problem", e); - } - } - - private static Map<String, Method> getGetters(Method[] methods) { - Map<String, List<Method>> getters = new HashMap<>(); - Map<String, List<Method>> setters = new HashMap<>(); - - for (Method m : methods) { - String name = m.getName(); - if (name.startsWith("get") && name.length() > 3 && Character.isUpperCase(name.charAt(3))) { - String param = getParam(GET, name); - add(param, m, getters); - } else if (name.startsWith("is") && name.length() > 2 && Character.isUpperCase(name.charAt(2))) { - 
String param = getParam(IS, name); - add(param, m, getters); - } else if (name.startsWith("set") && name.length() > 3 && Character.isUpperCase(name.charAt(3))) { - //take only single param setters - if (m.getParameters().length == 1) { - String param = getParam(SET, name); - add(param, m, setters); - } - } - } - //this strictly looks for classA.equals(classB) - //this does not look for instance of, nor does it look for boxed vs. primitives - //Also, TODO -- this should favor getters and setters with Strings over those - //with complex types - Map<String, Method> ret = new HashMap<>(); - for (Map.Entry<String, List<Method>> e : getters.entrySet()) { - String paramName = e.getKey(); - //figure out how to skip Class level setters/getters - if ("class".equals(paramName)) { - continue; - } - List<Method> setterList = setters.get(paramName); - if (setterList == null || setterList.size() == 0) { - LOG.debug("Couldn't find setter for getter: " + paramName); - continue; - } - for (Method getter : e.getValue()) { - for (Method setter : setterList) { - Class setClass = setter.getParameters()[0].getType(); - if (getter - .getReturnType() - .equals(setClass)) { - ret.put(paramName, getter); - } - } - } - } - return ret; - } - - private static void serializeCollection(String fieldName, Object obj, JsonGenerator jsonGenerator) throws IOException, TikaSerializationException { - if (fieldName != null) { - jsonGenerator.writeFieldName(fieldName); - } - Class clazz = obj.getClass(); - if (clazz.isArray()) { - jsonGenerator.writeStartArray(); - for (Object item : (Object[]) obj) { - serialize(item, jsonGenerator); - } - jsonGenerator.writeEndArray(); - } else if (List.class.isAssignableFrom(clazz)) { - //should we get the generic type of the list via reflection - //so that we can set the superclass field in the item? 
- jsonGenerator.writeStartArray(); - for (Object item : (List) obj) { - serialize(item, jsonGenerator); - } - jsonGenerator.writeEndArray(); - } else if (Map.class.isAssignableFrom(clazz)) { - jsonGenerator.writeStartObject(); - for (Map.Entry<String, Object> e : ((Map<String, Object>) obj).entrySet()) { - serialize(e.getKey(), e.getValue(), jsonGenerator); - } - jsonGenerator.writeEndObject(); - } else { - throw new UnsupportedOperationException("Should have been a collection?! " + clazz); - } - } - - private static void serializePrimitiveAndBoxed(String paramName, Object obj, JsonGenerator jsonGenerator) throws IOException { - Class clazz = obj.getClass(); - if (paramName != null) { - jsonGenerator.writeFieldName(paramName); - } - if (clazz.equals(String.class)) { - jsonGenerator.writeString((String) obj); - } else if (clazz.equals(Integer.class)) { - jsonGenerator.writeNumber((Integer) obj); - } else if (clazz.equals(Short.class)) { - jsonGenerator.writeNumber((Short) obj); - } else if (clazz.equals(Long.class)) { - jsonGenerator.writeNumber((Long) obj); - } else if (clazz.equals(Float.class)) { - jsonGenerator.writeNumber((Float) obj); - } else if (clazz.equals(Double.class)) { - jsonGenerator.writeNumber((Double) obj); - } else if (clazz.equals(Boolean.class)) { - jsonGenerator.writeBoolean((Boolean) obj); - } else if (clazz.equals(short.class)) { - jsonGenerator.writeNumber((short) obj); - } else if (clazz.equals(int.class)) { - jsonGenerator.writeNumber((int) obj); - } else if (clazz.equals(long.class)) { - jsonGenerator.writeNumber((long) obj); - } else if (clazz.equals(float.class)) { - jsonGenerator.writeNumber((float) obj); - } else if (clazz.equals(double.class)) { - jsonGenerator.writeNumber((double) obj); - } else if (clazz.equals(boolean.class)) { - jsonGenerator.writeBoolean((boolean) obj); - } else { - throw new UnsupportedOperationException("I regret that I don't yet support " + clazz); - } - - } - - private static void add(String param, Method 
method, Map<String, List<Method>> map) { - List<Method> methods = map.get(param); - if (methods == null) { - methods = new ArrayList<>(); - map.put(param, methods); - } - methods.add(method); - } - - static String getParam(String prefix, String name) { - String ret = name.substring(prefix.length()); - ret = ret - .substring(0, 1) - .toLowerCase(Locale.ROOT) + ret.substring(1); - return ret; - } - -} diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/ConfigDeserializerTest.java b/tika-serialization/src/test/java/org/apache/tika/serialization/ConfigDeserializerTest.java new file mode 100644 index 000000000..00bd55a2c --- /dev/null +++ b/tika-serialization/src/test/java/org/apache/tika/serialization/ConfigDeserializerTest.java @@ -0,0 +1,305 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.serialization; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.jupiter.api.Test; + +import org.apache.tika.config.ConfigContainer; +import org.apache.tika.config.ParseContextConfig; +import org.apache.tika.parser.ParseContext; + +public class ConfigDeserializerTest { + + /** + * Simple test config class to verify immutability + */ + public static class TestConfig { + private String name = "default"; + private int value = 100; + private boolean enabled = false; + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public int getValue() { + return value; + } + + public void setValue(int value) { + this.value = value; + } + + public boolean isEnabled() { + return enabled; + } + + public void setEnabled(boolean enabled) { + this.enabled = enabled; + } + } + + @Test + public void testDefaultConfigImmutability() throws Exception { + // Create a default config + TestConfig defaultConfig = new TestConfig(); + defaultConfig.setName("default"); + defaultConfig.setValue(100); + defaultConfig.setEnabled(false); + + // Store original values + String originalName = defaultConfig.getName(); + int originalValue = defaultConfig.getValue(); + boolean originalEnabled = defaultConfig.isEnabled(); + + // Create ParseContext with user config that overrides some values + ParseContext context = new ParseContext(); + ConfigContainer configContainer = new ConfigContainer(); + configContainer.set("test-config", "{\"name\":\"override\",\"value\":200}"); + context.set(ConfigContainer.class, configContainer); + + // Get merged config + TestConfig mergedConfig = 
ConfigDeserializer.getConfig(context, "test-config", TestConfig.class, defaultConfig); + + // Verify merged config has user overrides + assertNotNull(mergedConfig); + assertEquals("override", mergedConfig.getName()); + assertEquals(200, mergedConfig.getValue()); + assertEquals(false, mergedConfig.isEnabled()); // Not overridden, should use default + + // CRITICAL: Verify defaultConfig was NOT modified + assertEquals(originalName, defaultConfig.getName(), "defaultConfig.name should not be modified"); + assertEquals(originalValue, defaultConfig.getValue(), "defaultConfig.value should not be modified"); + assertEquals(originalEnabled, defaultConfig.isEnabled(), "defaultConfig.enabled should not be modified"); + + // Verify we got a different object + assertNotSame(defaultConfig, mergedConfig, "Should return a new config object, not the default"); + } + + @Test + public void testDefaultConfigImmutabilityMultipleCalls() throws Exception { + // Create a shared default config (simulating what parsers do) + TestConfig sharedDefault = new TestConfig(); + sharedDefault.setName("shared"); + sharedDefault.setValue(50); + sharedDefault.setEnabled(true); + + // First request with one override + ParseContext context1 = new ParseContext(); + ConfigContainer configContainer1 = new ConfigContainer(); + configContainer1.set("test-config", "{\"value\":100}"); + context1.set(ConfigContainer.class, configContainer1); + + TestConfig config1 = ConfigDeserializer.getConfig(context1, "test-config", TestConfig.class, sharedDefault); + + // Second request with different override + ParseContext context2 = new ParseContext(); + ConfigContainer configContainer2 = new ConfigContainer(); + configContainer2.set("test-config", "{\"name\":\"request2\",\"enabled\":false}"); + context2.set(ConfigContainer.class, configContainer2); + + TestConfig config2 = ConfigDeserializer.getConfig(context2, "test-config", TestConfig.class, sharedDefault); + + // Verify each request got its own merged config + 
assertEquals("shared", config1.getName()); + assertEquals(100, config1.getValue()); + assertEquals(true, config1.isEnabled()); + + assertEquals("request2", config2.getName()); + assertEquals(50, config2.getValue()); // Used default + assertEquals(false, config2.isEnabled()); + + // CRITICAL: Verify shared default was never modified + assertEquals("shared", sharedDefault.getName()); + assertEquals(50, sharedDefault.getValue()); + assertEquals(true, sharedDefault.isEnabled()); + + // Verify all three are different objects + assertNotSame(sharedDefault, config1); + assertNotSame(sharedDefault, config2); + assertNotSame(config1, config2); + } + + @Test + public void testNoDefaultConfig() throws Exception { + ParseContext context = new ParseContext(); + ConfigContainer configContainer = new ConfigContainer(); + configContainer.set("test-config", "{\"name\":\"test\",\"value\":123}"); + context.set(ConfigContainer.class, configContainer); + + TestConfig config = ConfigDeserializer.getConfig(context, "test-config", TestConfig.class, null); + + assertNotNull(config); + assertEquals("test", config.getName()); + assertEquals(123, config.getValue()); + assertEquals(false, config.isEnabled()); // Default value from class + } + + @Test + public void testNoUserConfig() throws Exception { + TestConfig defaultConfig = new TestConfig(); + defaultConfig.setName("default"); + defaultConfig.setValue(999); + + // No ConfigContainer in ParseContext + ParseContext context = new ParseContext(); + + TestConfig config = ConfigDeserializer.getConfig(context, "test-config", TestConfig.class, defaultConfig); + + // Should return the default config as-is + assertEquals(defaultConfig, config); + assertEquals("default", config.getName()); + assertEquals(999, config.getValue()); + } + + @Test + public void testNoConfigContainer() throws Exception { + TestConfig defaultConfig = new TestConfig(); + defaultConfig.setName("default"); + + ParseContext context = new ParseContext(); + // No 
ConfigContainer set + + TestConfig config = ConfigDeserializer.getConfig(context, "test-config", TestConfig.class, defaultConfig); + + assertEquals(defaultConfig, config); + } + + @Test + public void testHasConfig() throws Exception { + ParseContext context = new ParseContext(); + ConfigContainer configContainer = new ConfigContainer(); + configContainer.set("parser-a", "{\"key\":\"value\"}"); + configContainer.set("parser-b", "{\"key\":\"value\"}"); + context.set(ConfigContainer.class, configContainer); + + assertTrue(ConfigDeserializer.hasConfig(context, "parser-a")); + assertTrue(ConfigDeserializer.hasConfig(context, "parser-b")); + assertFalse(ConfigDeserializer.hasConfig(context, "parser-c")); + } + + @Test + public void testHasConfigNoContainer() throws Exception { + ParseContext context = new ParseContext(); + + assertFalse(ConfigDeserializer.hasConfig(context, "parser-a")); + } + + @Test + public void testHasConfigNullContext() throws Exception { + assertFalse(ConfigDeserializer.hasConfig(null, "parser-a")); + } + + @Test + public void testGetConfigNullContext() throws Exception { + TestConfig defaultConfig = new TestConfig(); + + TestConfig config = ConfigDeserializer.getConfig(null, "test-config", TestConfig.class, defaultConfig); + + assertEquals(defaultConfig, config); + } + + @Test + public void testGetConfigWithoutDefault() throws Exception { + ParseContext context = new ParseContext(); + ConfigContainer configContainer = new ConfigContainer(); + configContainer.set("test-config", "{\"name\":\"test\"}"); + context.set(ConfigContainer.class, configContainer); + + TestConfig config = ConfigDeserializer.getConfig(context, "test-config", TestConfig.class); + + assertNotNull(config); + assertEquals("test", config.getName()); + } + + @Test + public void testGetConfigWithoutDefaultNoUserConfig() throws Exception { + ParseContext context = new ParseContext(); + + TestConfig config = ConfigDeserializer.getConfig(context, "test-config", TestConfig.class); + + 
assertNull(config); + } + + @Test + public void testParseContextConfigWrapperDelegation() throws Exception { + // Test that ParseContextConfig correctly delegates to ConfigDeserializer + // when tika-serialization is on the classpath + + TestConfig defaultConfig = new TestConfig(); + defaultConfig.setName("default"); + defaultConfig.setValue(100); + + ParseContext context = new ParseContext(); + ConfigContainer configContainer = new ConfigContainer(); + configContainer.set("test-parser", "{\"name\":\"override\",\"value\":200}"); + context.set(ConfigContainer.class, configContainer); + + // Use the wrapper + TestConfig config = ParseContextConfig.getConfig(context, "test-parser", TestConfig.class, defaultConfig); + + // Should get merged config + assertNotNull(config); + assertEquals("override", config.getName()); + assertEquals(200, config.getValue()); + + // Verify immutability + assertEquals("default", defaultConfig.getName()); + assertEquals(100, defaultConfig.getValue()); + } + + @Test + public void testParseContextConfigWrapperNoConfig() throws Exception { + // Test wrapper when no config is present + TestConfig defaultConfig = new TestConfig(); + defaultConfig.setName("default"); + + ParseContext context = new ParseContext(); + + TestConfig config = ParseContextConfig.getConfig(context, "test-parser", TestConfig.class, defaultConfig); + + assertEquals(defaultConfig, config); + } + + @Test + public void testParseContextConfigWrapperIsAvailable() { + // Verify ConfigDeserializer is detected as available in this test environment + assertTrue(ParseContextConfig.isConfigDeserializerAvailable(), "ConfigDeserializer should be available when tika-serialization is on classpath"); + } + + @Test + public void testParseContextConfigWrapperHasConfig() { + ParseContext context = new ParseContext(); + ConfigContainer configContainer = new ConfigContainer(); + configContainer.set("parser-a", "{\"key\":\"value\"}"); + context.set(ConfigContainer.class, configContainer); + + 
assertTrue(ParseContextConfig.hasConfig(context, "parser-a")); + assertFalse(ParseContextConfig.hasConfig(context, "parser-b")); + } +} diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java b/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java index 55546d7d3..931eb9ba5 100644 --- a/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java +++ b/tika-serialization/src/test/java/org/apache/tika/serialization/TestParseContextSerialization.java @@ -17,14 +17,15 @@ package org.apache.tika.serialization; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.StringWriter; import java.io.Writer; import java.util.List; -import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.module.SimpleModule; import org.junit.jupiter.api.Test; @@ -38,6 +39,31 @@ import org.apache.tika.parser.ParseContext; public class TestParseContextSerialization { + private ObjectMapper createMapper() { + // Start with the properly configured mapper that has polymorphic type handling + ObjectMapper mapper = ParseContextSerializer.createMapper(); + + // Register our custom serializer/deserializer on top + SimpleModule module = new SimpleModule(); + module.addDeserializer(ParseContext.class, new ParseContextDeserializer()); + module.addSerializer(ParseContext.class, new ParseContextSerializer()); + mapper.registerModule(module); + return mapper; + } + + private String serializeParseContext(ParseContext pc) throws Exception { + ObjectMapper mapper = createMapper(); + try (Writer writer = new StringWriter()) { + try (JsonGenerator jsonGenerator = 
mapper + .getFactory() + .createGenerator(writer)) { + ParseContextSerializer serializer = new ParseContextSerializer(); + serializer.serialize(pc, jsonGenerator, null); + } + return writer.toString(); + } + } + @Test public void testBasic() throws Exception { @@ -50,24 +76,213 @@ public class TestParseContextSerialization { {"k1":1,"k2":"val3" } """); pc.set(ConfigContainer.class, configContainer); + ObjectMapper mapper = createMapper(); String json; try (Writer writer = new StringWriter()) { - try (JsonGenerator jsonGenerator = new JsonFactory().createGenerator(writer)) { + try (JsonGenerator jsonGenerator = mapper + .getFactory() + .createGenerator(writer)) { ParseContextSerializer serializer = new ParseContextSerializer(); serializer.serialize(pc, jsonGenerator, null); } json = writer.toString(); } - ObjectMapper mapper = new ObjectMapper(); - SimpleModule module = new SimpleModule(); - module.addDeserializer(ParseContext.class, new ParseContextDeserializer()); - mapper.registerModule(module); ParseContext deserialized = mapper.readValue(json, ParseContext.class); MetadataFilter dMetadataFilter = deserialized.get(MetadataFilter.class); assertTrue(dMetadataFilter instanceof CompositeMetadataFilter); - List<MetadataFilter> metadataFilters = ((CompositeMetadataFilter)dMetadataFilter).getFilters(); + List<MetadataFilter> metadataFilters = ((CompositeMetadataFilter) dMetadataFilter).getFilters(); assertEquals(1, metadataFilters.size()); assertTrue(metadataFilters.get(0) instanceof DateNormalizingMetadataFilter); } + + @Test + public void testFriendlyNameFormat() throws Exception { + // Test the new friendly-name format matching tika-config.json + ParseContext pc = new ParseContext(); + ConfigContainer configContainer = new ConfigContainer(); + + // Add friendly-named configurations + configContainer.set("pdf-parser", "{\"ocrStrategy\":\"AUTO\",\"extractInlineImages\":true}"); + configContainer.set("html-parser", "{\"extractScripts\":false}"); + + 
pc.set(ConfigContainer.class, configContainer); + + String json = serializeParseContext(pc); + + // Verify JSON structure + ObjectMapper mapper = createMapper(); + JsonNode root = mapper.readTree(json); + + assertTrue(root.has("pdf-parser"), "Should have pdf-parser field"); + assertTrue(root.has("html-parser"), "Should have html-parser field"); + assertEquals("AUTO", root + .get("pdf-parser") + .get("ocrStrategy") + .asText()); + assertEquals(false, root + .get("html-parser") + .get("extractScripts") + .asBoolean()); + + // Verify round-trip + ParseContext deserialized = mapper.readValue(json, ParseContext.class); + ConfigContainer deserializedConfig = deserialized.get(ConfigContainer.class); + assertNotNull(deserializedConfig); + assertTrue(deserializedConfig + .get("pdf-parser") + .isPresent()); + assertTrue(deserializedConfig + .get("html-parser") + .isPresent()); + } + + @Test + public void testLegacyObjectsFormat() throws Exception { + // Test the legacy format with "objects" field + MetadataFilter metadataFilter = new CompositeMetadataFilter(List.of(new DateNormalizingMetadataFilter())); + ParseContext pc = new ParseContext(); + pc.set(MetadataFilter.class, metadataFilter); + + String json = serializeParseContext(pc); + + // Verify JSON has "objects" field + ObjectMapper mapper = createMapper(); + JsonNode root = mapper.readTree(json); + assertTrue(root.has("objects"), "Should have objects field for legacy format"); + + // Verify round-trip + ParseContext deserialized = mapper.readValue(json, ParseContext.class); + MetadataFilter deserializedFilter = deserialized.get(MetadataFilter.class); + assertNotNull(deserializedFilter); + assertTrue(deserializedFilter instanceof CompositeMetadataFilter); + } + + @Test + public void testMixedFormat() throws Exception { + // Test that both legacy objects and new friendly names can coexist + MetadataFilter metadataFilter = new CompositeMetadataFilter(List.of(new DateNormalizingMetadataFilter())); + ParseContext pc = new 
ParseContext(); + pc.set(MetadataFilter.class, metadataFilter); + + ConfigContainer configContainer = new ConfigContainer(); + configContainer.set("pdf-parser", "{\"ocrStrategy\":\"NO_OCR\"}"); + pc.set(ConfigContainer.class, configContainer); + + String json = serializeParseContext(pc); + + // Verify both formats are present + ObjectMapper mapper = createMapper(); + JsonNode root = mapper.readTree(json); + assertTrue(root.has("objects"), "Should have objects field"); + assertTrue(root.has("pdf-parser"), "Should have pdf-parser field"); + + // Verify round-trip + ParseContext deserialized = mapper.readValue(json, ParseContext.class); + + // Check legacy object + MetadataFilter deserializedFilter = deserialized.get(MetadataFilter.class); + assertNotNull(deserializedFilter); + assertTrue(deserializedFilter instanceof CompositeMetadataFilter); + + // Check friendly-name config + ConfigContainer deserializedConfig = deserialized.get(ConfigContainer.class); + assertNotNull(deserializedConfig); + assertTrue(deserializedConfig + .get("pdf-parser") + .isPresent()); + } + + @Test + public void testConfigDeserializerHelper() throws Exception { + // Test the ConfigDeserializer helper utility + ParseContext pc = new ParseContext(); + ConfigContainer configContainer = new ConfigContainer(); + + // Simulate a PDFParserConfig as JSON + String pdfConfig = "{\"extractInlineImages\":true,\"ocrStrategy\":\"AUTO\"}"; + configContainer.set("pdf-parser", pdfConfig); + + pc.set(ConfigContainer.class, configContainer); + + // Test hasConfig + assertTrue(ConfigDeserializer.hasConfig(pc, "pdf-parser")); + + // Test getConfig with a simple JSON deserialization + // We can't use actual PDFParserConfig here since we don't have the dependency, + // but we can verify the JSON is retrieved correctly + String retrievedConfig = pc + .get(ConfigContainer.class) + .get("pdf-parser") + .orElse(null); + assertNotNull(retrievedConfig); + assertTrue(retrievedConfig.contains("extractInlineImages")); + } + 
+ @Test + public void testDeserializeFriendlyNameFromJSON() throws Exception { + // Test deserializing friendly-name format from raw JSON string + String json = """ + { + "pdf-parser": { + "ocrStrategy": "AUTO", + "extractInlineImages": true + }, + "html-parser": { + "extractScripts": false + } + } + """; + + ObjectMapper mapper = createMapper(); + ParseContext deserialized = mapper.readValue(json, ParseContext.class); + + ConfigContainer config = deserialized.get(ConfigContainer.class); + assertNotNull(config); + assertTrue(config + .get("pdf-parser") + .isPresent()); + assertTrue(config + .get("html-parser") + .isPresent()); + + // Verify the JSON content + String pdfParserJson = config + .get("pdf-parser") + .get(); + assertTrue(pdfParserJson.contains("AUTO")); + assertTrue(pdfParserJson.contains("extractInlineImages")); + } + + @Test + public void testDeserializeMixedFromJSON() throws Exception { + // Test deserializing JSON with both legacy objects and friendly names + // First create the ParseContext and serialize it to get the correct format + MetadataFilter metadataFilter = new CompositeMetadataFilter(List.of(new DateNormalizingMetadataFilter())); + ParseContext pc = new ParseContext(); + pc.set(MetadataFilter.class, metadataFilter); + + ConfigContainer configContainer = new ConfigContainer(); + configContainer.set("pdf-parser", "{\"ocrStrategy\":\"AUTO\"}"); + pc.set(ConfigContainer.class, configContainer); + + // Serialize to JSON + ObjectMapper mapper = createMapper(); + String json = mapper.writeValueAsString(pc); + + // Now deserialize it back + ParseContext deserialized = mapper.readValue(json, ParseContext.class); + + // Verify legacy object was deserialized + MetadataFilter filter = deserialized.get(MetadataFilter.class); + assertNotNull(filter); + assertTrue(filter instanceof CompositeMetadataFilter); + + // Verify friendly-name config was stored + ConfigContainer config = deserialized.get(ConfigContainer.class); + assertNotNull(config); + 
assertTrue(config + .get("pdf-parser") + .isPresent()); + } } diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/TikaJsonSerializationTest.java b/tika-serialization/src/test/java/org/apache/tika/serialization/TikaJsonSerializationTest.java deleted file mode 100644 index 8aefbc428..000000000 --- a/tika-serialization/src/test/java/org/apache/tika/serialization/TikaJsonSerializationTest.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.tika.serialization; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.StringReader; -import java.io.StringWriter; -import java.util.Optional; - -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.junit.jupiter.api.Test; - -import org.apache.tika.serialization.mocks.ClassC; - -public class TikaJsonSerializationTest { - - @Test - public void testBasic() throws Exception { - StringWriter sw = new StringWriter(); - ClassC classA = new ClassC(); - try (JsonGenerator jsonGenerator = new ObjectMapper().createGenerator(sw)) { - TikaJsonSerializer.serialize(classA, jsonGenerator); - } - JsonNode root = new ObjectMapper().readTree(new StringReader(sw.toString())); - Optional opt = TikaJsonDeserializer.deserializeObject(root); - assertTrue(opt.isPresent()); - assertEquals(classA, opt.get()); - - } - -} diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassA.java b/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassA.java deleted file mode 100644 index 5b17d7342..000000000 --- a/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassA.java +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.serialization.mocks; - -import java.util.Objects; - -public class ClassA { - private int a = 10; - private float b = 11.1f; - private short c = 2; - private long d = 13l; - private boolean e = false; - private Integer f = 14; - private Integer g = null; - private Long h = 15l; - private Long i = null; - private Boolean j = Boolean.TRUE; - private Boolean k = null; - - public int getA() { - return a; - } - - public void setA(int a) { - this.a = a; - } - - public float getB() { - return b; - } - - public void setB(float b) { - this.b = b; - } - - public short getC() { - return c; - } - - public void setC(short c) { - this.c = c; - } - - public long getD() { - return d; - } - - public void setD(long d) { - this.d = d; - } - - public boolean isE() { - return e; - } - - public void setE(boolean e) { - this.e = e; - } - - public Integer getF() { - return f; - } - - public void setF(Integer f) { - this.f = f; - } - - public Integer getG() { - return g; - } - - public void setG(Integer g) { - this.g = g; - } - - public Long getH() { - return h; - } - - public void setH(Long h) { - this.h = h; - } - - public Long getI() { - return i; - } - - public void setI(Long i) { - this.i = i; - } - - public Boolean getJ() { - return j; - } - - public void setJ(Boolean j) { - this.j = j; - } - - public Boolean getK() { - return k; - } - - public void setK(Boolean k) { - this.k = k; - } - - @Override - public String toString() { - return "ClassA{" + "a=" + a + ", b=" + b + ", c=" + c + ", d=" + d + ", e=" + e + ", f=" + f + ", g=" + g + ", h=" 
+ h + ", i=" + i + ", j=" + j + ", k=" + k + '}'; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - ClassA classA = (ClassA) o; - return a == classA.a && Float.compare(b, classA.b) == 0 && c == classA.c && d == classA.d && e == classA.e && Objects.equals(f, classA.f) && Objects.equals(g, classA.g) && - Objects.equals(h, classA.h) && Objects.equals(i, classA.i) && Objects.equals(j, classA.j) && Objects.equals(k, classA.k); - } - - @Override - public int hashCode() { - int result = a; - result = 31 * result + Float.hashCode(b); - result = 31 * result + c; - result = 31 * result + Long.hashCode(d); - result = 31 * result + Boolean.hashCode(e); - result = 31 * result + Objects.hashCode(f); - result = 31 * result + Objects.hashCode(g); - result = 31 * result + Objects.hashCode(h); - result = 31 * result + Objects.hashCode(i); - result = 31 * result + Objects.hashCode(j); - result = 31 * result + Objects.hashCode(k); - return result; - } -} diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassB.java b/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassB.java deleted file mode 100644 index e5b8d1d17..000000000 --- a/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassB.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.serialization.mocks; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; - -public class ClassB extends ClassA { - private String s = "hello world"; - private Map<String, String> counts = new HashMap<>(); - private Integer[] ints = new Integer[]{1, 2, 3, 4}; - private List<Float> floats = new ArrayList<>(); - - public ClassB() { - floats.add(2.3f); - floats.add(3.4f); - counts.put("k1", "v1"); - counts.put("k2", "v2"); - } - - public String getS() { - return s; - } - - public void setS(String s) { - this.s = s; - } - - public Map<String, String> getCounts() { - return counts; - } - - public void setCounts(Map<String, String> counts) { - this.counts = counts; - } - - public Integer[] getInts() { - return ints; - } - - public void setInts(Integer[] ints) { - this.ints = ints; - } - - public List<Float> getFloats() { - return floats; - } - - public void setFloats(List<Float> floats) { - this.floats = floats; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - if (!super.equals(o)) { - return false; - } - - ClassB classB = (ClassB) o; - return Objects.equals(s, classB.s) && Objects.equals(counts, classB.counts) && Arrays.equals(ints, classB.ints) && Objects.equals(floats, classB.floats); - } - - @Override - public int hashCode() { - int result = super.hashCode(); - result = 31 * result + Objects.hashCode(s); - 
result = 31 * result + Objects.hashCode(counts); - result = 31 * result + Arrays.hashCode(ints); - result = 31 * result + Objects.hashCode(floats); - return result; - } -} diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassC.java b/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassC.java deleted file mode 100644 index 7da5752c7..000000000 --- a/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassC.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.serialization.mocks; - -import java.util.Objects; - -public class ClassC { - - ClassB classB = new ClassB(); - - public ClassB getClassB() { - return classB; - } - - public void setClassB(ClassB classB) { - this.classB = classB; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - ClassC classC = (ClassC) o; - return Objects.equals(classB, classC.classB); - } - - @Override - public int hashCode() { - return Objects.hashCode(classB); - } -}
