This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch simplify-parse-context-serialization in repository https://gitbox.apache.org/repos/asf/tika.git
commit 327b165844d524c5f4fe1fd2c3c04d30a5b36a4e Author: tallison <[email protected]> AuthorDate: Wed Nov 26 15:49:24 2025 -0500 simplify parse context serialization --- .../java/org/apache/tika/parser/ParseContext.java | 14 + .../serialization/ParseContextDeserializer.java | 113 ++++-- .../tika/serialization/ParseContextSerializer.java | 70 +++- .../tika/serialization/TikaJsonDeserializer.java | 412 --------------------- .../tika/serialization/TikaJsonSerializer.java | 271 -------------- .../serialization/TikaJsonSerializationTest.java | 49 --- .../apache/tika/serialization/mocks/ClassA.java | 156 -------- .../apache/tika/serialization/mocks/ClassB.java | 96 ----- .../apache/tika/serialization/mocks/ClassC.java | 50 --- 9 files changed, 160 insertions(+), 1071 deletions(-) diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java index dd925aa81..f4c49f650 100644 --- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java +++ b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java @@ -95,6 +95,20 @@ public class ParseContext implements Serializable { .unmodifiableSet(context.keySet()); } + /** + * Returns the internal context map for serialization purposes. + * The returned map is unmodifiable. + * <p> + * This method is intended for use by serialization frameworks only. + * Keys are fully-qualified class names, values are the objects stored in the context. + * + * @return an unmodifiable view of the context map + * @since Apache Tika 4.0 + */ + public Map<String, Object> getContextMap() { + return Collections.unmodifiableMap(context); + } + @Override public boolean equals(Object o) { if (this == o) { diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextDeserializer.java b/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextDeserializer.java index ff43fc1a2..2d7acaace 100644 --- a/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextDeserializer.java +++ b/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextDeserializer.java @@ -27,57 +27,128 @@ import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.JsonDeserializer; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.tika.config.ConfigContainer; import org.apache.tika.parser.ParseContext; public class ParseContextDeserializer extends JsonDeserializer<ParseContext> { + /** + * Validates that a class name is safe to deserialize. + * Only allows Tika classes and common safe types. + * + * @param className the fully-qualified class name + * @throws IOException if the class is not allowed + */ + private static void validateClassName(String className) throws IOException { + // Deny dangerous base types + if (className.equals("java.lang.Object") || + className.equals("java.io.Serializable")) { + throw new IOException("Deserialization of " + className + " is not allowed for security reasons"); + } + + // Allow Tika classes + if (className.startsWith("org.apache.tika.")) { + return; + } + + // Allow metadata-extractor (used in some parsers) + if (className.startsWith("com.drew.")) { + return; + } + + // Allow common safe types + if (className.startsWith("java.lang.String") || + className.startsWith("java.lang.Number") || + className.startsWith("java.lang.Boolean") || + className.startsWith("java.lang.Integer") || + className.startsWith("java.lang.Long") || + className.startsWith("java.lang.Double") || + className.startsWith("java.lang.Float") || + className.startsWith("java.util.Date") || + className.startsWith("java.time.")) { + return; + } + + // Deny everything else by default + throw new IOException("Deserialization of class " + className + + " is not allowed. Only Tika classes and common safe types are permitted."); + } + @Override - public ParseContext deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException, JacksonException { - JsonNode root = jsonParser.getCodec().readTree(jsonParser); - return readParseContext(root); + public ParseContext deserialize(JsonParser jsonParser, + DeserializationContext deserializationContext) + throws IOException, JacksonException { + ObjectMapper mapper = (ObjectMapper) jsonParser.getCodec(); + JsonNode root = mapper.readTree(jsonParser); + return readParseContext(root, mapper); } + /** + * Backwards-compatible version that creates its own ObjectMapper. + * Prefer {@link #readParseContext(JsonNode, ObjectMapper)} when possible. + */ public static ParseContext readParseContext(JsonNode jsonNode) throws IOException { - //some use cases include the wrapper node, e.g. { "parseContext": {}} - //some include the contents only. - //Try to find "parseContext" to start. If that doesn't exist, assume the jsonNode is the contents. - JsonNode contextNode = jsonNode.get(PARSE_CONTEXT); + return readParseContext(jsonNode, ParseContextSerializer.createMapper()); + } + public static ParseContext readParseContext(JsonNode jsonNode, ObjectMapper mapper) + throws IOException { + // Some use cases include the wrapper node, e.g. { "parseContext": {}} + // Some include the contents only. + // Try to find "parseContext" to start. If that doesn't exist, assume jsonNode is the contents. + JsonNode contextNode = jsonNode.get(PARSE_CONTEXT); if (contextNode == null) { contextNode = jsonNode; } + ParseContext parseContext = new ParseContext(); + + // Deserialize objects from "objects" field if (contextNode.has("objects")) { - for (Map.Entry<String, JsonNode> e : contextNode - .get("objects") - .properties()) { - String superClassName = e.getKey(); - JsonNode obj = e.getValue(); - String className = readVal(TikaJsonSerializer.INSTANTIATED_CLASS_KEY, obj, null, true); + JsonNode objectsNode = contextNode.get("objects"); + for (Map.Entry<String, JsonNode> entry : objectsNode.properties()) { + String superClassName = entry.getKey(); + JsonNode objectNode = entry.getValue(); + + String actualClassName = readVal("_class", objectNode, null, true); + + // SECURITY: Validate class name before deserialization + validateClassName(actualClassName); + validateClassName(superClassName); + try { - Class clazz = Class.forName(className); - Class superClazz = className.equals(superClassName) ? clazz : Class.forName(superClassName); - parseContext.set(superClazz, TikaJsonDeserializer.deserialize(clazz, obj)); - } catch (ReflectiveOperationException ex) { - throw new IOException(ex); + Class<?> actualClass = Class.forName(actualClassName); + Class<?> superClass = actualClassName.equals(superClassName) ? + actualClass : Class.forName(superClassName); + + // Use Jackson's standard deserialization + Object deserializedObject = mapper.treeToValue(objectNode, actualClass); + + parseContext.set((Class) superClass, deserializedObject); + } catch (ClassNotFoundException ex) { + throw new IOException("Class not found: " + actualClassName, ex); } } } + + // Deserialize ConfigContainer from top-level fields (excluding "objects") ConfigContainer configContainer = null; for (Iterator<String> it = contextNode.fieldNames(); it.hasNext(); ) { - String nodeName = it.next(); - if (! "objects".equals(nodeName)) { + String fieldName = it.next(); + if (!"objects".equals(fieldName)) { if (configContainer == null) { configContainer = new ConfigContainer(); } - configContainer.set(nodeName, contextNode.get(nodeName).toString()); + configContainer.set(fieldName, contextNode.get(fieldName).toString()); } } + if (configContainer != null) { parseContext.set(ConfigContainer.class, configContainer); } + return parseContext; } diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextSerializer.java b/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextSerializer.java index 64bb5c8cd..206a955d0 100644 --- a/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextSerializer.java +++ b/tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextSerializer.java @@ -17,10 +17,12 @@ package org.apache.tika.serialization; import java.io.IOException; -import java.util.Set; +import java.util.Map; import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.SerializerProvider; import org.apache.tika.config.ConfigContainer; @@ -29,34 +31,70 @@ import org.apache.tika.parser.ParseContext; public class ParseContextSerializer extends JsonSerializer<ParseContext> { public static final String PARSE_CONTEXT = "parseContext"; + /** + * Creates an ObjectMapper for serialization. + * Note: Security validation happens during deserialization. + */ + static ObjectMapper createMapper() { + return new ObjectMapper(); + } + @Override - public void serialize(ParseContext parseContext, JsonGenerator jsonGenerator, SerializerProvider serializerProvider) throws IOException { + public void serialize(ParseContext parseContext, JsonGenerator jsonGenerator, + SerializerProvider serializerProvider) throws IOException { jsonGenerator.writeStartObject(); - Set<String> objectKeySet = parseContext.keySet(); - ConfigContainer p = parseContext.get(ConfigContainer.class); - if ((p != null && objectKeySet.size() > 1) || (p == null && ! objectKeySet.isEmpty())) { + + Map<String, Object> contextMap = parseContext.getContextMap(); + ConfigContainer configContainer = parseContext.get(ConfigContainer.class); + + // Write non-ConfigContainer objects under "objects" field + boolean hasNonConfigObjects = contextMap.size() > (configContainer != null ? 1 : 0); + if (hasNonConfigObjects) { jsonGenerator.writeFieldName("objects"); jsonGenerator.writeStartObject(); - for (String className : parseContext.keySet()) { + + ObjectMapper mapper = (ObjectMapper) jsonGenerator.getCodec(); + if (mapper == null) { + mapper = createMapper(); + } + + for (Map.Entry<String, Object> entry : contextMap.entrySet()) { + String className = entry.getKey(); if (className.equals(ConfigContainer.class.getName())) { continue; } - try { - Class clazz = Class.forName(className); - TikaJsonSerializer.serialize(className, parseContext.get(clazz), jsonGenerator); - } catch (TikaSerializationException e) { - throw new IOException(e); - } catch (ClassNotFoundException e) { - throw new IllegalArgumentException(e); + + Object value = entry.getValue(); + + // Write the field name (superclass/interface name from key) + jsonGenerator.writeFieldName(className); + jsonGenerator.writeStartObject(); + + // Write type information for deserialization + jsonGenerator.writeStringField("_class", value.getClass().getName()); + + // Serialize object properties using Jackson + com.fasterxml.jackson.databind.JsonNode tree = mapper.valueToTree(value); + var fields = tree.fields(); + while (fields.hasNext()) { + var field = fields.next(); + jsonGenerator.writeFieldName(field.getKey()); + jsonGenerator.writeTree(field.getValue()); } + + jsonGenerator.writeEndObject(); } + jsonGenerator.writeEndObject(); } - if (p != null) { - for (String k : p.getKeys()) { - jsonGenerator.writeStringField(k, p.get(k).get()); + + // Write ConfigContainer fields directly as top-level properties + if (configContainer != null) { + for (String key : configContainer.getKeys()) { + jsonGenerator.writeStringField(key, configContainer.get(key).get()); } } + jsonGenerator.writeEndObject(); } } diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonDeserializer.java b/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonDeserializer.java deleted file mode 100644 index ac0cd5e42..000000000 --- a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonDeserializer.java +++ /dev/null @@ -1,412 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.serialization; - -import java.lang.reflect.Array; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.lang.reflect.ParameterizedType; -import java.lang.reflect.Type; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; - -import com.fasterxml.jackson.databind.JsonNode; - -/** - * See the notes @link{TikaJsonSerializer}. - * <p> - * This currently requires a setString() option on objects that have enum parameters. - */ -public class TikaJsonDeserializer { - - public static Optional deserializeObject(JsonNode root) { - if (!root.isObject()) { - throw new IllegalArgumentException("root needs to be an object"); - } - if (!root.has(TikaJsonSerializer.INSTANTIATED_CLASS_KEY)) { - throw new IllegalArgumentException("need to specify: " + TikaJsonSerializer.INSTANTIATED_CLASS_KEY); - } - String className = root - .get(TikaJsonSerializer.INSTANTIATED_CLASS_KEY) - .asText(); - - try { - return Optional.of(deserialize(Class.forName(className), root)); - } catch (Exception e) { - throw new IllegalArgumentException(e); - } - } - - public static <T> T deserialize(Class<? extends T> clazz, JsonNode root) throws ReflectiveOperationException { - T obj = clazz - .getDeclaredConstructor() - .newInstance(); - Map<String, List<Method>> setters = getSetters(obj); - if (!root.isObject()) { - throw new IllegalArgumentException("must be object"); - } - for (Map.Entry<String, JsonNode> e : root.properties()) { - String name = e.getKey(); - JsonNode child = e.getValue(); - if (TikaJsonSerializer.INSTANTIATED_CLASS_KEY.equals(name)) { - continue; - } - setValue(name, child, obj, setters); - } - return obj; - } - - private static Map<String, List<Method>> getSetters(Object obj) { - Map<String, List<Method>> setters = new HashMap<>(); - for (Method m : obj - .getClass() - .getMethods()) { - String n = m.getName(); - if (n.startsWith(TikaJsonSerializer.SET) && n.length() > 3 && Character.isUpperCase(n.charAt(3))) { - if (m.getParameters().length == 1) { - String paramName = TikaJsonSerializer.getParam(TikaJsonSerializer.SET, n); - List<Method> methods = setters.get(paramName); - if (methods == null) { - methods = new ArrayList<>(); - setters.put(paramName, methods); - } - methods.add(m); - } - } - } - return setters; - } - - private static void setValue(String name, JsonNode node, Object obj, Map<String, List<Method>> setters) throws ReflectiveOperationException { - List<Method> mySetters = setters.get(name); - if (mySetters == null || mySetters.isEmpty()) { - throw new IllegalArgumentException("can't find any setter for " + name); - } - if (node.isNull()) { - setNull(name, node, obj, mySetters); - } else if (node.isNumber()) { - setNumericValue(name, node, obj, mySetters); - } else if (node.isTextual()) { - setStringValue(name, node.asText(), obj, mySetters); - } else if (node.isArray()) { - setArray(name, node, obj, mySetters); - } else if (node.isObject()) { - setObject(name, node, obj, mySetters); - } else if (node.isBoolean()) { - setBoolean(name, node, obj, mySetters); - } - } - - private static void setArray(String name, JsonNode node, Object obj, List<Method> mySetters) { - //there's much more to be done here. :( - for (Method setter : mySetters) { - try { - tryArray(name, node, obj, setter); - } catch (InvocationTargetException | IllegalAccessException e) { - throw new IllegalArgumentException("couldn't create array for " + name); - } - } - } - - private static void tryArray(String name, JsonNode node, Object obj, Method setter) throws InvocationTargetException, IllegalAccessException { - Class argClass = setter.getParameterTypes()[0]; - Class componentType = argClass.getComponentType(); - if (argClass.isArray()) { - int len = node.size(); - Object arrayObject = Array.newInstance(componentType, len); - for (int i = 0; i < len; i++) { - Array.set(arrayObject, i, getVal(componentType, node.get(i))); - } - setter.invoke(obj, arrayObject); - - } else if (List.class.isAssignableFrom(argClass)) { - Type listType = setter.getGenericParameterTypes()[0]; - Type elementType = null; - if (listType instanceof ParameterizedType) { - elementType = ((ParameterizedType) listType).getActualTypeArguments()[0]; - } - if (elementType == null) { - throw new IllegalArgumentException("Can't infer parameterized type for list in: " + node); - } - int len = node.size(); - List<Object> list = new ArrayList<>(); - for (int i = 0; i < len; i++) { - list.add(getVal(elementType, node.get(i))); - } - setter.invoke(obj, list); - } - } - - private static <T> T getVal(T clazz, JsonNode node) { - if (clazz.equals(String.class)) { - return (T) node.asText(); - } else if (clazz.equals(Integer.class) || clazz.equals(int.class)) { - return (T) Integer.valueOf(node.intValue()); - } else if (clazz.equals(Long.class) || clazz.equals(long.class)) { - return (T) Long.valueOf(node.longValue()); - } else if (clazz.equals(Float.class) || clazz.equals(float.class)) { - return (T) Float.valueOf(node.floatValue()); - } else if (clazz.equals(Double.class) || clazz.equals(double.class)) { - return (T) Double.valueOf(node.doubleValue()); - } else if (node.isObject()) { - if (node.has(TikaJsonSerializer.INSTANTIATED_CLASS_KEY)) { - Optional<T> optional = deserializeObject(node); - if (optional.isPresent()) { - return optional.get(); - } - } else { - throw new IllegalArgumentException("I see a json object, but I don't see " + - TikaJsonSerializer.INSTANTIATED_CLASS_KEY + ": " + node); - } - } - //add short, boolean - throw new IllegalArgumentException("I regret I don't yet support: " + clazz); - } - - private static void setObject(String name, JsonNode node, Object obj, List<Method> mySetters) { - if (!node.has(TikaJsonSerializer.INSTANTIATED_CLASS_KEY)) { - setMap(name, node, obj, mySetters); - return; - } - - Optional object = deserializeObject(node); - if (object.isEmpty()) { - //log, throw exception?! - return; - } - for (Method m : mySetters) { - Class argClass = m.getParameters()[0].getType(); - if (argClass.isAssignableFrom(object - .get() - .getClass())) { - try { - m.invoke(obj, object.get()); - return; - } catch (IllegalAccessException | InvocationTargetException e) { - //swallow - } - } - } - throw new IllegalArgumentException("can't set object on " + name); - } - - private static void setMap(String name, JsonNode node, Object obj, List<Method> setters) { - //TODO this should try to match the map setters with the data types - //for now, we're just doing <String,String> - Map<String, String> val = new HashMap<>(); - for (Map.Entry<String, JsonNode> e : node.properties()) { - val.put(e.getKey(), e - .getValue() - .textValue()); - } - for (Method m : setters) { - try { - m.invoke(obj, val); - return; - } catch (ReflectiveOperationException e) { - //swallow - } - } - throw new IllegalArgumentException("can't find map setter for: " + name); - } - - private static void setBoolean(String name, JsonNode node, Object obj, List<Method> setters) throws ReflectiveOperationException { - for (Method m : setters) { - Class argClass = m.getParameters()[0].getType(); - if (argClass.equals(Boolean.class) || argClass.equals(boolean.class)) { - m.invoke(obj, node.booleanValue()); - return; - } - } - //TODO -- maybe check for string? - throw new IllegalArgumentException("can't set boolean on " + name); - } - - private static void setNull(String name, JsonNode node, Object obj, List<Method> setters) { - for (Method m : setters) { - Class argClass = m.getParameters()[0].getType(); - if (!TikaJsonSerializer.PRIMITIVES.contains(argClass)) { - try { - - m.invoke(obj, argClass.cast(null)); - return; - } catch (Exception e) { - //swallow - } - } - } - throw new IllegalArgumentException("can't set null on " + name); - } - - private static void setStringValue(String name, String txt, Object obj, List<Method> setters) throws ReflectiveOperationException { - - //try for exact match first - for (Method m : setters) { - Class argClass = m.getParameters()[0].getType(); - if (argClass.equals(String.class)) { - m.invoke(obj, txt); - return; - } - } - Method intMethod = null; - Method longMethod = null; - Method doubleMethod = null; - Method floatMethod = null; - Method shortMethod = null; - Method boolMethod = null; - for (Method m : setters) { - Class argClass = m.getParameters()[0].getType(); - if (argClass.equals(Integer.class) || argClass.equals(int.class)) { - intMethod = m; - } else if (argClass.equals(Long.class) || argClass.equals(long.class)) { - longMethod = m; - } else if (argClass.equals(Float.class) || argClass.equals(float.class)) { - floatMethod = m; - } else if (argClass.equals(Double.class) || argClass.equals(double.class)) { - doubleMethod = m; - } else if (argClass.equals(Short.class) || argClass.equals(short.class)) { - shortMethod = m; - } else if (argClass.equals(Boolean.class) || argClass.equals(boolean.class)) { - boolMethod = m; - } - } - - if (shortMethod != null) { - try { - short val = Short.parseShort(txt); - shortMethod.invoke(obj, val); - return; - } catch (NumberFormatException e) { - //swallow - } - } else if (intMethod != null) { - try { - int val = Integer.parseInt(txt); - intMethod.invoke(obj, val); - return; - } catch (NumberFormatException e) { - //swallow - } - } else if (floatMethod != null) { - try { - float val = Float.parseFloat(txt); - floatMethod.invoke(obj, val); - return; - } catch (NumberFormatException e) { - //swallow - } - } else if (longMethod != null) { - try { - long val = Long.parseLong(txt); - longMethod.invoke(obj, val); - return; - } catch (NumberFormatException e) { - //swallow - } - } else if (doubleMethod != null) { - try { - double val = Double.parseDouble(txt); - doubleMethod.invoke(obj, val); - return; - } catch (NumberFormatException e) { - //swallow - } - } else if (boolMethod != null) { - if (txt.equalsIgnoreCase("true")) { - boolMethod.invoke(obj, true); - } else if (txt.equalsIgnoreCase("false")) { - boolMethod.invoke(obj, false); - } - } - throw new IllegalArgumentException("I regret I couldn't find a setter for: " + name); - - } - - private static void setNumericValue(String name, JsonNode node, Object obj, List<Method> setters) throws ReflectiveOperationException { - - //try numeric and equals first - for (Method m : setters) { - Class argClass = m.getParameters()[0].getType(); - if ((argClass.equals(int.class) || argClass.equals(Integer.class)) && node.isInt()) { - m.invoke(obj, node.intValue()); - return; - } else if ((argClass.equals(long.class) || argClass.equals(Long.class)) && node.isLong()) { - m.invoke(obj, node.asLong()); - return; - } else if ((argClass.equals(float.class) || argClass.equals(Float.class)) && node.isFloat()) { - m.invoke(obj, node.floatValue()); - return; - } else if ((argClass.equals(double.class) || argClass.equals(Double.class)) && node.isDouble()) { - m.invoke(obj, node.doubleValue()); - return; - } else if ((argClass.equals(short.class) || argClass.equals(Short.class)) && node.isShort()) { - m.invoke(obj, node.shortValue()); - return; - } - } - //try for higher precision setters - for (Method m : setters) { - Class argClass = m.getParameters()[0].getType(); - if ((argClass.equals(long.class) || argClass.equals(Long.class)) && node.isInt()) { - m.invoke(obj, node.asLong()); - return; - } else if ((argClass.equals(double.class) || argClass.equals(Double.class)) && node.isFloat()) { - m.invoke(obj, node.floatValue()); - return; - } - } - //try for lower precision setters - //we have to do this for node=double, type=float; should we do this for long->integer?! - for (Method m : setters) { - Class argClass = m.getParameters()[0].getType(); - if ((argClass.equals(int.class) || argClass.equals(Integer.class)) && node.isLong()) { - long val = node.longValue(); - if (val >= Integer.MAX_VALUE || val <= Integer.MIN_VALUE) { - //don't do this - } else { - m.invoke(obj, node.intValue()); - } - return; - } else if ((argClass.equals(float.class) || argClass.equals(Float.class)) && node.isDouble()) { - //TODO -- check for over/underflow - m.invoke(obj, node.floatValue()); - return; - } else if ((argClass.equals(short.class) || argClass.equals(Short.class)) && node.isInt()) { - int val = node.intValue(); - if (val > Short.MAX_VALUE || val < Short.MIN_VALUE) { - //don't do this - } else { - m.invoke(obj, node.shortValue()); - return; - } - } - } - //finally try for String - for (Method m : setters) { - Class argClass = m.getParameters()[0].getType(); - if (argClass.equals(String.class)) { - m.invoke(obj, node.asText()); - return; - } - } - throw new IllegalArgumentException("Couldn't find numeric setter for: " + name); - - } -} diff --git a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonSerializer.java b/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonSerializer.java deleted file mode 100644 index 77a2400e5..000000000 --- a/tika-serialization/src/main/java/org/apache/tika/serialization/TikaJsonSerializer.java +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.serialization; - -import java.io.IOException; -import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Set; - -import com.fasterxml.jackson.core.JsonGenerator; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -/** - * This is a basic serializer that requires that an object: - * a) have a no-arg constructor - * b) have both setters and getters for the same parameters with the same names, e.g. setXYZ and getXYZ - * c) setters and getters have to follow the pattern setX where x is a capital letter - * d) have maps as parameters where the keys are strings (and the values are strings for now) - * e) at deserialization time, objects that have setters for enums also have to have a setter for a string value of that enum - */ -public class TikaJsonSerializer { - - public static String INSTANTIATED_CLASS_KEY = "_class"; - static Set<Class> PRIMITIVES = Set.of(int.class, double.class, float.class, long.class, short.class, boolean.class, String.class, byte.class, char.class); - static Set<Class> BOXED = Set.of(Integer.class, Double.class, Float.class, Long.class, Short.class, Boolean.class, Byte.class, Character.class); - static String SET = "set"; - private static Logger LOG = LoggerFactory.getLogger(TikaJsonSerializer.class); - private static String GET = "get"; - private static String IS = "is"; - - public static void serialize(Object obj, JsonGenerator jsonGenerator) throws TikaSerializationException, IOException { - serialize(null, obj, jsonGenerator); - } - - public static void serialize(String fieldName, Object obj, JsonGenerator jsonGenerator) throws TikaSerializationException, IOException { - if (obj == null) { - if (fieldName == null) { - jsonGenerator.writeNull(); - } else { - jsonGenerator.writeNullField(fieldName); - } - } else if (PRIMITIVES.contains(obj.getClass()) || BOXED.contains(obj.getClass())) { - try { - serializePrimitiveAndBoxed(fieldName, obj, jsonGenerator); - } catch (IOException e) { - throw new TikaSerializationException("problem serializing", e); - } - } else if (isCollection(obj)) { - serializeCollection(fieldName, obj, jsonGenerator); - } else if (obj - .getClass() - .isEnum()) { - jsonGenerator.writeStringField(fieldName, ((Enum) obj).name()); - } else { - serializeObject(fieldName, obj, jsonGenerator); - } - } - - /** - * limited to array, list and map - * - * @param obj - * @return - */ - private static boolean isCollection(Object obj) { - Class clazz = obj.getClass(); - return clazz.isArray() || List.class.isAssignableFrom(clazz) || Map.class.isAssignableFrom(clazz); - } - - - /** - * @param fieldName can be null -- used only for logging and debugging - * @param obj - * @param jsonGenerator - * @throws TikaSerializationException - */ - public static void serializeObject(String fieldName, Object obj, JsonGenerator jsonGenerator) throws TikaSerializationException { - - try { - Constructor constructor = obj - .getClass() - .getConstructor(); - } catch (NoSuchMethodException e) { - throw new IllegalArgumentException("class (" + obj.getClass() + ") doesn't have a no-arg constructor. Respectfully not serializing."); - } - try { - if (fieldName != null) { - jsonGenerator.writeFieldName(fieldName); - } - jsonGenerator.writeStartObject(); - jsonGenerator.writeStringField(INSTANTIATED_CLASS_KEY, obj - .getClass() - .getName()); - Map<String, Method> matches = getGetters(obj - .getClass() - .getMethods()); - //iterate through the getters - for (Map.Entry<String, Method> e : matches.entrySet()) { - try { - Object methodVal = e - .getValue() - .invoke(obj); - serialize(e.getKey(), methodVal, jsonGenerator); - } catch (IllegalAccessException | InvocationTargetException ex) { - throw new TikaSerializationException("couldn't write paramName=" + e.getKey(), ex); - } - } - - jsonGenerator.writeEndObject(); - } catch (IOException e) { - throw new TikaSerializationException("problem", e); - } - } - - private static Map<String, Method> getGetters(Method[] methods) { - Map<String, List<Method>> getters = new HashMap<>(); - Map<String, List<Method>> setters = new HashMap<>(); - - for (Method m : methods) { - String name = m.getName(); - if (name.startsWith("get") && name.length() > 3 && Character.isUpperCase(name.charAt(3))) { - String param = getParam(GET, name); - add(param, m, getters); - } else if (name.startsWith("is") && name.length() > 2 && Character.isUpperCase(name.charAt(2))) { - String param = getParam(IS, name); - add(param, m, getters); - } else if (name.startsWith("set") && name.length() > 3 && Character.isUpperCase(name.charAt(3))) { - //take only single param setters - if (m.getParameters().length == 1) { - String param = getParam(SET, name); - add(param, m, setters); - } - } - } - //this strictly looks for classA.equals(classB) - //this does not look for instance of, nor does it look for boxed vs. primitives - //Also, TODO -- this should favor getters and setters with Strings over those - //with complex types - Map<String, Method> ret = new HashMap<>(); - for (Map.Entry<String, List<Method>> e : getters.entrySet()) { - String paramName = e.getKey(); - //figure out how to skip Class level setters/getters - if ("class".equals(paramName)) { - continue; - } - List<Method> setterList = setters.get(paramName); - if (setterList == null || setterList.size() == 0) { - LOG.debug("Couldn't find setter for getter: " + paramName); - continue; - } - for (Method getter : e.getValue()) { - for (Method setter : setterList) { - Class setClass = setter.getParameters()[0].getType(); - if (getter - .getReturnType() - .equals(setClass)) { - ret.put(paramName, getter); - } - } - } - } - return ret; - } - - private static void serializeCollection(String fieldName, Object obj, JsonGenerator jsonGenerator) throws IOException, TikaSerializationException { - if (fieldName != null) { - jsonGenerator.writeFieldName(fieldName); - } - Class clazz = obj.getClass(); - if (clazz.isArray()) { - jsonGenerator.writeStartArray(); - for (Object item : (Object[]) obj) { - serialize(item, jsonGenerator); - } - jsonGenerator.writeEndArray(); - } else if (List.class.isAssignableFrom(clazz)) { - //should we get the generic type of the list via reflection - //so that we can set the superclass field in the item? - jsonGenerator.writeStartArray(); - for (Object item : (List) obj) { - serialize(item, jsonGenerator); - } - jsonGenerator.writeEndArray(); - } else if (Map.class.isAssignableFrom(clazz)) { - jsonGenerator.writeStartObject(); - for (Map.Entry<String, Object> e : ((Map<String, Object>) obj).entrySet()) { - serialize(e.getKey(), e.getValue(), jsonGenerator); - } - jsonGenerator.writeEndObject(); - } else { - throw new UnsupportedOperationException("Should have been a collection?! " + clazz); - } - } - - private static void serializePrimitiveAndBoxed(String paramName, Object obj, JsonGenerator jsonGenerator) throws IOException { - Class clazz = obj.getClass(); - if (paramName != null) { - jsonGenerator.writeFieldName(paramName); - } - if (clazz.equals(String.class)) { - jsonGenerator.writeString((String) obj); - } else if (clazz.equals(Integer.class)) { - jsonGenerator.writeNumber((Integer) obj); - } else if (clazz.equals(Short.class)) { - jsonGenerator.writeNumber((Short) obj); - } else if (clazz.equals(Long.class)) { - jsonGenerator.writeNumber((Long) obj); - } else if (clazz.equals(Float.class)) { - jsonGenerator.writeNumber((Float) obj); - } else if (clazz.equals(Double.class)) { - jsonGenerator.writeNumber((Double) obj); - } else if (clazz.equals(Boolean.class)) { - jsonGenerator.writeBoolean((Boolean) obj); - } else if (clazz.equals(short.class)) { - jsonGenerator.writeNumber((short) obj); - } else if (clazz.equals(int.class)) { - jsonGenerator.writeNumber((int) obj); - } else if (clazz.equals(long.class)) { - jsonGenerator.writeNumber((long) obj); - } else if (clazz.equals(float.class)) { - jsonGenerator.writeNumber((float) obj); - } else if (clazz.equals(double.class)) { - jsonGenerator.writeNumber((double) obj); - } else if (clazz.equals(boolean.class)) { - jsonGenerator.writeBoolean((boolean) obj); - } else { - throw new UnsupportedOperationException("I regret that I don't yet support " + clazz); - } - - } - - private static void add(String param, Method method, Map<String, List<Method>> map) { - List<Method> methods = map.get(param); - if (methods == null) { - methods = new ArrayList<>(); - map.put(param, methods); - } - methods.add(method); - } - - static String getParam(String prefix, String name) { - String ret = name.substring(prefix.length()); - ret = ret - .substring(0, 1) - .toLowerCase(Locale.ROOT) + ret.substring(1); - return ret; - } - -} diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/TikaJsonSerializationTest.java b/tika-serialization/src/test/java/org/apache/tika/serialization/TikaJsonSerializationTest.java deleted file mode 100644 index 8aefbc428..000000000 --- a/tika-serialization/src/test/java/org/apache/tika/serialization/TikaJsonSerializationTest.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.serialization; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.StringReader; -import java.io.StringWriter; -import java.util.Optional; - -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.junit.jupiter.api.Test; - -import org.apache.tika.serialization.mocks.ClassC; - -public class TikaJsonSerializationTest { - - @Test - public void testBasic() throws Exception { - StringWriter sw = new StringWriter(); - ClassC classA = new ClassC(); - try (JsonGenerator jsonGenerator = new ObjectMapper().createGenerator(sw)) { - TikaJsonSerializer.serialize(classA, jsonGenerator); - } - JsonNode root = new ObjectMapper().readTree(new StringReader(sw.toString())); - Optional opt = TikaJsonDeserializer.deserializeObject(root); - assertTrue(opt.isPresent()); - assertEquals(classA, opt.get()); - - } - -} diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassA.java b/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassA.java deleted file mode 100644 index 5b17d7342..000000000 --- a/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassA.java +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.serialization.mocks; - -import java.util.Objects; - -public class ClassA { - private int a = 10; - private float b = 11.1f; - private short c = 2; - private long d = 13l; - private boolean e = false; - private Integer f = 14; - private Integer g = null; - private Long h = 15l; - private Long i = null; - private Boolean j = Boolean.TRUE; - private Boolean k = null; - - public int getA() { - return a; - } - - public void setA(int a) { - this.a = a; - } - - public float getB() { - return b; - } - - public void setB(float b) { - this.b = b; - } - - public short getC() { - return c; - } - - public void setC(short c) { - this.c = c; - } - - public long getD() { - return d; - } - - public void setD(long d) { - this.d = d; - } - - public boolean isE() { - return e; - } - - public void setE(boolean e) { - this.e = e; - } - - public Integer getF() { - return f; - } - - public void setF(Integer f) { - this.f = f; - } - - public Integer getG() { - return g; - } - - public void setG(Integer g) { - this.g = g; - } - - public Long getH() { - return h; - } - - public void setH(Long h) { - this.h = h; - } - - public Long getI() { - return i; - } - - public void setI(Long i) { - this.i = i; - } - - public Boolean getJ() { - return j; - } - - public void setJ(Boolean j) { - this.j = j; - } - - public Boolean getK() { - return k; - } - - public void setK(Boolean k) { - this.k = k; - } - - @Override - public String toString() { - return "ClassA{" + "a=" + a + ", b=" + b + ", c=" + c + ", d=" + d + ", e=" + e + ", f=" + f + ", g=" + g + ", h=" + h + ", i=" + i + ", j=" + j + ", k=" + k + '}'; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - ClassA classA = (ClassA) o; - return a == classA.a && Float.compare(b, classA.b) == 0 && c == classA.c && d == classA.d && e == classA.e && Objects.equals(f, classA.f) && Objects.equals(g, classA.g) && - Objects.equals(h, classA.h) && Objects.equals(i, classA.i) && Objects.equals(j, classA.j) && Objects.equals(k, classA.k); - } - - @Override - public int hashCode() { - int result = a; - result = 31 * result + Float.hashCode(b); - result = 31 * result + c; - result = 31 * result + Long.hashCode(d); - result = 31 * result + Boolean.hashCode(e); - result = 31 * result + Objects.hashCode(f); - result = 31 * result + Objects.hashCode(g); - result = 31 * result + Objects.hashCode(h); - result = 31 * result + Objects.hashCode(i); - result = 31 * result + Objects.hashCode(j); - result = 31 * result + Objects.hashCode(k); - return result; - } -} diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassB.java b/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassB.java deleted file mode 100644 index e5b8d1d17..000000000 --- a/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassB.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.serialization.mocks; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; - -public class ClassB extends ClassA { - private String s = "hello world"; - private Map<String, String> counts = new HashMap<>(); - private Integer[] ints = new Integer[]{1, 2, 3, 4}; - private List<Float> floats = new ArrayList<>(); - - public ClassB() { - floats.add(2.3f); - floats.add(3.4f); - counts.put("k1", "v1"); - counts.put("k2", "v2"); - } - - public String getS() { - return s; - } - - public void setS(String s) { - this.s = s; - } - - public Map<String, String> getCounts() { - return counts; - } - - public void setCounts(Map<String, String> counts) { - this.counts = counts; - } - - public Integer[] getInts() { - return ints; - } - - public void setInts(Integer[] ints) { - this.ints = ints; - } - - public List<Float> getFloats() { - return floats; - } - - public void setFloats(List<Float> floats) { - this.floats = floats; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - if (!super.equals(o)) { - return false; - } - - ClassB classB = (ClassB) o; - return Objects.equals(s, classB.s) && Objects.equals(counts, classB.counts) && Arrays.equals(ints, classB.ints) && Objects.equals(floats, classB.floats); - } - - @Override - public int hashCode() { - int result = super.hashCode(); - result = 31 * result + Objects.hashCode(s); - result = 31 * result + Objects.hashCode(counts); - result = 31 * result + Arrays.hashCode(ints); - result = 31 * result + Objects.hashCode(floats); - return result; - } -} diff --git a/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassC.java b/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassC.java deleted file mode 100644 index 7da5752c7..000000000 --- a/tika-serialization/src/test/java/org/apache/tika/serialization/mocks/ClassC.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.serialization.mocks; - -import java.util.Objects; - -public class ClassC { - - ClassB classB = new ClassB(); - - public ClassB getClassB() { - return classB; - } - - public void setClassB(ClassB classB) { - this.classB = classB; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - ClassC classC = (ClassC) o; - return Objects.equals(classB, classC.classB); - } - - @Override - public int hashCode() { - return Objects.hashCode(classB); - } -}
