Added support for type for runtime parameters Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/01869923 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/01869923 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/01869923
Branch: refs/heads/TIKA-1508 Commit: 01869923533b330ec7728995e3ee5feceee1b90e Parents: b64612d Author: Thamme Gowda <tgow...@gmail.com> Authored: Wed May 25 17:18:25 2016 -0700 Committer: Thamme Gowda <tgow...@gmail.com> Committed: Wed May 25 17:18:25 2016 -0700 ---------------------------------------------------------------------- .../java/org/apache/tika/base/Configurable.java | 3 +- .../main/java/org/apache/tika/config/Param.java | 191 +++++++++++++++++++ .../java/org/apache/tika/config/TikaConfig.java | 12 +- .../org/apache/tika/parser/AbstractParser.java | 3 +- .../org/apache/tika/parser/ParseContext.java | 7 +- .../java/org/apache/tika/config/ParamTest.java | 71 +++++++ .../tika/parser/DummyConfigurableParser.java | 5 +- .../tika/config/TIKA-1508-configurable.xml | 2 +- 8 files changed, 283 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/base/Configurable.java ---------------------------------------------------------------------- diff --git a/tika-core/src/main/java/org/apache/tika/base/Configurable.java b/tika-core/src/main/java/org/apache/tika/base/Configurable.java index 4e6418d..f1eb91a 100644 --- a/tika-core/src/main/java/org/apache/tika/base/Configurable.java +++ b/tika-core/src/main/java/org/apache/tika/base/Configurable.java @@ -16,6 +16,7 @@ */ package org.apache.tika.base; +import org.apache.tika.config.Param; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.parser.ParseContext; @@ -39,5 +40,5 @@ public interface Configurable { * Gets parameters of this configurable instance * @return parameters in the form of a map of key value pairs */ - Map<String, String> getParams(); + Map<String, Param<?>> getParams(); } http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/config/Param.java ---------------------------------------------------------------------- diff --git a/tika-core/src/main/java/org/apache/tika/config/Param.java b/tika-core/src/main/java/org/apache/tika/config/Param.java new file mode 100644 index 0000000..b54f6be --- /dev/null +++ b/tika-core/src/main/java/org/apache/tika/config/Param.java @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.config; + +import org.w3c.dom.Node; + +import javax.xml.bind.JAXBContext; +import javax.xml.bind.JAXBException; +import javax.xml.bind.Marshaller; +import javax.xml.bind.Unmarshaller; +import javax.xml.bind.annotation.*; +import javax.xml.bind.helpers.DefaultValidationEventHandler; +import java.io.File; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Serializable; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.math.BigInteger; +import java.net.URI; +import java.net.URL; +import java.util.HashMap; +import java.util.Map; + + +/** + * This is a JAXB serializable model class for parameters from configuration file. + * + * @param <T> value type. Should be serializable to string and have a constructor with string param + * @since Apache Tika 1.14 + */ +@XmlRootElement() +@XmlAccessorType(XmlAccessType.NONE) +public class Param<T> implements Serializable { + + private static final JAXBContext JAXB_CTX; + private static final Marshaller MARSHALLER; + private static final Unmarshaller UNMARSHALLER; + private static final Map<Class<?>, String> map = new HashMap<>(); + private static final Map<String, Class<?>> reverseMap = new HashMap<>(); + + static { + map.put(Boolean.class, "bool"); + map.put(String.class, "string"); + map.put(Byte.class, "byte"); + map.put(Short.class, "short"); + map.put(Integer.class, "int"); + map.put(Long.class, "long"); + map.put(BigInteger.class, "bigint"); + map.put(Float.class, "float"); + map.put(Double.class, "double"); + map.put(File.class, "file"); + map.put(URI.class, "uri"); + map.put(URL.class, "url"); + for (Map.Entry<Class<?>, String> entry : map.entrySet()) { + reverseMap.put(entry.getValue(), entry.getKey()); + } + try { + JAXB_CTX = JAXBContext.newInstance(Param.class); + MARSHALLER = JAXB_CTX.createMarshaller(); + MARSHALLER.setEventHandler(new DefaultValidationEventHandler()); + UNMARSHALLER = JAXB_CTX.createUnmarshaller(); + UNMARSHALLER.setEventHandler(new DefaultValidationEventHandler()); + } catch (JAXBException e) { + throw new RuntimeException(e); + } + } + + @XmlTransient + private Class<T> type; + + @XmlAttribute(name = "name") + private String name; + + @XmlValue() + private String value; + + @XmlTransient + private T actualValue; + + public Param(){ + } + + public Param(String name, Class<T> type, T value){ + this.name = name; + this.type = type; + this.value = value.toString(); + } + + public Param(String name, T value){ + this(name, (Class<T>) value.getClass(), value); + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + @XmlTransient + public Class<T> getType() { + return type; + } + + public void setType(Class<T> type) { + this.type = type; + } + + @XmlAttribute(name = "type") + public String getTypeString(){ + if (type == null) { + return null; + } + if (map.containsKey(type)){ + return map.get(type); + } + return type.getName(); + } + + public void setTypeString(String type){ + if (type == null || type.isEmpty()){ + return; + } + if (reverseMap.containsKey(type)){ + this.type = (Class<T>) reverseMap.get(type); + } else try { + this.type = (Class<T>) Class.forName(type); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } + this.actualValue = null; + } + + @XmlTransient + public T getValue(){ + if (actualValue == null) { + try { + Constructor<T> constructor = type.getConstructor(String.class); + constructor.setAccessible(true); + this.actualValue = constructor.newInstance(value); + } catch (NoSuchMethodException e) { + throw new RuntimeException(type + " doesnt have a constructor that takes String arg", e); + } catch (IllegalAccessException | InstantiationException | InvocationTargetException e) { + throw new RuntimeException(e); + } + } + return actualValue; + } + + @Override + public String toString() { + return "Param{" + + "name='" + name + '\'' + + ", value='" + value + '\'' + + ", actualValue=" + actualValue + + '}'; + } + + public void save(OutputStream stream) throws JAXBException { + MARSHALLER.marshal(this, stream); + } + + public void save(Node node) throws JAXBException { + MARSHALLER.marshal(this, node); + } + + public static <T> Param<T> load(InputStream stream) throws JAXBException { + return (Param<T>) UNMARSHALLER.unmarshal(stream); + } + + public static <T> Param<T> load(Node node) throws JAXBException { + return (Param<T>) UNMARSHALLER.unmarshal(node); + } + +} http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java ---------------------------------------------------------------------- diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java index 896b51b..17b735e 100644 --- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java +++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java @@ -17,6 +17,7 @@ package org.apache.tika.config; import javax.imageio.spi.ServiceRegistry; +import javax.xml.bind.JAXBException; import javax.xml.parsers.DocumentBuilder; import java.io.File; import java.io.IOException; @@ -598,8 +599,8 @@ public class TikaConfig { * @param el xml node which has {@link #PARAMS_TAG_NAME} child * @return Map of key values read from xml */ - Map<String, String> getParams(Element el){ - Map<String, String> params = new HashMap<>(); + Map<String, Param<?>> getParams(Element el){ + Map<String, Param<?>> params = new HashMap<>(); for (Node child = el.getFirstChild(); child != null; child = child.getNextSibling()){ if (PARAMS_TAG_NAME.equals(child.getNodeName())){ //found the node @@ -608,7 +609,12 @@ public class TikaConfig { for (int i = 0; i < childNodes.getLength(); i++) { Node item = childNodes.item(i); if (item.getNodeType() == Node.ELEMENT_NODE){ - params.put(item.getNodeName().trim(), item.getTextContent().trim()); + try { + Param<?> param = Param.load(item); + params.put(param.getName(), param); + } catch (JAXBException e) { + throw new RuntimeException(e); + } } } } http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java ---------------------------------------------------------------------- diff --git a/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java b/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java index 00fac7b..5c045db 100644 --- a/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java +++ b/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java @@ -21,6 +21,7 @@ import java.io.InputStream; import java.util.Map; import java.util.Properties; +import org.apache.tika.config.Param; import org.apache.tika.exception.TikaConfigException; import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; @@ -80,7 +81,7 @@ public abstract class AbstractParser implements ConfigurableParser { * @since Apache Tika 1.14 */ @Override - public Map<String, String> getParams() { + public Map<String, Param<?>> getParams() { return this.context.getParams(); } } http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java ---------------------------------------------------------------------- diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java index e58f5c8..c47bbec 100644 --- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java +++ b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java @@ -32,6 +32,7 @@ import java.lang.reflect.Method; import java.util.HashMap; import java.util.Map; +import org.apache.tika.config.Param; import org.apache.tika.exception.TikaException; import org.xml.sax.EntityResolver; import org.xml.sax.InputSource; @@ -57,7 +58,7 @@ public class ParseContext implements Serializable { /** * Map of configurable arguments */ - private final Map<String, String> params = new HashMap<>(); + private final Map<String, Param<?>> params = new HashMap<>(); private static final EntityResolver IGNORING_SAX_ENTITY_RESOLVER = new EntityResolver() { public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { @@ -205,7 +206,7 @@ public class ParseContext implements Serializable { * @param key parameter name * @param value value */ - public void setParam(String key, String value){ + public void setParam(String key, Param<?> value){ this.params.put(key, value); } @@ -221,7 +222,7 @@ public class ParseContext implements Serializable { * Gets all the params * @return map of key values */ - public Map<String, String> getParams() { + public Map<String, Param<?>> getParams() { return params; } http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/test/java/org/apache/tika/config/ParamTest.java ---------------------------------------------------------------------- diff --git a/tika-core/src/test/java/org/apache/tika/config/ParamTest.java b/tika-core/src/test/java/org/apache/tika/config/ParamTest.java new file mode 100644 index 0000000..7c9007e --- /dev/null +++ b/tika-core/src/test/java/org/apache/tika/config/ParamTest.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.config; + +import org.junit.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.math.BigInteger; +import java.net.URI; +import java.net.URL; +import java.util.HashMap; +import java.util.HashSet; + +import static org.junit.Assert.*; + +public class ParamTest { + + @Test + public void testSaveAndLoad() throws Exception { + + Object objects [] = { + Integer.MAX_VALUE, + 2.5f, + 4000.57576, + true, + false, + Long.MAX_VALUE, + "Hello this is a boring string", + new URL("http://apache.org"), + new URI("tika://org.apache.tika.ner.parser?impl=xyz"), + new BigInteger(Long.MAX_VALUE + "").add(new BigInteger(Long.MAX_VALUE + "")), + new File("."), + }; + + for (Object object : objects) { + String name = "name" + System.currentTimeMillis(); + Param<?> param = new Param<>(name, object); + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + param.save(stream); + ByteArrayInputStream inStream = new ByteArrayInputStream(stream.toByteArray()); + stream.close(); + inStream.close(); + Param<?> loaded = Param.load(inStream); + assertEquals(param.getName(), loaded.getName()); + assertEquals(param.getTypeString(), loaded.getTypeString()); + assertEquals(param.getType(), loaded.getType()); + assertEquals(param.getValue(), loaded.getValue()); + + assertEquals(loaded.getValue(), object); + assertEquals(loaded.getName(), name); + assertEquals(loaded.getType(), object.getClass()); + } + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java ---------------------------------------------------------------------- diff --git a/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java index b8775e0..5a874ac 100644 --- a/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java +++ b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java @@ -16,6 +16,7 @@ */ package org.apache.tika.parser; +import org.apache.tika.config.Param; import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MediaType; @@ -56,8 +57,8 @@ public class DummyConfigurableParser extends AbstractParser { public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { - for (Map.Entry<String, String> entry : getParams().entrySet()) { - metadata.add(entry.getKey(), entry.getValue()); + for (Map.Entry<String, Param<?>> entry : getParams().entrySet()) { + metadata.add(entry.getKey(), entry.getValue().getValue().toString()); } } http://git-wip-us.apache.org/repos/asf/tika/blob/01869923/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml ---------------------------------------------------------------------- diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml index 999cb45..37c71c9 100644 --- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml +++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1508-configurable.xml @@ -19,7 +19,7 @@ <parsers> <parser class="org.apache.tika.parser.DummyConfigurableParser"> <params> - <testparam>testparamval</testparam> + <param name="testparam" type="string">testparamval</param> </params> </parser>