TIKA-1986 -- add Initializable, strip out handling of params passed via 
ParseContext in PDFParser


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/01320372
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/01320372
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/01320372

Branch: refs/heads/master
Commit: 01320372fdbfc5e4ff0cfe0fe85fab91b5b369e7
Parents: d9dcd59
Author: tballison <[email protected]>
Authored: Wed Jun 15 13:32:58 2016 -0400
Committer: tballison <[email protected]>
Committed: Wed Jun 15 13:32:58 2016 -0400

----------------------------------------------------------------------
 .../org/apache/tika/config/Initializable.java   | 31 ++++++++++
 .../java/org/apache/tika/config/TikaConfig.java |  3 +
 .../tika/parser/DummyInitializableParser.java   | 64 ++++++++++++++++++++
 .../tika/parser/DummyParameterizedParser.java   |  1 +
 .../tika/parser/InitializableParserTest.java    | 45 ++++++++++++++
 .../tika/parser/ParameterizedParserTest.java    |  2 +-
 .../tika/config/TIKA-1986-initializable.xml     | 28 +++++++++
 .../tika/config/TIKA-1986-some-parameters.xml   |  2 +-
 .../org/apache/tika/parser/pdf/PDFParser.java   | 10 +--
 .../apache/tika/parser/pdf/PDFParserTest.java   |  2 +
 10 files changed, 178 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/01320372/tika-core/src/main/java/org/apache/tika/config/Initializable.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/config/Initializable.java b/tika-core/src/main/java/org/apache/tika/config/Initializable.java
new file mode 100644
index 0000000..bc7769c
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/config/Initializable.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import org.apache.tika.exception.TikaConfigException;
+
+/**
+ * Components that must do special processing across multiple fields
+ * at initialization time should implement this interface.
+ * <p>
+ * TikaConfig will call initialize on Initializable classes after
+ * setting the parameters.
+ */
+public interface Initializable {
+
+    void initialize() throws TikaConfigException;
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/01320372/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 49c5e26..fbafe7e 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -565,6 +565,9 @@ public class TikaConfig {
                 Map<String, Param<?>> params = getParams(element);
                 //Assigning the params to bean fields/setters
                 AnnotationUtils.assignFieldParams(loaded, params);
+                if (loaded instanceof Initializable) {
+                    ((Initializable) loaded).initialize();
+                }
 
                 // Have any decoration performed, eg explicit mimetypes
                 loaded = decorate(loaded, element);

http://git-wip-us.apache.org/repos/asf/tika/blob/01320372/tika-core/src/test/java/org/apache/tika/parser/DummyInitializableParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/DummyInitializableParser.java b/tika-core/src/test/java/org/apache/tika/parser/DummyInitializableParser.java
new file mode 100644
index 0000000..4bb8668
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/parser/DummyInitializableParser.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser;
+
+import org.apache.tika.config.Field;
+import org.apache.tika.config.Initializable;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * This tests that initialize() is called after adding the parameters
+ * configured via TikaConfig
+ */
+public class DummyInitializableParser extends AbstractParser implements Initializable {
+
+    public static String SUM_FIELD = "SUM";
+    private static Set<MediaType> MIMES = new HashSet<>();
+    static {
+        MIMES.add(MediaType.TEXT_PLAIN);
+    }
+
+    @Field private short shortA = -2;
+    @Field private short shortB = -3;
+    private int sum = 0;
+
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return MIMES;
+    }
+
+    @Override
+    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
+        metadata.set(SUM_FIELD, Integer.toString(sum));
+    }
+
+    @Override
+    public void initialize() throws TikaConfigException {
+        sum = shortA+shortB;
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/01320372/tika-core/src/test/java/org/apache/tika/parser/DummyParameterizedParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/DummyParameterizedParser.java b/tika-core/src/test/java/org/apache/tika/parser/DummyParameterizedParser.java
index 801d65e..435dc52 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/DummyParameterizedParser.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/DummyParameterizedParser.java
@@ -63,6 +63,7 @@ public class DummyParameterizedParser extends AbstractParser {
 
     @Field private String missing = "default";
 
+
     private String inner = "inner";
     private File xfile;
 

http://git-wip-us.apache.org/repos/asf/tika/blob/01320372/tika-core/src/test/java/org/apache/tika/parser/InitializableParserTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/InitializableParserTest.java b/tika-core/src/test/java/org/apache/tika/parser/InitializableParserTest.java
new file mode 100644
index 0000000..b9d378d
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/parser/InitializableParserTest.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import org.apache.tika.Tika;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+
+import static org.junit.Assert.assertEquals;
+
+public class InitializableParserTest {
+
+    public static final String TIKA_CFG_FILE = "org/apache/tika/config/TIKA-1986-initializable.xml";
+
+    @Test
+    public void testInitializableParser() throws Exception {
+        URL configFileUrl = getClass().getClassLoader().getResource(TIKA_CFG_FILE);
+        assert configFileUrl != null;
+        TikaConfig config = new TikaConfig(configFileUrl);
+        Tika tika = new Tika(config);
+        Metadata md = new Metadata();
+        tika.parse(TikaInputStream.get("someString".getBytes(StandardCharsets.ISO_8859_1)), md);
+        assertEquals("5", md.get(DummyInitializableParser.SUM_FIELD));
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/01320372/tika-core/src/test/java/org/apache/tika/parser/ParameterizedParserTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/ParameterizedParserTest.java b/tika-core/src/test/java/org/apache/tika/parser/ParameterizedParserTest.java
index 31c59c0..1471504 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/ParameterizedParserTest.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/ParameterizedParserTest.java
@@ -82,7 +82,7 @@ public class ParameterizedParserTest {
         Metadata md = getMetadata("TIKA-1986-some-parameters.xml");
         assertEquals("-6.0", md.get("xdouble"));
         assertEquals("testparamval", md.get("testparam"));
-        assertEquals("true", md.get("xbool"));
+        assertEquals("false", md.get("xbool"));
     }
 
     @Test

http://git-wip-us.apache.org/repos/asf/tika/blob/01320372/tika-core/src/test/resources/org/apache/tika/config/TIKA-1986-initializable.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-1986-initializable.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1986-initializable.xml
new file mode 100644
index 0000000..0b11bb4
--- /dev/null
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1986-initializable.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+    <parsers>
+        <parser class="org.apache.tika.parser.DummyInitializableParser">
+            <params>
+                <param name="shortA" type="short">2</param>
+                <param name="shortB" type="short">3</param>
+            </params>
+        </parser>
+
+    </parsers>
+</properties>

http://git-wip-us.apache.org/repos/asf/tika/blob/01320372/tika-core/src/test/resources/org/apache/tika/config/TIKA-1986-some-parameters.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/test/resources/org/apache/tika/config/TIKA-1986-some-parameters.xml b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1986-some-parameters.xml
index 250d439..dea8269 100644
--- a/tika-core/src/test/resources/org/apache/tika/config/TIKA-1986-some-parameters.xml
+++ b/tika-core/src/test/resources/org/apache/tika/config/TIKA-1986-some-parameters.xml
@@ -20,7 +20,7 @@
         <parser class="org.apache.tika.parser.DummyParameterizedParser">
             <params>
                 <param name="testparam" type="string">testparamval</param>
-                <param name="testbool" type="bool">false</param>
+                <param name="xbool" type="bool">false</param>
             </params>
         </parser>
 

http://git-wip-us.apache.org/repos/asf/tika/blob/01320372/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
index a5673ee..7b12d58 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
@@ -109,14 +109,8 @@ public class PDFParser extends AbstractParser {
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
 
-        //step 1, check to see if there are params for the PDFParser class
-        Map<String, Param<?>> params = context.getParams(PDFParser.class);
-        PDFParserConfig localConfig = new PDFParserConfig();
-        if (params != null) {
-            AnnotationUtils.assignFieldParams(localConfig, params);
-        } else if (context.get(PDFParserConfig.class) != null) {
-            localConfig = context.get(PDFParserConfig.class, defaultConfig);
-        }
+        PDFParserConfig localConfig = context.get(PDFParserConfig.class, defaultConfig);
+
         PDDocument pdfDocument = null;
         TemporaryResources tmp = new TemporaryResources();
 

http://git-wip-us.apache.org/repos/asf/tika/blob/01320372/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 2ef29f3..e9f55fe 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -61,6 +61,7 @@ import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.ContentHandlerDecorator;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
+import org.junit.Ignore;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
 
@@ -1231,6 +1232,7 @@ public class PDFParserTest extends TikaTest {
     }
 
     @Test
+    @Ignore("We've turned this off for now")
     public void testParameterizationViaContext() throws Exception {
         ParseContext context = new ParseContext();
 

Reply via email to