Remove Configurable entirely; update PDFParser example for one field.
Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/03d38248 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/03d38248 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/03d38248 Branch: refs/heads/master Commit: 03d38248f37cca988866484ea759224f865c7765 Parents: ef1f7b9 Author: tballison <[email protected]> Authored: Mon Jun 13 14:38:33 2016 -0400 Committer: tballison <[email protected]> Committed: Mon Jun 13 14:38:33 2016 -0400 ---------------------------------------------------------------------- .../java/org/apache/tika/base/Configurable.java | 44 -------------------- .../main/java/org/apache/tika/config/Field.java | 2 +- .../java/org/apache/tika/config/TikaConfig.java | 2 - .../apache/tika/parser/ConfigurableParser.java | 32 -------------- .../org/apache/tika/parser/pdf/PDFParser.java | 27 +++++------- .../apache/tika/parser/pdf/PDFParserConfig.java | 2 + 6 files changed, 14 insertions(+), 95 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/03d38248/tika-core/src/main/java/org/apache/tika/base/Configurable.java ---------------------------------------------------------------------- diff --git a/tika-core/src/main/java/org/apache/tika/base/Configurable.java b/tika-core/src/main/java/org/apache/tika/base/Configurable.java deleted file mode 100644 index f1eb91a..0000000 --- a/tika-core/src/main/java/org/apache/tika/base/Configurable.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.base; - -import org.apache.tika.config.Param; -import org.apache.tika.exception.TikaConfigException; -import org.apache.tika.parser.ParseContext; - -import java.util.Map; - -/** - * Defines contract for configurable services - * @since Apache Tika 1.14 - */ -public interface Configurable { - - /** - * Configure an instance with Tika Context - * @param context configuration instance in the form of context - * @throws TikaConfigException when an instance fails to work at the given context - * @since Apache Tika 1.14 - */ - void configure(ParseContext context) throws TikaConfigException; - - /** - * Gets parameters of this configurable instance - * @return parameters in the form of a map of key value pairs - */ - Map<String, Param<?>> getParams(); -} http://git-wip-us.apache.org/repos/asf/tika/blob/03d38248/tika-core/src/main/java/org/apache/tika/config/Field.java ---------------------------------------------------------------------- diff --git a/tika-core/src/main/java/org/apache/tika/config/Field.java b/tika-core/src/main/java/org/apache/tika/config/Field.java index f4fe3f2..fb5a25b 100644 --- a/tika-core/src/main/java/org/apache/tika/config/Field.java +++ b/tika-core/src/main/java/org/apache/tika/config/Field.java @@ -23,7 +23,7 @@ import java.lang.annotation.Target; /** * Field annotation is a contract for binding {@link Param} value from - * Tika Configuration to any instance of {@link org.apache.tika.base.Configurable} + * Tika Configuration to an object. * services * @since Apache Tika 1.14 */ http://git-wip-us.apache.org/repos/asf/tika/blob/03d38248/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java ---------------------------------------------------------------------- diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java index e76b6e6..49c5e26 100644 --- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java +++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java @@ -38,8 +38,6 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutorService; -import org.apache.tika.base.Configurable; - import org.apache.tika.concurrent.ConfigurableThreadPoolExecutor; import org.apache.tika.concurrent.SimpleThreadPoolExecutor; import org.apache.tika.detect.CompositeDetector; http://git-wip-us.apache.org/repos/asf/tika/blob/03d38248/tika-core/src/main/java/org/apache/tika/parser/ConfigurableParser.java ---------------------------------------------------------------------- diff --git a/tika-core/src/main/java/org/apache/tika/parser/ConfigurableParser.java b/tika-core/src/main/java/org/apache/tika/parser/ConfigurableParser.java deleted file mode 100644 index 47feefa..0000000 --- a/tika-core/src/main/java/org/apache/tika/parser/ConfigurableParser.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.parser; - -import org.apache.tika.base.Configurable; - -import java.io.Serializable; - -/** - * Extension of {@link Parser} with {@link Configurable} contract. - * This interface shall be implemented to create parsers which accepts runtime parameters - * from tika configuration file - * - * @since Tika 1.14 - */ -public interface ConfigurableParser extends Parser, - Configurable, Serializable { -} http://git-wip-us.apache.org/repos/asf/tika/blob/03d38248/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java index dd03177..a5673ee 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java @@ -52,17 +52,16 @@ import org.apache.tika.metadata.Property; import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.AbstractParser; -import org.apache.tika.parser.ConfigurableParser; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.PasswordProvider; import org.apache.tika.parser.image.xmp.JempboxExtractor; import org.apache.tika.parser.ocr.TesseractOCRParser; import org.apache.tika.sax.XHTMLContentHandler; +import org.apache.tika.utils.AnnotationUtils; import org.w3c.dom.Document; import org.xml.sax.ContentHandler; import org.xml.sax.ErrorHandler; import org.xml.sax.SAXException; -import static org.bouncycastle.asn1.x500.style.RFC4519Style.name; /** * PDF parser. @@ -105,26 +104,22 @@ public class PDFParser extends AbstractParser { return SUPPORTED_TYPES; } - @Field - private boolean sortByPosition = false; - public void parse( InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { + //step 1, check to see if there are params for the PDFParser class + Map<String, Param<?>> params = context.getParams(PDFParser.class); + PDFParserConfig localConfig = new PDFParserConfig(); + if (params != null) { + AnnotationUtils.assignFieldParams(localConfig, params); + } else if (context.get(PDFParserConfig.class) != null) { + localConfig = context.get(PDFParserConfig.class, defaultConfig); + } PDDocument pdfDocument = null; TemporaryResources tmp = new TemporaryResources(); - //config from context, or default if not set via context - PDFParserConfig localConfig = context.get(PDFParserConfig.class, defaultConfig); - //TODO: get rid of this after dev of TIKA-1508!!! - localConfig.setSortByPosition(sortByPosition); - //TODO: this is just a mockup...move elsewhere - Map<String, Param<?>> params = context.getParams(PDFParser.class); - if (params != null && params.containsKey("sortByPosition")) { - localConfig.setSortByPosition((Boolean)params.get("sortByPosition").getValue()); - } String password = ""; try { // PDFBox can process entirely in memory, or can use a temp file @@ -590,7 +585,7 @@ public class PDFParser extends AbstractParser { * @deprecated use {@link #getPDFParserConfig()} */ public boolean getSortByPosition() { - return sortByPosition; + return defaultConfig.getSortByPosition(); } /** @@ -605,7 +600,7 @@ public class PDFParser extends AbstractParser { */ @Field public void setSortByPosition(boolean v) { - sortByPosition = v; + defaultConfig.setSortByPosition(v); } http://git-wip-us.apache.org/repos/asf/tika/blob/03d38248/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java index 296b191..3f8555c 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java @@ -25,6 +25,7 @@ import java.util.Properties; import org.apache.pdfbox.rendering.ImageType; import org.apache.pdfbox.text.PDFTextStripper; +import org.apache.tika.config.Field; /** * Config for PDFParser. @@ -79,6 +80,7 @@ public class PDFParserConfig implements Serializable { // True if we should sort text tokens by position // (necessary for some PDFs, but messes up other PDFs): + @Field private boolean sortByPosition = false; //True if acroform content should be extracted
