This is an automated email from the ASF dual-hosted git repository. davsclaus pushed a commit to branch tika in repository https://gitbox.apache.org/repos/asf/camel.git
commit 0379f6a5ceb246af3aca42bf8d745de0473cf6a3 Author: Claus Ibsen <[email protected]> AuthorDate: Fri Dec 5 16:08:28 2025 +0100 CAMEL-22757: camel-tika - Upgrade to tika 3.2.3 from 2.9.4 --- .../generated/resources/org/apache/camel/catalog/components/tika.json | 2 +- components/camel-tika/pom.xml | 2 +- .../resources/META-INF/org/apache/camel/component/tika/tika.json | 2 +- .../java/org/apache/camel/component/tika/TikaParseOutputFormat.java | 3 --- .../src/main/java/org/apache/camel/component/tika/TikaProducer.java | 4 ---- docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_17.adoc | 4 ++++ parent/pom.xml | 2 +- 7 files changed, 8 insertions(+), 11 deletions(-) diff --git a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json index a08b39dc0c3f..59b6a408cc5e 100644 --- a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json +++ b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json @@ -30,7 +30,7 @@ "properties": { "operation": { "index": 0, "kind": "path", "displayName": "Operation", "group": "producer", "label": "", "required": true, "type": "enum", "javaType": "org.apache.camel.component.tika.TikaOperation", "enum": [ "parse", "detect" ], "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.tika.TikaConfiguration", "configurationField": "tikaConfiguration", "description": "Operation type" }, "tikaParseOutputEncoding": { "index": 1, "kind": "parameter", "displayName": "Tika Parse Output Encoding", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Parse Output Encoding" }, - "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName": "Tika Parse Output Format", "group": "producer", "label": "", "required": false, "type": "enum", "javaType": "org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml", "html", "text", "textMain" ], "deprecated": false, "autowired": false, "secret": false, "defaultValue": "xml", "configurationClass": "org.apache.camel.component.tika.TikaConfiguration", "configurationField": "tikaConfiguration", " [...] + "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName": "Tika Parse Output Format", "group": "producer", "label": "", "required": false, "type": "enum", "javaType": "org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml", "html", "text" ], "deprecated": false, "autowired": false, "secret": false, "defaultValue": "xml", "configurationClass": "org.apache.camel.component.tika.TikaConfiguration", "configurationField": "tikaConfiguration", "description" [...] "lazyStartProducer": { "index": 3, "kind": "parameter", "displayName": "Lazy Start Producer", "group": "producer (advanced)", "label": "producer,advanced", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "autowired": false, "secret": false, "defaultValue": false, "description": "Whether the producer should be started lazy (on the first message). By starting lazy you can use this to allow CamelContext and routes to startup in situations where a produc [...] 
"tikaConfig": { "index": 4, "kind": "parameter", "displayName": "Tika Config", "group": "advanced", "label": "advanced", "required": false, "type": "object", "javaType": "org.apache.tika.config.TikaConfig", "deprecated": false, "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.tika.TikaConfiguration", "configurationField": "tikaConfiguration", "description": "Tika Config" }, "tikaConfigUri": { "index": 5, "kind": "parameter", "displayName": "Tika Config Uri", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.tika.TikaConfiguration", "configurationField": "tikaConfiguration", "description": "Tika Config Url" } diff --git a/components/camel-tika/pom.xml b/components/camel-tika/pom.xml index a2a5f563b480..4e7c735ec524 100644 --- a/components/camel-tika/pom.xml +++ b/components/camel-tika/pom.xml @@ -48,7 +48,7 @@ </dependency> <dependency> <groupId>org.apache.tika</groupId> - <artifactId>tika-parser-html-commons</artifactId> + <artifactId>tika-parser-html-module</artifactId> <version>${tika-version}</version> </dependency> <dependency> diff --git a/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json b/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json index a08b39dc0c3f..59b6a408cc5e 100644 --- a/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json +++ b/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json @@ -30,7 +30,7 @@ "properties": { "operation": { "index": 0, "kind": "path", "displayName": "Operation", "group": "producer", "label": "", "required": true, "type": "enum", "javaType": "org.apache.camel.component.tika.TikaOperation", "enum": [ "parse", "detect" ], "deprecated": false, 
"deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.tika.TikaConfiguration", "configurationField": "tikaConfiguration", "description": "Operation type" }, "tikaParseOutputEncoding": { "index": 1, "kind": "parameter", "displayName": "Tika Parse Output Encoding", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.tika.TikaConfiguration", "configurationField": "tikaConfiguration", "description": "Tika Parse Output Encoding" }, - "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName": "Tika Parse Output Format", "group": "producer", "label": "", "required": false, "type": "enum", "javaType": "org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml", "html", "text", "textMain" ], "deprecated": false, "autowired": false, "secret": false, "defaultValue": "xml", "configurationClass": "org.apache.camel.component.tika.TikaConfiguration", "configurationField": "tikaConfiguration", " [...] + "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName": "Tika Parse Output Format", "group": "producer", "label": "", "required": false, "type": "enum", "javaType": "org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml", "html", "text" ], "deprecated": false, "autowired": false, "secret": false, "defaultValue": "xml", "configurationClass": "org.apache.camel.component.tika.TikaConfiguration", "configurationField": "tikaConfiguration", "description" [...] 
"lazyStartProducer": { "index": 3, "kind": "parameter", "displayName": "Lazy Start Producer", "group": "producer (advanced)", "label": "producer,advanced", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "autowired": false, "secret": false, "defaultValue": false, "description": "Whether the producer should be started lazy (on the first message). By starting lazy you can use this to allow CamelContext and routes to startup in situations where a produc [...] "tikaConfig": { "index": 4, "kind": "parameter", "displayName": "Tika Config", "group": "advanced", "label": "advanced", "required": false, "type": "object", "javaType": "org.apache.tika.config.TikaConfig", "deprecated": false, "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.tika.TikaConfiguration", "configurationField": "tikaConfiguration", "description": "Tika Config" }, "tikaConfigUri": { "index": 5, "kind": "parameter", "displayName": "Tika Config Uri", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.tika.TikaConfiguration", "configurationField": "tikaConfiguration", "description": "Tika Config Url" } diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java index 67665d58f69c..0da0d3564061 100644 --- a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java +++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java @@ -23,8 +23,6 @@ package org.apache.camel.component.tika; * <li>xml: Returns Parsed Content as XML.</li> * <li>html: Returns Parsed Content as HTML.</li> * <li>text: Returns Parsed Content as Text.</li> - * <li>textMain: Uses the <a 
href="http://code.google.com/p/boilerpipe/">boilerpipe</a> library to automatically extract - * the main content from a web page.</li> * </ul> * */ @@ -32,5 +30,4 @@ public enum TikaParseOutputFormat { xml, html, text, - textMain; } diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java index 58a663866767..9328dc47d344 100644 --- a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java +++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java @@ -46,7 +46,6 @@ import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; import org.apache.tika.sax.BodyContentHandler; import org.apache.tika.sax.ExpandedTitleContentHandler; -import org.apache.tika.sax.boilerpipe.BoilerpipeContentHandler; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -150,9 +149,6 @@ public class TikaProducer extends DefaultProducer { case text: result = new BodyContentHandler(new OutputStreamWriter(outputStream, this.encoding)); break; - case textMain: - result = new BoilerpipeContentHandler(new OutputStreamWriter(outputStream, this.encoding)); - break; case html: result = new ExpandedTitleContentHandler(getTransformerHandler(outputStream, "html", true)); break; diff --git a/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_17.adoc b/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_17.adoc index 863a7ae6a614..2869feef0c07 100644 --- a/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_17.adoc +++ b/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_17.adoc @@ -53,6 +53,10 @@ Virtual threads are enabled by default for embedded mode with `camel-infinispan- If you choose to listen for cache events on the `infinispan-embedded` consumer, it is possible that events are not processed in a predictable order. 
Disabling virtual threads by setting system property `org.infinispan.threads.virtual` to `false` leads to more predictable event order processing. +=== camel-tika + +Upgraded to Tika v3, and removed `textMain` from `tikaParseOutputFormat` option. + === Component deprecation The `camel-stomp` component is deprecated. diff --git a/parent/pom.xml b/parent/pom.xml index d19b7c4ee24d..5208c8ad54b6 100644 --- a/parent/pom.xml +++ b/parent/pom.xml @@ -520,7 +520,7 @@ <tahu-version>1.0.17</tahu-version> <testcontainers-version>2.0.2</testcontainers-version> <thymeleaf-version>3.1.3.RELEASE</thymeleaf-version> - <tika-version>2.9.4</tika-version> + <tika-version>3.2.3</tika-version> <twilio-version>11.2.0</twilio-version> <twitter4j-version>4.1.2</twitter4j-version> <undertow-version>2.3.20.Final</undertow-version>
