This is an automated email from the ASF dual-hosted git repository.
davsclaus pushed a commit to branch camel-4.14.x
in repository https://gitbox.apache.org/repos/asf/camel.git
The following commit(s) were added to refs/heads/camel-4.14.x by this push:
new 3c088e35a70d CAMEL-22757: camel-tika - Upgrade to tika 3.2.3 from
2.9.4 (#20246)
3c088e35a70d is described below
commit 3c088e35a70dc5902ca8b58f7879dab49c603184
Author: Claus Ibsen <[email protected]>
AuthorDate: Fri Dec 5 19:02:49 2025 +0100
CAMEL-22757: camel-tika - Upgrade to tika 3.2.3 from 2.9.4 (#20246)
---
.../resources/org/apache/camel/catalog/components/tika.json | 2 +-
components/camel-tika/pom.xml | 2 +-
.../resources/META-INF/org/apache/camel/component/tika/tika.json | 2 +-
.../java/org/apache/camel/component/tika/TikaParseOutputFormat.java | 3 ---
.../src/main/java/org/apache/camel/component/tika/TikaProducer.java | 4 ----
.../user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_14.adoc | 6 ++++++
parent/pom.xml | 2 +-
7 files changed, 10 insertions(+), 11 deletions(-)
diff --git
a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json
b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json
index 76df687e1bcd..0906fa1e49ff 100644
---
a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json
+++
b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json
@@ -30,7 +30,7 @@
"properties": {
"operation": { "index": 0, "kind": "path", "displayName": "Operation",
"group": "producer", "label": "", "required": true, "type": "enum", "javaType":
"org.apache.camel.component.tika.TikaOperation", "enum": [ "parse", "detect" ],
"deprecated": false, "deprecationNote": "", "autowired": false, "secret":
false, "configurationClass":
"org.apache.camel.component.tika.TikaConfiguration", "configurationField":
"tikaConfiguration", "description": "Operation type" },
"tikaParseOutputEncoding": { "index": 1, "kind": "parameter",
"displayName": "Tika Parse Output Encoding", "group": "producer", "label": "",
"required": false, "type": "string", "javaType": "java.lang.String",
"deprecated": false, "autowired": false, "secret": false, "configurationClass":
"org.apache.camel.component.tika.TikaConfiguration", "configurationField":
"tikaConfiguration", "description": "Tika Parse Output Encoding" },
- "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName":
"Tika Parse Output Format", "group": "producer", "label": "", "required":
false, "type": "enum", "javaType":
"org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml",
"html", "text", "textMain" ], "deprecated": false, "autowired": false,
"secret": false, "defaultValue": "xml", "configurationClass":
"org.apache.camel.component.tika.TikaConfiguration", "configurationField":
"tikaConfiguration", " [...]
+ "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName":
"Tika Parse Output Format", "group": "producer", "label": "", "required":
false, "type": "enum", "javaType":
"org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml",
"html", "text" ], "deprecated": false, "autowired": false, "secret": false,
"defaultValue": "xml", "configurationClass":
"org.apache.camel.component.tika.TikaConfiguration", "configurationField":
"tikaConfiguration", "description" [...]
"lazyStartProducer": { "index": 3, "kind": "parameter", "displayName":
"Lazy Start Producer", "group": "producer (advanced)", "label":
"producer,advanced", "required": false, "type": "boolean", "javaType":
"boolean", "deprecated": false, "autowired": false, "secret": false,
"defaultValue": false, "description": "Whether the producer should be started
lazy (on the first message). By starting lazy you can use this to allow
CamelContext and routes to startup in situations where a produc [...]
"tikaConfig": { "index": 4, "kind": "parameter", "displayName": "Tika
Config", "group": "advanced", "label": "advanced", "required": false, "type":
"object", "javaType": "org.apache.tika.config.TikaConfig", "deprecated": false,
"autowired": false, "secret": false, "configurationClass":
"org.apache.camel.component.tika.TikaConfiguration", "configurationField":
"tikaConfiguration", "description": "Tika Config" },
"tikaConfigUri": { "index": 5, "kind": "parameter", "displayName": "Tika
Config Uri", "group": "advanced", "label": "advanced", "required": false,
"type": "string", "javaType": "java.lang.String", "deprecated": false,
"autowired": false, "secret": false, "configurationClass":
"org.apache.camel.component.tika.TikaConfiguration", "configurationField":
"tikaConfiguration", "description": "Tika Config Url" }
diff --git a/components/camel-tika/pom.xml b/components/camel-tika/pom.xml
index a9da3fc205e9..5935555b5cf4 100644
--- a/components/camel-tika/pom.xml
+++ b/components/camel-tika/pom.xml
@@ -48,7 +48,7 @@
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
- <artifactId>tika-parser-html-commons</artifactId>
+ <artifactId>tika-parser-html-module</artifactId>
<version>${tika-version}</version>
</dependency>
<dependency>
diff --git
a/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json
b/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json
index 76df687e1bcd..0906fa1e49ff 100644
---
a/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json
+++
b/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json
@@ -30,7 +30,7 @@
"properties": {
"operation": { "index": 0, "kind": "path", "displayName": "Operation",
"group": "producer", "label": "", "required": true, "type": "enum", "javaType":
"org.apache.camel.component.tika.TikaOperation", "enum": [ "parse", "detect" ],
"deprecated": false, "deprecationNote": "", "autowired": false, "secret":
false, "configurationClass":
"org.apache.camel.component.tika.TikaConfiguration", "configurationField":
"tikaConfiguration", "description": "Operation type" },
"tikaParseOutputEncoding": { "index": 1, "kind": "parameter",
"displayName": "Tika Parse Output Encoding", "group": "producer", "label": "",
"required": false, "type": "string", "javaType": "java.lang.String",
"deprecated": false, "autowired": false, "secret": false, "configurationClass":
"org.apache.camel.component.tika.TikaConfiguration", "configurationField":
"tikaConfiguration", "description": "Tika Parse Output Encoding" },
- "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName":
"Tika Parse Output Format", "group": "producer", "label": "", "required":
false, "type": "enum", "javaType":
"org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml",
"html", "text", "textMain" ], "deprecated": false, "autowired": false,
"secret": false, "defaultValue": "xml", "configurationClass":
"org.apache.camel.component.tika.TikaConfiguration", "configurationField":
"tikaConfiguration", " [...]
+ "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName":
"Tika Parse Output Format", "group": "producer", "label": "", "required":
false, "type": "enum", "javaType":
"org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml",
"html", "text" ], "deprecated": false, "autowired": false, "secret": false,
"defaultValue": "xml", "configurationClass":
"org.apache.camel.component.tika.TikaConfiguration", "configurationField":
"tikaConfiguration", "description" [...]
"lazyStartProducer": { "index": 3, "kind": "parameter", "displayName":
"Lazy Start Producer", "group": "producer (advanced)", "label":
"producer,advanced", "required": false, "type": "boolean", "javaType":
"boolean", "deprecated": false, "autowired": false, "secret": false,
"defaultValue": false, "description": "Whether the producer should be started
lazy (on the first message). By starting lazy you can use this to allow
CamelContext and routes to startup in situations where a produc [...]
"tikaConfig": { "index": 4, "kind": "parameter", "displayName": "Tika
Config", "group": "advanced", "label": "advanced", "required": false, "type":
"object", "javaType": "org.apache.tika.config.TikaConfig", "deprecated": false,
"autowired": false, "secret": false, "configurationClass":
"org.apache.camel.component.tika.TikaConfiguration", "configurationField":
"tikaConfiguration", "description": "Tika Config" },
"tikaConfigUri": { "index": 5, "kind": "parameter", "displayName": "Tika
Config Uri", "group": "advanced", "label": "advanced", "required": false,
"type": "string", "javaType": "java.lang.String", "deprecated": false,
"autowired": false, "secret": false, "configurationClass":
"org.apache.camel.component.tika.TikaConfiguration", "configurationField":
"tikaConfiguration", "description": "Tika Config Url" }
diff --git
a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
index 67665d58f69c..0da0d3564061 100644
---
a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
+++
b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
@@ -23,8 +23,6 @@ package org.apache.camel.component.tika;
* <li>xml: Returns Parsed Content as XML.</li>
* <li>html: Returns Parsed Content as HTML.</li>
* <li>text: Returns Parsed Content as Text.</li>
- * <li>textMain: Uses the <a
href="http://code.google.com/p/boilerpipe/">boilerpipe</a> library to
automatically extract
- * the main content from a web page.</li>
* </ul>
*
*/
@@ -32,5 +30,4 @@ public enum TikaParseOutputFormat {
xml,
html,
text,
- textMain;
}
diff --git
a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
index 58a663866767..9328dc47d344 100644
---
a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
+++
b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
@@ -46,7 +46,6 @@ import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ExpandedTitleContentHandler;
-import org.apache.tika.sax.boilerpipe.BoilerpipeContentHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -150,9 +149,6 @@ public class TikaProducer extends DefaultProducer {
case text:
result = new BodyContentHandler(new
OutputStreamWriter(outputStream, this.encoding));
break;
- case textMain:
- result = new BoilerpipeContentHandler(new
OutputStreamWriter(outputStream, this.encoding));
- break;
case html:
result = new
ExpandedTitleContentHandler(getTransformerHandler(outputStream, "html", true));
break;
diff --git
a/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_14.adoc
b/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_14.adoc
index 7ad3b5b335a9..c2c1b2ae321e 100644
--- a/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_14.adoc
+++ b/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_14.adoc
@@ -4,6 +4,12 @@ This document is for helping you upgrade your Apache Camel
application
from Camel 4.x to 4.y. For example, if you are upgrading Camel 4.0 to 4.2,
then you should follow the guides
from both 4.0 to 4.1 and 4.1 to 4.2.
+== Upgrading from 4.14.2 to 4.14.3
+
+=== camel-tika
+
+Upgraded to Tika v3 and removed the `textMain` value from the `tikaParseOutputFormat`
option.
+
== Upgrading from 4.14.1 to 4.14.2
=== camel-kamelet
diff --git a/parent/pom.xml b/parent/pom.xml
index 8b8a720c4215..c8a779315c25 100644
--- a/parent/pom.xml
+++ b/parent/pom.xml
@@ -503,7 +503,7 @@
<tahu-version>1.0.14</tahu-version>
<testcontainers-version>1.21.3</testcontainers-version>
<thymeleaf-version>3.1.3.RELEASE</thymeleaf-version>
- <tika-version>2.9.4</tika-version>
+ <tika-version>3.2.3</tika-version>
<twilio-version>10.9.2</twilio-version>
<twitter4j-version>4.1.2</twitter4j-version>
<undertow-version>2.3.20.Final</undertow-version>