This is an automated email from the ASF dual-hosted git repository.

davsclaus pushed a commit to branch camel-4.10.x
in repository https://gitbox.apache.org/repos/asf/camel.git


The following commit(s) were added to refs/heads/camel-4.10.x by this push:
     new 582c22d4255e CAMEL-22757: camel-tika - Upgrade to tika 3.2.3 from 
2.9.4 (#20246)
582c22d4255e is described below

commit 582c22d4255ebdf8d7c936495615ad11909e9062
Author: Claus Ibsen <[email protected]>
AuthorDate: Fri Dec 5 19:02:49 2025 +0100

    CAMEL-22757: camel-tika - Upgrade to tika 3.2.3 from 2.9.4 (#20246)
---
 .../resources/org/apache/camel/catalog/components/tika.json         | 2 +-
 components/camel-tika/pom.xml                                       | 2 +-
 .../resources/META-INF/org/apache/camel/component/tika/tika.json    | 2 +-
 .../java/org/apache/camel/component/tika/TikaParseOutputFormat.java | 3 ---
 .../src/main/java/org/apache/camel/component/tika/TikaProducer.java | 4 ----
 .../user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_10.adoc | 6 ++++++
 parent/pom.xml                                                      | 2 +-
 7 files changed, 10 insertions(+), 11 deletions(-)

diff --git 
a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json
 
b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json
index 862033d42ce4..c98e16835b2d 100644
--- 
a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json
+++ 
b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json
@@ -30,7 +30,7 @@
   "properties": {
     "operation": { "index": 0, "kind": "path", "displayName": "Operation", 
"group": "producer", "label": "", "required": true, "type": "object", 
"javaType": "org.apache.camel.component.tika.TikaOperation", "enum": [ "parse", 
"detect" ], "deprecated": false, "deprecationNote": "", "autowired": false, 
"secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Operation type" },
     "tikaParseOutputEncoding": { "index": 1, "kind": "parameter", 
"displayName": "Tika Parse Output Encoding", "group": "producer", "label": "", 
"required": false, "type": "string", "javaType": "java.lang.String", 
"deprecated": false, "autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Parse Output Encoding" },
-    "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName": 
"Tika Parse Output Format", "group": "producer", "label": "", "required": 
false, "type": "object", "javaType": 
"org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml", 
"html", "text", "textMain" ], "deprecated": false, "autowired": false, 
"secret": false, "defaultValue": "xml", "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", [...]
+    "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName": 
"Tika Parse Output Format", "group": "producer", "label": "", "required": 
false, "type": "object", "javaType": 
"org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml", 
"html", "text" ], "deprecated": false, "autowired": false, "secret": false, 
"defaultValue": "xml", "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "descriptio [...]
     "lazyStartProducer": { "index": 3, "kind": "parameter", "displayName": 
"Lazy Start Producer", "group": "producer (advanced)", "label": 
"producer,advanced", "required": false, "type": "boolean", "javaType": 
"boolean", "deprecated": false, "autowired": false, "secret": false, 
"defaultValue": false, "description": "Whether the producer should be started 
lazy (on the first message). By starting lazy you can use this to allow 
CamelContext and routes to startup in situations where a produc [...]
     "tikaConfig": { "index": 4, "kind": "parameter", "displayName": "Tika 
Config", "group": "advanced", "label": "advanced", "required": false, "type": 
"object", "javaType": "org.apache.tika.config.TikaConfig", "deprecated": false, 
"autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Config" },
     "tikaConfigUri": { "index": 5, "kind": "parameter", "displayName": "Tika 
Config Uri", "group": "advanced", "label": "advanced", "required": false, 
"type": "string", "javaType": "java.lang.String", "deprecated": false, 
"autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Config Url" }
diff --git a/components/camel-tika/pom.xml b/components/camel-tika/pom.xml
index 2fb821b68eb1..823d94a987e9 100644
--- a/components/camel-tika/pom.xml
+++ b/components/camel-tika/pom.xml
@@ -48,7 +48,7 @@
         </dependency>
         <dependency>
             <groupId>org.apache.tika</groupId>
-            <artifactId>tika-parser-html-commons</artifactId>
+            <artifactId>tika-parser-html-module</artifactId>
             <version>${tika-version}</version>
         </dependency>
         <dependency>
diff --git 
a/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json
 
b/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json
index 862033d42ce4..c98e16835b2d 100644
--- 
a/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json
+++ 
b/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json
@@ -30,7 +30,7 @@
   "properties": {
     "operation": { "index": 0, "kind": "path", "displayName": "Operation", 
"group": "producer", "label": "", "required": true, "type": "object", 
"javaType": "org.apache.camel.component.tika.TikaOperation", "enum": [ "parse", 
"detect" ], "deprecated": false, "deprecationNote": "", "autowired": false, 
"secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Operation type" },
     "tikaParseOutputEncoding": { "index": 1, "kind": "parameter", 
"displayName": "Tika Parse Output Encoding", "group": "producer", "label": "", 
"required": false, "type": "string", "javaType": "java.lang.String", 
"deprecated": false, "autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Parse Output Encoding" },
-    "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName": 
"Tika Parse Output Format", "group": "producer", "label": "", "required": 
false, "type": "object", "javaType": 
"org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml", 
"html", "text", "textMain" ], "deprecated": false, "autowired": false, 
"secret": false, "defaultValue": "xml", "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", [...]
+    "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName": 
"Tika Parse Output Format", "group": "producer", "label": "", "required": 
false, "type": "object", "javaType": 
"org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml", 
"html", "text" ], "deprecated": false, "autowired": false, "secret": false, 
"defaultValue": "xml", "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "descriptio [...]
     "lazyStartProducer": { "index": 3, "kind": "parameter", "displayName": 
"Lazy Start Producer", "group": "producer (advanced)", "label": 
"producer,advanced", "required": false, "type": "boolean", "javaType": 
"boolean", "deprecated": false, "autowired": false, "secret": false, 
"defaultValue": false, "description": "Whether the producer should be started 
lazy (on the first message). By starting lazy you can use this to allow 
CamelContext and routes to startup in situations where a produc [...]
     "tikaConfig": { "index": 4, "kind": "parameter", "displayName": "Tika 
Config", "group": "advanced", "label": "advanced", "required": false, "type": 
"object", "javaType": "org.apache.tika.config.TikaConfig", "deprecated": false, 
"autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Config" },
     "tikaConfigUri": { "index": 5, "kind": "parameter", "displayName": "Tika 
Config Uri", "group": "advanced", "label": "advanced", "required": false, 
"type": "string", "javaType": "java.lang.String", "deprecated": false, 
"autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Config Url" }
diff --git 
a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
 
b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
index 67665d58f69c..0da0d3564061 100644
--- 
a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
+++ 
b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
@@ -23,8 +23,6 @@ package org.apache.camel.component.tika;
  * <li>xml: Returns Parsed Content as XML.</li>
  * <li>html: Returns Parsed Content as HTML.</li>
  * <li>text: Returns Parsed Content as Text.</li>
- * <li>textMain: Uses the <a 
href="http://code.google.com/p/boilerpipe/">boilerpipe</a> library to 
automatically extract
- * the main content from a web page.</li>
  * </ul>
  *
  */
@@ -32,5 +30,4 @@ public enum TikaParseOutputFormat {
     xml,
     html,
     text,
-    textMain;
 }
diff --git 
a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
 
b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
index 58a663866767..9328dc47d344 100644
--- 
a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
+++ 
b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
@@ -46,7 +46,6 @@ import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.ExpandedTitleContentHandler;
-import org.apache.tika.sax.boilerpipe.BoilerpipeContentHandler;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -150,9 +149,6 @@ public class TikaProducer extends DefaultProducer {
             case text:
                 result = new BodyContentHandler(new 
OutputStreamWriter(outputStream, this.encoding));
                 break;
-            case textMain:
-                result = new BoilerpipeContentHandler(new 
OutputStreamWriter(outputStream, this.encoding));
-                break;
             case html:
                 result = new 
ExpandedTitleContentHandler(getTransformerHandler(outputStream, "html", true));
                 break;
diff --git 
a/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_10.adoc 
b/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_10.adoc
index f571dd9d230e..00c34cffad17 100644
--- a/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_10.adoc
+++ b/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_10.adoc
@@ -4,6 +4,12 @@ This document is for helping you upgrade your Apache Camel 
application
 from Camel 4.x to 4.y. For example, if you are upgrading Camel 4.0 to 4.2, 
then you should follow the guides
 from both 4.0 to 4.1 and 4.1 to 4.2.
 
+== Upgrading from 4.10.7 to 4.10.8
+
+=== camel-tika
+
+Upgraded to Tika v3, and removed `textMain` from `tikaParseOutputFormat` 
option.
+
 == Upgrading from 4.10.2 to 4.10.7
 
 === camel-file / camel-ftp / camel-smb / camel-azure-files
diff --git a/parent/pom.xml b/parent/pom.xml
index 552d4675618f..86b05de9a57b 100644
--- a/parent/pom.xml
+++ b/parent/pom.xml
@@ -487,7 +487,7 @@
         <tahu-version>1.0.13</tahu-version>
         <testcontainers-version>1.20.4</testcontainers-version>
         <thymeleaf-version>3.1.3.RELEASE</thymeleaf-version>
-        <tika-version>2.9.4</tika-version>
+        <tika-version>3.2.3</tika-version>
         <twilio-version>10.6.8</twilio-version>
         <twitter4j-version>4.1.2</twitter4j-version>
         <undertow-version>2.3.20.Final</undertow-version>

Reply via email to