This is an automated email from the ASF dual-hosted git repository.

davsclaus pushed a commit to branch camel-4.14.x
in repository https://gitbox.apache.org/repos/asf/camel.git


The following commit(s) were added to refs/heads/camel-4.14.x by this push:
     new 3c088e35a70d CAMEL-22757: camel-tika - Upgrade to tika 3.2.3 from 2.9.4 (#20246)
3c088e35a70d is described below

commit 3c088e35a70dc5902ca8b58f7879dab49c603184
Author: Claus Ibsen <[email protected]>
AuthorDate: Fri Dec 5 19:02:49 2025 +0100

    CAMEL-22757: camel-tika - Upgrade to tika 3.2.3 from 2.9.4 (#20246)
---
 .../resources/org/apache/camel/catalog/components/tika.json         | 2 +-
 components/camel-tika/pom.xml                                       | 2 +-
 .../resources/META-INF/org/apache/camel/component/tika/tika.json    | 2 +-
 .../java/org/apache/camel/component/tika/TikaParseOutputFormat.java | 3 ---
 .../src/main/java/org/apache/camel/component/tika/TikaProducer.java | 4 ----
 .../user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_14.adoc | 6 ++++++
 parent/pom.xml                                                      | 2 +-
 7 files changed, 10 insertions(+), 11 deletions(-)

diff --git 
a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json
 
b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json
index 76df687e1bcd..0906fa1e49ff 100644
--- 
a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json
+++ 
b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json
@@ -30,7 +30,7 @@
   "properties": {
     "operation": { "index": 0, "kind": "path", "displayName": "Operation", 
"group": "producer", "label": "", "required": true, "type": "enum", "javaType": 
"org.apache.camel.component.tika.TikaOperation", "enum": [ "parse", "detect" ], 
"deprecated": false, "deprecationNote": "", "autowired": false, "secret": 
false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Operation type" },
     "tikaParseOutputEncoding": { "index": 1, "kind": "parameter", 
"displayName": "Tika Parse Output Encoding", "group": "producer", "label": "", 
"required": false, "type": "string", "javaType": "java.lang.String", 
"deprecated": false, "autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Parse Output Encoding" },
-    "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName": 
"Tika Parse Output Format", "group": "producer", "label": "", "required": 
false, "type": "enum", "javaType": 
"org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml", 
"html", "text", "textMain" ], "deprecated": false, "autowired": false, 
"secret": false, "defaultValue": "xml", "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", " [...]
+    "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName": 
"Tika Parse Output Format", "group": "producer", "label": "", "required": 
false, "type": "enum", "javaType": 
"org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml", 
"html", "text" ], "deprecated": false, "autowired": false, "secret": false, 
"defaultValue": "xml", "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description" [...]
     "lazyStartProducer": { "index": 3, "kind": "parameter", "displayName": 
"Lazy Start Producer", "group": "producer (advanced)", "label": 
"producer,advanced", "required": false, "type": "boolean", "javaType": 
"boolean", "deprecated": false, "autowired": false, "secret": false, 
"defaultValue": false, "description": "Whether the producer should be started 
lazy (on the first message). By starting lazy you can use this to allow 
CamelContext and routes to startup in situations where a produc [...]
     "tikaConfig": { "index": 4, "kind": "parameter", "displayName": "Tika 
Config", "group": "advanced", "label": "advanced", "required": false, "type": 
"object", "javaType": "org.apache.tika.config.TikaConfig", "deprecated": false, 
"autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Config" },
     "tikaConfigUri": { "index": 5, "kind": "parameter", "displayName": "Tika 
Config Uri", "group": "advanced", "label": "advanced", "required": false, 
"type": "string", "javaType": "java.lang.String", "deprecated": false, 
"autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Config Url" }
diff --git a/components/camel-tika/pom.xml b/components/camel-tika/pom.xml
index a9da3fc205e9..5935555b5cf4 100644
--- a/components/camel-tika/pom.xml
+++ b/components/camel-tika/pom.xml
@@ -48,7 +48,7 @@
         </dependency>
         <dependency>
             <groupId>org.apache.tika</groupId>
-            <artifactId>tika-parser-html-commons</artifactId>
+            <artifactId>tika-parser-html-module</artifactId>
             <version>${tika-version}</version>
         </dependency>
         <dependency>
diff --git 
a/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json
 
b/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json
index 76df687e1bcd..0906fa1e49ff 100644
--- 
a/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json
+++ 
b/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json
@@ -30,7 +30,7 @@
   "properties": {
     "operation": { "index": 0, "kind": "path", "displayName": "Operation", 
"group": "producer", "label": "", "required": true, "type": "enum", "javaType": 
"org.apache.camel.component.tika.TikaOperation", "enum": [ "parse", "detect" ], 
"deprecated": false, "deprecationNote": "", "autowired": false, "secret": 
false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Operation type" },
     "tikaParseOutputEncoding": { "index": 1, "kind": "parameter", 
"displayName": "Tika Parse Output Encoding", "group": "producer", "label": "", 
"required": false, "type": "string", "javaType": "java.lang.String", 
"deprecated": false, "autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Parse Output Encoding" },
-    "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName": 
"Tika Parse Output Format", "group": "producer", "label": "", "required": 
false, "type": "enum", "javaType": 
"org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml", 
"html", "text", "textMain" ], "deprecated": false, "autowired": false, 
"secret": false, "defaultValue": "xml", "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", " [...]
+    "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName": 
"Tika Parse Output Format", "group": "producer", "label": "", "required": 
false, "type": "enum", "javaType": 
"org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml", 
"html", "text" ], "deprecated": false, "autowired": false, "secret": false, 
"defaultValue": "xml", "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description" [...]
     "lazyStartProducer": { "index": 3, "kind": "parameter", "displayName": 
"Lazy Start Producer", "group": "producer (advanced)", "label": 
"producer,advanced", "required": false, "type": "boolean", "javaType": 
"boolean", "deprecated": false, "autowired": false, "secret": false, 
"defaultValue": false, "description": "Whether the producer should be started 
lazy (on the first message). By starting lazy you can use this to allow 
CamelContext and routes to startup in situations where a produc [...]
     "tikaConfig": { "index": 4, "kind": "parameter", "displayName": "Tika 
Config", "group": "advanced", "label": "advanced", "required": false, "type": 
"object", "javaType": "org.apache.tika.config.TikaConfig", "deprecated": false, 
"autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Config" },
     "tikaConfigUri": { "index": 5, "kind": "parameter", "displayName": "Tika 
Config Uri", "group": "advanced", "label": "advanced", "required": false, 
"type": "string", "javaType": "java.lang.String", "deprecated": false, 
"autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Config Url" }
diff --git 
a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
 
b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
index 67665d58f69c..0da0d3564061 100644
--- 
a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
+++ 
b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
@@ -23,8 +23,6 @@ package org.apache.camel.component.tika;
  * <li>xml: Returns Parsed Content as XML.</li>
  * <li>html: Returns Parsed Content as HTML.</li>
  * <li>text: Returns Parsed Content as Text.</li>
- * <li>textMain: Uses the <a href="http://code.google.com/p/boilerpipe/">boilerpipe</a> library to automatically extract
- * the main content from a web page.</li>
  * </ul>
  *
  */
@@ -32,5 +30,4 @@ public enum TikaParseOutputFormat {
     xml,
     html,
     text,
-    textMain;
 }
diff --git 
a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
 
b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
index 58a663866767..9328dc47d344 100644
--- 
a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
+++ 
b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
@@ -46,7 +46,6 @@ import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.ExpandedTitleContentHandler;
-import org.apache.tika.sax.boilerpipe.BoilerpipeContentHandler;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -150,9 +149,6 @@ public class TikaProducer extends DefaultProducer {
             case text:
                 result = new BodyContentHandler(new OutputStreamWriter(outputStream, this.encoding));
                 break;
-            case textMain:
-                result = new BoilerpipeContentHandler(new OutputStreamWriter(outputStream, this.encoding));
-                break;
             case html:
                 result = new ExpandedTitleContentHandler(getTransformerHandler(outputStream, "html", true));
                 break;
diff --git 
a/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_14.adoc 
b/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_14.adoc
index 7ad3b5b335a9..c2c1b2ae321e 100644
--- a/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_14.adoc
+++ b/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_14.adoc
@@ -4,6 +4,12 @@ This document is for helping you upgrade your Apache Camel 
application
 from Camel 4.x to 4.y. For example, if you are upgrading Camel 4.0 to 4.2, 
then you should follow the guides
 from both 4.0 to 4.1 and 4.1 to 4.2.
 
+== Upgrading from 4.14.2 to 4.14.3
+
+=== camel-tika
+
+Upgraded to Tika v3, and removed `textMain` from `tikaParseOutputFormat` option.
+
 == Upgrading from 4.14.1 to 4.14.2
 
 === camel-kamelet
diff --git a/parent/pom.xml b/parent/pom.xml
index 8b8a720c4215..c8a779315c25 100644
--- a/parent/pom.xml
+++ b/parent/pom.xml
@@ -503,7 +503,7 @@
         <tahu-version>1.0.14</tahu-version>
         <testcontainers-version>1.21.3</testcontainers-version>
         <thymeleaf-version>3.1.3.RELEASE</thymeleaf-version>
-        <tika-version>2.9.4</tika-version>
+        <tika-version>3.2.3</tika-version>
         <twilio-version>10.9.2</twilio-version>
         <twitter4j-version>4.1.2</twitter4j-version>
         <undertow-version>2.3.20.Final</undertow-version>

Reply via email to