This is an automated email from the ASF dual-hosted git repository.

davsclaus pushed a commit to branch tika
in repository https://gitbox.apache.org/repos/asf/camel.git

commit 0379f6a5ceb246af3aca42bf8d745de0473cf6a3
Author: Claus Ibsen <[email protected]>
AuthorDate: Fri Dec 5 16:08:28 2025 +0100

    CAMEL-22757: camel-tika - Upgrade to tika 3.2.3 from 2.9.4
---
 .../generated/resources/org/apache/camel/catalog/components/tika.json | 2 +-
 components/camel-tika/pom.xml                                         | 2 +-
 .../resources/META-INF/org/apache/camel/component/tika/tika.json      | 2 +-
 .../java/org/apache/camel/component/tika/TikaParseOutputFormat.java   | 3 ---
 .../src/main/java/org/apache/camel/component/tika/TikaProducer.java   | 4 ----
 docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_17.adoc  | 4 ++++
 parent/pom.xml                                                        | 2 +-
 7 files changed, 8 insertions(+), 11 deletions(-)

diff --git 
a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json
 
b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json
index a08b39dc0c3f..59b6a408cc5e 100644
--- 
a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json
+++ 
b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/tika.json
@@ -30,7 +30,7 @@
   "properties": {
     "operation": { "index": 0, "kind": "path", "displayName": "Operation", 
"group": "producer", "label": "", "required": true, "type": "enum", "javaType": 
"org.apache.camel.component.tika.TikaOperation", "enum": [ "parse", "detect" ], 
"deprecated": false, "deprecationNote": "", "autowired": false, "secret": 
false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Operation type" },
     "tikaParseOutputEncoding": { "index": 1, "kind": "parameter", 
"displayName": "Tika Parse Output Encoding", "group": "producer", "label": "", 
"required": false, "type": "string", "javaType": "java.lang.String", 
"deprecated": false, "autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Parse Output Encoding" },
-    "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName": 
"Tika Parse Output Format", "group": "producer", "label": "", "required": 
false, "type": "enum", "javaType": 
"org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml", 
"html", "text", "textMain" ], "deprecated": false, "autowired": false, 
"secret": false, "defaultValue": "xml", "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", " [...]
+    "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName": 
"Tika Parse Output Format", "group": "producer", "label": "", "required": 
false, "type": "enum", "javaType": 
"org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml", 
"html", "text" ], "deprecated": false, "autowired": false, "secret": false, 
"defaultValue": "xml", "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description" [...]
     "lazyStartProducer": { "index": 3, "kind": "parameter", "displayName": 
"Lazy Start Producer", "group": "producer (advanced)", "label": 
"producer,advanced", "required": false, "type": "boolean", "javaType": 
"boolean", "deprecated": false, "autowired": false, "secret": false, 
"defaultValue": false, "description": "Whether the producer should be started 
lazy (on the first message). By starting lazy you can use this to allow 
CamelContext and routes to startup in situations where a produc [...]
     "tikaConfig": { "index": 4, "kind": "parameter", "displayName": "Tika 
Config", "group": "advanced", "label": "advanced", "required": false, "type": 
"object", "javaType": "org.apache.tika.config.TikaConfig", "deprecated": false, 
"autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Config" },
     "tikaConfigUri": { "index": 5, "kind": "parameter", "displayName": "Tika 
Config Uri", "group": "advanced", "label": "advanced", "required": false, 
"type": "string", "javaType": "java.lang.String", "deprecated": false, 
"autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Config Url" }
diff --git a/components/camel-tika/pom.xml b/components/camel-tika/pom.xml
index a2a5f563b480..4e7c735ec524 100644
--- a/components/camel-tika/pom.xml
+++ b/components/camel-tika/pom.xml
@@ -48,7 +48,7 @@
         </dependency>
         <dependency>
             <groupId>org.apache.tika</groupId>
-            <artifactId>tika-parser-html-commons</artifactId>
+            <artifactId>tika-parser-html-module</artifactId>
             <version>${tika-version}</version>
         </dependency>
         <dependency>
diff --git 
a/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json
 
b/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json
index a08b39dc0c3f..59b6a408cc5e 100644
--- 
a/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json
+++ 
b/components/camel-tika/src/generated/resources/META-INF/org/apache/camel/component/tika/tika.json
@@ -30,7 +30,7 @@
   "properties": {
     "operation": { "index": 0, "kind": "path", "displayName": "Operation", 
"group": "producer", "label": "", "required": true, "type": "enum", "javaType": 
"org.apache.camel.component.tika.TikaOperation", "enum": [ "parse", "detect" ], 
"deprecated": false, "deprecationNote": "", "autowired": false, "secret": 
false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Operation type" },
     "tikaParseOutputEncoding": { "index": 1, "kind": "parameter", 
"displayName": "Tika Parse Output Encoding", "group": "producer", "label": "", 
"required": false, "type": "string", "javaType": "java.lang.String", 
"deprecated": false, "autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Parse Output Encoding" },
-    "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName": 
"Tika Parse Output Format", "group": "producer", "label": "", "required": 
false, "type": "enum", "javaType": 
"org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml", 
"html", "text", "textMain" ], "deprecated": false, "autowired": false, 
"secret": false, "defaultValue": "xml", "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", " [...]
+    "tikaParseOutputFormat": { "index": 2, "kind": "parameter", "displayName": 
"Tika Parse Output Format", "group": "producer", "label": "", "required": 
false, "type": "enum", "javaType": 
"org.apache.camel.component.tika.TikaParseOutputFormat", "enum": [ "xml", 
"html", "text" ], "deprecated": false, "autowired": false, "secret": false, 
"defaultValue": "xml", "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description" [...]
     "lazyStartProducer": { "index": 3, "kind": "parameter", "displayName": 
"Lazy Start Producer", "group": "producer (advanced)", "label": 
"producer,advanced", "required": false, "type": "boolean", "javaType": 
"boolean", "deprecated": false, "autowired": false, "secret": false, 
"defaultValue": false, "description": "Whether the producer should be started 
lazy (on the first message). By starting lazy you can use this to allow 
CamelContext and routes to startup in situations where a produc [...]
     "tikaConfig": { "index": 4, "kind": "parameter", "displayName": "Tika 
Config", "group": "advanced", "label": "advanced", "required": false, "type": 
"object", "javaType": "org.apache.tika.config.TikaConfig", "deprecated": false, 
"autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Config" },
     "tikaConfigUri": { "index": 5, "kind": "parameter", "displayName": "Tika 
Config Uri", "group": "advanced", "label": "advanced", "required": false, 
"type": "string", "javaType": "java.lang.String", "deprecated": false, 
"autowired": false, "secret": false, "configurationClass": 
"org.apache.camel.component.tika.TikaConfiguration", "configurationField": 
"tikaConfiguration", "description": "Tika Config Url" }
diff --git 
a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
 
b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
index 67665d58f69c..0da0d3564061 100644
--- 
a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
+++ 
b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
@@ -23,8 +23,6 @@ package org.apache.camel.component.tika;
  * <li>xml: Returns Parsed Content as XML.</li>
  * <li>html: Returns Parsed Content as HTML.</li>
  * <li>text: Returns Parsed Content as Text.</li>
- * <li>textMain: Uses the <a 
href="http://code.google.com/p/boilerpipe/">boilerpipe</a> library to 
automatically extract
- * the main content from a web page.</li>
  * </ul>
  *
  */
@@ -32,5 +30,4 @@ public enum TikaParseOutputFormat {
     xml,
     html,
     text,
-    textMain;
 }
diff --git 
a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
 
b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
index 58a663866767..9328dc47d344 100644
--- 
a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
+++ 
b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
@@ -46,7 +46,6 @@ import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.ExpandedTitleContentHandler;
-import org.apache.tika.sax.boilerpipe.BoilerpipeContentHandler;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -150,9 +149,6 @@ public class TikaProducer extends DefaultProducer {
             case text:
                 result = new BodyContentHandler(new 
OutputStreamWriter(outputStream, this.encoding));
                 break;
-            case textMain:
-                result = new BoilerpipeContentHandler(new 
OutputStreamWriter(outputStream, this.encoding));
-                break;
             case html:
                 result = new 
ExpandedTitleContentHandler(getTransformerHandler(outputStream, "html", true));
                 break;
diff --git 
a/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_17.adoc 
b/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_17.adoc
index 863a7ae6a614..2869feef0c07 100644
--- a/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_17.adoc
+++ b/docs/user-manual/modules/ROOT/pages/camel-4x-upgrade-guide-4_17.adoc
@@ -53,6 +53,10 @@ Virtual threads are enabled by default for embedded mode 
with `camel-infinispan-
 If you choose to listen for cache events on the `infinispan-embedded` 
consumer, it is possible that events are not processed in a predicatable order.
 Disabling virtual threads by setting system property 
`org.infinispan.threads.virtual` to `false` leads to more predicatable event 
order processing.
 
+=== camel-tika
+
+Upgraded to Tika v3, and removed `textMain` from `tikaParseOutputFormat` 
option.
+
 === Component deprecation
 
 The `camel-stomp` component is deprecated.
diff --git a/parent/pom.xml b/parent/pom.xml
index d19b7c4ee24d..5208c8ad54b6 100644
--- a/parent/pom.xml
+++ b/parent/pom.xml
@@ -520,7 +520,7 @@
         <tahu-version>1.0.17</tahu-version>
         <testcontainers-version>2.0.2</testcontainers-version>
         <thymeleaf-version>3.1.3.RELEASE</thymeleaf-version>
-        <tika-version>2.9.4</tika-version>
+        <tika-version>3.2.3</tika-version>
         <twilio-version>11.2.0</twilio-version>
         <twitter4j-version>4.1.2</twitter4j-version>
         <undertow-version>2.3.20.Final</undertow-version>

Reply via email to