This is an automated email from the ASF dual-hosted git repository.
fmariani pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/camel.git
The following commit(s) were added to refs/heads/main by this push:
new a24e4497b5e0 Expose docling serve chunking mechanism
a24e4497b5e0 is described below
commit a24e4497b5e0fd0526cb7a497697270f91cd0a7d
Author: Croway <[email protected]>
AuthorDate: Mon Feb 23 17:07:48 2026 +0100
Expose docling serve chunking mechanism
---
.../apache/camel/catalog/components/docling.json | 43 +++--
.../docling/DoclingComponentConfigurer.java | 30 +++
.../docling/DoclingConfigurationConfigurer.java | 30 +++
.../docling/DoclingEndpointConfigurer.java | 30 +++
.../docling/DoclingEndpointUriFactory.java | 7 +-
.../apache/camel/component/docling/docling.json | 43 +++--
.../src/main/docs/docling-component.adoc | 183 +++++++++++++++++-
.../component/docling/DoclingConfiguration.java | 62 ++++++-
.../camel/component/docling/DoclingHeaders.java | 9 +
.../camel/component/docling/DoclingOperations.java | 12 +-
.../camel/component/docling/DoclingProducer.java | 135 ++++++++++++++
.../component/docling/integration/ChunkingIT.java | 206 +++++++++++++++++++++
.../src/test/resources/multi_chapter_lorem.pdf | 150 +++++++++++++++
.../dsl/DoclingComponentBuilderFactory.java | 87 +++++++++
.../dsl/DoclingEndpointBuilderFactory.java | 174 +++++++++++++++++
15 files changed, 1167 insertions(+), 34 deletions(-)
diff --git
a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/docling.json
b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/docling.json
index ba8c86f7f589..1366dbfdaf55 100644
---
a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/docling.json
+++
b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/docling.json
@@ -31,7 +31,7 @@
"includeLayoutInfo": { "index": 4, "kind": "property", "displayName":
"Include Layout Info", "group": "producer", "label": "", "required": false,
"type": "boolean", "javaType": "boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Show layout information
with bounding boxes" },
"lazyStartProducer": { "index": 5, "kind": "property", "displayName":
"Lazy Start Producer", "group": "producer", "label": "producer", "required":
false, "type": "boolean", "javaType": "boolean", "deprecated": false,
"autowired": false, "secret": false, "defaultValue": false, "description":
"Whether the producer should be started lazy (on the first message). By
starting lazy you can use this to allow CamelContext and routes to startup in
situations where a producer may otherwise fail [...]
"ocrLanguage": { "index": 6, "kind": "property", "displayName": "Ocr
Language", "group": "producer", "label": "", "required": false, "type":
"string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
"en", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Language code for OCR
processing" },
- "operation": { "index": 7, "kind": "property", "displayName": "Operation",
"group": "producer", "label": "", "required": true, "type": "enum", "javaType":
"org.apache.camel.component.docling.DoclingOperations", "enum": [
"CONVERT_TO_MARKDOWN", "CONVERT_TO_HTML", "CONVERT_TO_JSON", "EXTRACT_TEXT",
"EXTRACT_STRUCTURED_DATA", "SUBMIT_ASYNC_CONVERSION",
"CHECK_CONVERSION_STATUS", "BATCH_CONVERT_TO_MARKDOWN",
"BATCH_CONVERT_TO_HTML", "BATCH_CONVERT_TO_JSON", "BATCH_EXTRACT_TEXT",
"BATCH_E [...]
+ "operation": { "index": 7, "kind": "property", "displayName": "Operation",
"group": "producer", "label": "", "required": true, "type": "enum", "javaType":
"org.apache.camel.component.docling.DoclingOperations", "enum": [
"CONVERT_TO_MARKDOWN", "CONVERT_TO_HTML", "CONVERT_TO_JSON", "EXTRACT_TEXT",
"EXTRACT_STRUCTURED_DATA", "SUBMIT_ASYNC_CONVERSION",
"CHECK_CONVERSION_STATUS", "BATCH_CONVERT_TO_MARKDOWN",
"BATCH_CONVERT_TO_HTML", "BATCH_CONVERT_TO_JSON", "BATCH_EXTRACT_TEXT",
"BATCH_E [...]
"outputFormat": { "index": 8, "kind": "property", "displayName": "Output
Format", "group": "producer", "label": "", "required": false, "type": "string",
"javaType": "java.lang.String", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": "markdown",
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Output format for
document conversion" },
"useDoclingServe": { "index": 9, "kind": "property", "displayName": "Use
Docling Serve", "group": "producer", "label": "", "required": false, "type":
"boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": false,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Use docling-serve API
instead of CLI command" },
"abortOnError": { "index": 10, "kind": "property", "displayName": "Abort
On Error", "group": "advanced", "label": "advanced", "required": false, "type":
"boolean", "javaType": "java.lang.Boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Abort processing on
error" },
@@ -64,12 +64,17 @@
"batchSize": { "index": 37, "kind": "property", "displayName": "Batch
Size", "group": "batch", "label": "batch", "required": false, "type":
"integer", "javaType": "int", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": 10, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum number of
documents to process in a single batch (batch operations only)" },
"batchTimeout": { "index": 38, "kind": "property", "displayName": "Batch
Timeout", "group": "batch", "label": "batch", "required": false, "type":
"integer", "javaType": "long", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": 300000,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum time to wait for
batch completion in milliseconds" },
"splitBatchResults": { "index": 39, "kind": "property", "displayName":
"Split Batch Results", "group": "batch", "label": "batch", "required": false,
"type": "boolean", "javaType": "boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Split batch results into
individual exchanges (one per do [...]
- "includeMetadataInHeaders": { "index": 40, "kind": "property",
"displayName": "Include Metadata In Headers", "group": "metadata", "label":
"metadata", "required": false, "type": "boolean", "javaType": "boolean",
"deprecated": false, "deprecationNote": "", "autowired": false, "secret":
false, "defaultValue": true, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include metadata in
message headers w [...]
- "includeRawMetadata": { "index": 41, "kind": "property", "displayName":
"Include Raw Metadata", "group": "metadata", "label": "metadata", "required":
false, "type": "boolean", "javaType": "boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include raw metadata as
returned by the parser" },
- "apiKeyHeader": { "index": 42, "kind": "property", "displayName": "Api Key
Header", "group": "security", "label": "security", "required": false, "type":
"string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
"X-API-Key", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Header name for API key
authentication" },
- "authenticationScheme": { "index": 43, "kind": "property", "displayName":
"Authentication Scheme", "group": "security", "label": "security", "required":
false, "type": "enum", "javaType":
"org.apache.camel.component.docling.AuthenticationScheme", "enum": [ "NONE",
"BEARER", "API_KEY" ], "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": "NONE", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration", "configuration [...]
- "authenticationToken": { "index": 44, "kind": "property", "displayName":
"Authentication Token", "group": "security", "label": "security", "required":
false, "type": "string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": true,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Authentication token for
docling-serve API (Bearer token or API [...]
- "maxFileSize": { "index": 45, "kind": "property", "displayName": "Max File
Size", "group": "security", "label": "security", "required": false, "type":
"integer", "javaType": "long", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": 52428800,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum file size in
bytes for processing" }
+ "chunkingIncludeRawText": { "index": 40, "kind": "property",
"displayName": "Chunking Include Raw Text", "group": "chunking", "label":
"chunking", "required": false, "type": "boolean", "javaType":
"java.lang.Boolean", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include raw text in
chunk output" },
+ "chunkingMaxTokens": { "index": 41, "kind": "property", "displayName":
"Chunking Max Tokens", "group": "chunking", "label": "chunking", "required":
false, "type": "integer", "javaType": "java.lang.Integer", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum number of tokens
per chunk for hybrid chunking" },
+ "chunkingMergePeers": { "index": 42, "kind": "property", "displayName":
"Chunking Merge Peers", "group": "chunking", "label": "chunking", "required":
false, "type": "boolean", "javaType": "java.lang.Boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
true, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Whether to merge peer
chunks in hybrid c [...]
+ "chunkingTokenizer": { "index": 43, "kind": "property", "displayName":
"Chunking Tokenizer", "group": "chunking", "label": "chunking", "required":
false, "type": "string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Tokenizer model for
hybrid chunking (e.g. sentence-transformers\/al [...]
+ "chunkingUseMarkdownTables": { "index": 44, "kind": "property",
"displayName": "Chunking Use Markdown Tables", "group": "chunking", "label":
"chunking", "required": false, "type": "boolean", "javaType":
"java.lang.Boolean", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Use markdown format for
[...]
+ "includeMetadataInHeaders": { "index": 45, "kind": "property",
"displayName": "Include Metadata In Headers", "group": "metadata", "label":
"metadata", "required": false, "type": "boolean", "javaType": "boolean",
"deprecated": false, "deprecationNote": "", "autowired": false, "secret":
false, "defaultValue": true, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include metadata in
message headers w [...]
+ "includeRawMetadata": { "index": 46, "kind": "property", "displayName":
"Include Raw Metadata", "group": "metadata", "label": "metadata", "required":
false, "type": "boolean", "javaType": "boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include raw metadata as
returned by the parser" },
+ "apiKeyHeader": { "index": 47, "kind": "property", "displayName": "Api Key
Header", "group": "security", "label": "security", "required": false, "type":
"string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
"X-API-Key", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Header name for API key
authentication" },
+ "authenticationScheme": { "index": 48, "kind": "property", "displayName":
"Authentication Scheme", "group": "security", "label": "security", "required":
false, "type": "enum", "javaType":
"org.apache.camel.component.docling.AuthenticationScheme", "enum": [ "NONE",
"BEARER", "API_KEY" ], "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": "NONE", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration", "configuration [...]
+ "authenticationToken": { "index": 49, "kind": "property", "displayName":
"Authentication Token", "group": "security", "label": "security", "required":
false, "type": "string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": true,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Authentication token for
docling-serve API (Bearer token or API [...]
+ "maxFileSize": { "index": 50, "kind": "property", "displayName": "Max File
Size", "group": "security", "label": "security", "required": false, "type":
"integer", "javaType": "long", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": 52428800,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum file size in
bytes for processing" }
},
"headers": {
"CamelDoclingOperation": { "index": 0, "kind": "header", "displayName":
"", "group": "producer", "label": "", "required": false, "javaType":
"DoclingOperations", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "The operation to perform",
"constantName": "org.apache.camel.component.docling.DoclingHeaders#OPERATION" },
@@ -99,7 +104,10 @@
"CamelDoclingMetadataFormat": { "index": 24, "kind": "header",
"displayName": "", "group": "producer", "label": "", "required": false,
"javaType": "String", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "Document format (MIME type)",
"constantName":
"org.apache.camel.component.docling.DoclingHeaders#METADATA_FORMAT" },
"CamelDoclingMetadataFileSize": { "index": 25, "kind": "header",
"displayName": "", "group": "producer", "label": "", "required": false,
"javaType": "Long", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "File size in bytes", "constantName":
"org.apache.camel.component.docling.DoclingHeaders#METADATA_FILE_SIZE" },
"CamelDoclingMetadataFileName": { "index": 26, "kind": "header",
"displayName": "", "group": "producer", "label": "", "required": false,
"javaType": "String", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "File name", "constantName":
"org.apache.camel.component.docling.DoclingHeaders#METADATA_FILE_NAME" },
- "CamelDoclingMetadataRaw": { "index": 27, "kind": "header", "displayName":
"", "group": "producer", "label": "", "required": false, "javaType":
"Map<String, Object>", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "Raw metadata fields as a Map",
"constantName":
"org.apache.camel.component.docling.DoclingHeaders#METADATA_RAW" }
+ "CamelDoclingMetadataRaw": { "index": 27, "kind": "header", "displayName":
"", "group": "producer", "label": "", "required": false, "javaType":
"Map<String, Object>", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "Raw metadata fields as a Map",
"constantName":
"org.apache.camel.component.docling.DoclingHeaders#METADATA_RAW" },
+ "CamelDoclingChunkingTokenizer": { "index": 28, "kind": "header",
"displayName": "", "group": "producer", "label": "", "required": false,
"javaType": "String", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "Tokenizer for hybrid chunking (e.g.
sentence-transformers\/all-MiniLM-L6-v2)", "constantName":
"org.apache.camel.component.docling.DoclingHeaders#CHUNKING_TOKENIZER" },
+ "CamelDoclingChunkingMaxTokens": { "index": 29, "kind": "header",
"displayName": "", "group": "producer", "label": "", "required": false,
"javaType": "Integer", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "Maximum tokens per chunk for hybrid
chunking", "constantName":
"org.apache.camel.component.docling.DoclingHeaders#CHUNKING_MAX_TOKENS" },
+ "CamelDoclingChunkingMergePeers": { "index": 30, "kind": "header",
"displayName": "", "group": "producer", "label": "", "required": false,
"javaType": "Boolean", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "Whether to merge peer chunks in hybrid
chunking", "constantName":
"org.apache.camel.component.docling.DoclingHeaders#CHUNKING_MERGE_PEERS" }
},
"properties": {
"operationId": { "index": 0, "kind": "path", "displayName": "Operation
Id", "group": "producer", "label": "", "required": true, "type": "string",
"javaType": "java.lang.String", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "description": "The operation identifier"
},
@@ -108,7 +116,7 @@
"enableOCR": { "index": 3, "kind": "parameter", "displayName": "Enable
OCR", "group": "producer", "label": "", "required": false, "type": "boolean",
"javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": true, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Enable OCR processing
for scanned documents" },
"includeLayoutInfo": { "index": 4, "kind": "parameter", "displayName":
"Include Layout Info", "group": "producer", "label": "", "required": false,
"type": "boolean", "javaType": "boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Show layout information
with bounding boxes" },
"ocrLanguage": { "index": 5, "kind": "parameter", "displayName": "Ocr
Language", "group": "producer", "label": "", "required": false, "type":
"string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
"en", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Language code for OCR
processing" },
- "operation": { "index": 6, "kind": "parameter", "displayName":
"Operation", "group": "producer", "label": "", "required": true, "type":
"enum", "javaType": "org.apache.camel.component.docling.DoclingOperations",
"enum": [ "CONVERT_TO_MARKDOWN", "CONVERT_TO_HTML", "CONVERT_TO_JSON",
"EXTRACT_TEXT", "EXTRACT_STRUCTURED_DATA", "SUBMIT_ASYNC_CONVERSION",
"CHECK_CONVERSION_STATUS", "BATCH_CONVERT_TO_MARKDOWN",
"BATCH_CONVERT_TO_HTML", "BATCH_CONVERT_TO_JSON", "BATCH_EXTRACT_TEXT", "BATCH_
[...]
+ "operation": { "index": 6, "kind": "parameter", "displayName":
"Operation", "group": "producer", "label": "", "required": true, "type":
"enum", "javaType": "org.apache.camel.component.docling.DoclingOperations",
"enum": [ "CONVERT_TO_MARKDOWN", "CONVERT_TO_HTML", "CONVERT_TO_JSON",
"EXTRACT_TEXT", "EXTRACT_STRUCTURED_DATA", "SUBMIT_ASYNC_CONVERSION",
"CHECK_CONVERSION_STATUS", "BATCH_CONVERT_TO_MARKDOWN",
"BATCH_CONVERT_TO_HTML", "BATCH_CONVERT_TO_JSON", "BATCH_EXTRACT_TEXT", "BATCH_
[...]
"outputFormat": { "index": 7, "kind": "parameter", "displayName": "Output
Format", "group": "producer", "label": "", "required": false, "type": "string",
"javaType": "java.lang.String", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": "markdown",
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Output format for
document conversion" },
"useDoclingServe": { "index": 8, "kind": "parameter", "displayName": "Use
Docling Serve", "group": "producer", "label": "", "required": false, "type":
"boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": false,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Use docling-serve API
instead of CLI command" },
"lazyStartProducer": { "index": 9, "kind": "parameter", "displayName":
"Lazy Start Producer", "group": "producer (advanced)", "label":
"producer,advanced", "required": false, "type": "boolean", "javaType":
"boolean", "deprecated": false, "autowired": false, "secret": false,
"defaultValue": false, "description": "Whether the producer should be started
lazy (on the first message). By starting lazy you can use this to allow
CamelContext and routes to startup in situations where a produc [...]
@@ -141,11 +149,16 @@
"batchSize": { "index": 36, "kind": "parameter", "displayName": "Batch
Size", "group": "batch", "label": "batch", "required": false, "type":
"integer", "javaType": "int", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": 10, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum number of
documents to process in a single batch (batch operations only)" },
"batchTimeout": { "index": 37, "kind": "parameter", "displayName": "Batch
Timeout", "group": "batch", "label": "batch", "required": false, "type":
"integer", "javaType": "long", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": 300000,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum time to wait for
batch completion in milliseconds" },
"splitBatchResults": { "index": 38, "kind": "parameter", "displayName":
"Split Batch Results", "group": "batch", "label": "batch", "required": false,
"type": "boolean", "javaType": "boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Split batch results into
individual exchanges (one per d [...]
- "includeMetadataInHeaders": { "index": 39, "kind": "parameter",
"displayName": "Include Metadata In Headers", "group": "metadata", "label":
"metadata", "required": false, "type": "boolean", "javaType": "boolean",
"deprecated": false, "deprecationNote": "", "autowired": false, "secret":
false, "defaultValue": true, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include metadata in
message headers [...]
- "includeRawMetadata": { "index": 40, "kind": "parameter", "displayName":
"Include Raw Metadata", "group": "metadata", "label": "metadata", "required":
false, "type": "boolean", "javaType": "boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include raw metadata as
returned by the parser" },
- "apiKeyHeader": { "index": 41, "kind": "parameter", "displayName": "Api
Key Header", "group": "security", "label": "security", "required": false,
"type": "string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
"X-API-Key", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Header name for API key
authentication" },
- "authenticationScheme": { "index": 42, "kind": "parameter", "displayName":
"Authentication Scheme", "group": "security", "label": "security", "required":
false, "type": "enum", "javaType":
"org.apache.camel.component.docling.AuthenticationScheme", "enum": [ "NONE",
"BEARER", "API_KEY" ], "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": "NONE", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration", "configuratio [...]
- "authenticationToken": { "index": 43, "kind": "parameter", "displayName":
"Authentication Token", "group": "security", "label": "security", "required":
false, "type": "string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": true,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Authentication token for
docling-serve API (Bearer token or API [...]
- "maxFileSize": { "index": 44, "kind": "parameter", "displayName": "Max
File Size", "group": "security", "label": "security", "required": false,
"type": "integer", "javaType": "long", "deprecated": false, "deprecationNote":
"", "autowired": false, "secret": false, "defaultValue": 52428800,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum file size in
bytes for processing" }
+ "chunkingIncludeRawText": { "index": 39, "kind": "parameter",
"displayName": "Chunking Include Raw Text", "group": "chunking", "label":
"chunking", "required": false, "type": "boolean", "javaType":
"java.lang.Boolean", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include raw text in
chunk output" },
+ "chunkingMaxTokens": { "index": 40, "kind": "parameter", "displayName":
"Chunking Max Tokens", "group": "chunking", "label": "chunking", "required":
false, "type": "integer", "javaType": "java.lang.Integer", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum number of tokens
per chunk for hybrid chunking" },
+ "chunkingMergePeers": { "index": 41, "kind": "parameter", "displayName":
"Chunking Merge Peers", "group": "chunking", "label": "chunking", "required":
false, "type": "boolean", "javaType": "java.lang.Boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
true, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Whether to merge peer
chunks in hybrid [...]
+ "chunkingTokenizer": { "index": 42, "kind": "parameter", "displayName":
"Chunking Tokenizer", "group": "chunking", "label": "chunking", "required":
false, "type": "string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Tokenizer model for
hybrid chunking (e.g. sentence-transformers\/a [...]
+ "chunkingUseMarkdownTables": { "index": 43, "kind": "parameter",
"displayName": "Chunking Use Markdown Tables", "group": "chunking", "label":
"chunking", "required": false, "type": "boolean", "javaType":
"java.lang.Boolean", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Use markdown format for
[...]
+ "includeMetadataInHeaders": { "index": 44, "kind": "parameter",
"displayName": "Include Metadata In Headers", "group": "metadata", "label":
"metadata", "required": false, "type": "boolean", "javaType": "boolean",
"deprecated": false, "deprecationNote": "", "autowired": false, "secret":
false, "defaultValue": true, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include metadata in
message headers [...]
+ "includeRawMetadata": { "index": 45, "kind": "parameter", "displayName":
"Include Raw Metadata", "group": "metadata", "label": "metadata", "required":
false, "type": "boolean", "javaType": "boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include raw metadata as
returned by the parser" },
+ "apiKeyHeader": { "index": 46, "kind": "parameter", "displayName": "Api
Key Header", "group": "security", "label": "security", "required": false,
"type": "string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
"X-API-Key", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Header name for API key
authentication" },
+ "authenticationScheme": { "index": 47, "kind": "parameter", "displayName":
"Authentication Scheme", "group": "security", "label": "security", "required":
false, "type": "enum", "javaType":
"org.apache.camel.component.docling.AuthenticationScheme", "enum": [ "NONE",
"BEARER", "API_KEY" ], "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": "NONE", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration", "configuratio [...]
+ "authenticationToken": { "index": 48, "kind": "parameter", "displayName":
"Authentication Token", "group": "security", "label": "security", "required":
false, "type": "string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": true,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Authentication token for
docling-serve API (Bearer token or API [...]
+ "maxFileSize": { "index": 49, "kind": "parameter", "displayName": "Max
File Size", "group": "security", "label": "security", "required": false,
"type": "integer", "javaType": "long", "deprecated": false, "deprecationNote":
"", "autowired": false, "secret": false, "defaultValue": 52428800,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum file size in
bytes for processing" }
}
}
diff --git
a/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingComponentConfigurer.java
b/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingComponentConfigurer.java
index 5a13f84871a1..6ba30fb97860 100644
---
a/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingComponentConfigurer.java
+++
b/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingComponentConfigurer.java
@@ -52,6 +52,16 @@ public class DoclingComponentConfigurer extends
PropertyConfigurerSupport implem
case "batchSize":
getOrCreateConfiguration(target).setBatchSize(property(camelContext, int.class,
value)); return true;
case "batchtimeout":
case "batchTimeout":
getOrCreateConfiguration(target).setBatchTimeout(property(camelContext,
long.class, value)); return true;
+ case "chunkingincluderawtext":
+ case "chunkingIncludeRawText":
getOrCreateConfiguration(target).setChunkingIncludeRawText(property(camelContext,
java.lang.Boolean.class, value)); return true;
+ case "chunkingmaxtokens":
+ case "chunkingMaxTokens":
getOrCreateConfiguration(target).setChunkingMaxTokens(property(camelContext,
java.lang.Integer.class, value)); return true;
+ case "chunkingmergepeers":
+ case "chunkingMergePeers":
getOrCreateConfiguration(target).setChunkingMergePeers(property(camelContext,
java.lang.Boolean.class, value)); return true;
+ case "chunkingtokenizer":
+ case "chunkingTokenizer":
getOrCreateConfiguration(target).setChunkingTokenizer(property(camelContext,
java.lang.String.class, value)); return true;
+ case "chunkingusemarkdowntables":
+ case "chunkingUseMarkdownTables":
getOrCreateConfiguration(target).setChunkingUseMarkdownTables(property(camelContext,
java.lang.Boolean.class, value)); return true;
case "configuration": target.setConfiguration(property(camelContext,
org.apache.camel.component.docling.DoclingConfiguration.class, value)); return
true;
case "contentinbody":
case "contentInBody":
getOrCreateConfiguration(target).setContentInBody(property(camelContext,
boolean.class, value)); return true;
@@ -148,6 +158,16 @@ public class DoclingComponentConfigurer extends
PropertyConfigurerSupport implem
case "batchSize": return int.class;
case "batchtimeout":
case "batchTimeout": return long.class;
+ case "chunkingincluderawtext":
+ case "chunkingIncludeRawText": return java.lang.Boolean.class;
+ case "chunkingmaxtokens":
+ case "chunkingMaxTokens": return java.lang.Integer.class;
+ case "chunkingmergepeers":
+ case "chunkingMergePeers": return java.lang.Boolean.class;
+ case "chunkingtokenizer":
+ case "chunkingTokenizer": return java.lang.String.class;
+ case "chunkingusemarkdowntables":
+ case "chunkingUseMarkdownTables": return java.lang.Boolean.class;
case "configuration": return
org.apache.camel.component.docling.DoclingConfiguration.class;
case "contentinbody":
case "contentInBody": return boolean.class;
@@ -245,6 +265,16 @@ public class DoclingComponentConfigurer extends
PropertyConfigurerSupport implem
case "batchSize": return
getOrCreateConfiguration(target).getBatchSize();
case "batchtimeout":
case "batchTimeout": return
getOrCreateConfiguration(target).getBatchTimeout();
+ case "chunkingincluderawtext":
+ case "chunkingIncludeRawText": return
getOrCreateConfiguration(target).getChunkingIncludeRawText();
+ case "chunkingmaxtokens":
+ case "chunkingMaxTokens": return
getOrCreateConfiguration(target).getChunkingMaxTokens();
+ case "chunkingmergepeers":
+ case "chunkingMergePeers": return
getOrCreateConfiguration(target).getChunkingMergePeers();
+ case "chunkingtokenizer":
+ case "chunkingTokenizer": return
getOrCreateConfiguration(target).getChunkingTokenizer();
+ case "chunkingusemarkdowntables":
+ case "chunkingUseMarkdownTables": return
getOrCreateConfiguration(target).getChunkingUseMarkdownTables();
case "configuration": return target.getConfiguration();
case "contentinbody":
case "contentInBody": return
getOrCreateConfiguration(target).isContentInBody();
diff --git
a/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingConfigurationConfigurer.java
b/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingConfigurationConfigurer.java
index c7e9be154563..d237932aec55 100644
---
a/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingConfigurationConfigurer.java
+++
b/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingConfigurationConfigurer.java
@@ -43,6 +43,16 @@ public class DoclingConfigurationConfigurer extends
org.apache.camel.support.com
case "batchSize": target.setBatchSize(property(camelContext,
int.class, value)); return true;
case "batchtimeout":
case "batchTimeout": target.setBatchTimeout(property(camelContext,
long.class, value)); return true;
+ case "chunkingincluderawtext":
+ case "chunkingIncludeRawText":
target.setChunkingIncludeRawText(property(camelContext,
java.lang.Boolean.class, value)); return true;
+ case "chunkingmaxtokens":
+ case "chunkingMaxTokens":
target.setChunkingMaxTokens(property(camelContext, java.lang.Integer.class,
value)); return true;
+ case "chunkingmergepeers":
+ case "chunkingMergePeers":
target.setChunkingMergePeers(property(camelContext, java.lang.Boolean.class,
value)); return true;
+ case "chunkingtokenizer":
+ case "chunkingTokenizer":
target.setChunkingTokenizer(property(camelContext, java.lang.String.class,
value)); return true;
+ case "chunkingusemarkdowntables":
+ case "chunkingUseMarkdownTables":
target.setChunkingUseMarkdownTables(property(camelContext,
java.lang.Boolean.class, value)); return true;
case "contentinbody":
case "contentInBody": target.setContentInBody(property(camelContext,
boolean.class, value)); return true;
case "docodeenrichment":
@@ -134,6 +144,16 @@ public class DoclingConfigurationConfigurer extends
org.apache.camel.support.com
case "batchSize": return int.class;
case "batchtimeout":
case "batchTimeout": return long.class;
+ case "chunkingincluderawtext":
+ case "chunkingIncludeRawText": return java.lang.Boolean.class;
+ case "chunkingmaxtokens":
+ case "chunkingMaxTokens": return java.lang.Integer.class;
+ case "chunkingmergepeers":
+ case "chunkingMergePeers": return java.lang.Boolean.class;
+ case "chunkingtokenizer":
+ case "chunkingTokenizer": return java.lang.String.class;
+ case "chunkingusemarkdowntables":
+ case "chunkingUseMarkdownTables": return java.lang.Boolean.class;
case "contentinbody":
case "contentInBody": return boolean.class;
case "docodeenrichment":
@@ -226,6 +246,16 @@ public class DoclingConfigurationConfigurer extends
org.apache.camel.support.com
case "batchSize": return target.getBatchSize();
case "batchtimeout":
case "batchTimeout": return target.getBatchTimeout();
+ case "chunkingincluderawtext":
+ case "chunkingIncludeRawText": return
target.getChunkingIncludeRawText();
+ case "chunkingmaxtokens":
+ case "chunkingMaxTokens": return target.getChunkingMaxTokens();
+ case "chunkingmergepeers":
+ case "chunkingMergePeers": return target.getChunkingMergePeers();
+ case "chunkingtokenizer":
+ case "chunkingTokenizer": return target.getChunkingTokenizer();
+ case "chunkingusemarkdowntables":
+ case "chunkingUseMarkdownTables": return
target.getChunkingUseMarkdownTables();
case "contentinbody":
case "contentInBody": return target.isContentInBody();
case "docodeenrichment":
diff --git
a/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingEndpointConfigurer.java
b/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingEndpointConfigurer.java
index 565d710c791e..1442e4a80ffe 100644
---
a/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingEndpointConfigurer.java
+++
b/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingEndpointConfigurer.java
@@ -43,6 +43,16 @@ public class DoclingEndpointConfigurer extends
PropertyConfigurerSupport impleme
case "batchSize":
target.getConfiguration().setBatchSize(property(camelContext, int.class,
value)); return true;
case "batchtimeout":
case "batchTimeout":
target.getConfiguration().setBatchTimeout(property(camelContext, long.class,
value)); return true;
+ case "chunkingincluderawtext":
+ case "chunkingIncludeRawText":
target.getConfiguration().setChunkingIncludeRawText(property(camelContext,
java.lang.Boolean.class, value)); return true;
+ case "chunkingmaxtokens":
+ case "chunkingMaxTokens":
target.getConfiguration().setChunkingMaxTokens(property(camelContext,
java.lang.Integer.class, value)); return true;
+ case "chunkingmergepeers":
+ case "chunkingMergePeers":
target.getConfiguration().setChunkingMergePeers(property(camelContext,
java.lang.Boolean.class, value)); return true;
+ case "chunkingtokenizer":
+ case "chunkingTokenizer":
target.getConfiguration().setChunkingTokenizer(property(camelContext,
java.lang.String.class, value)); return true;
+ case "chunkingusemarkdowntables":
+ case "chunkingUseMarkdownTables":
target.getConfiguration().setChunkingUseMarkdownTables(property(camelContext,
java.lang.Boolean.class, value)); return true;
case "contentinbody":
case "contentInBody":
target.getConfiguration().setContentInBody(property(camelContext,
boolean.class, value)); return true;
case "docodeenrichment":
@@ -136,6 +146,16 @@ public class DoclingEndpointConfigurer extends
PropertyConfigurerSupport impleme
case "batchSize": return int.class;
case "batchtimeout":
case "batchTimeout": return long.class;
+ case "chunkingincluderawtext":
+ case "chunkingIncludeRawText": return java.lang.Boolean.class;
+ case "chunkingmaxtokens":
+ case "chunkingMaxTokens": return java.lang.Integer.class;
+ case "chunkingmergepeers":
+ case "chunkingMergePeers": return java.lang.Boolean.class;
+ case "chunkingtokenizer":
+ case "chunkingTokenizer": return java.lang.String.class;
+ case "chunkingusemarkdowntables":
+ case "chunkingUseMarkdownTables": return java.lang.Boolean.class;
case "contentinbody":
case "contentInBody": return boolean.class;
case "docodeenrichment":
@@ -230,6 +250,16 @@ public class DoclingEndpointConfigurer extends
PropertyConfigurerSupport impleme
case "batchSize": return target.getConfiguration().getBatchSize();
case "batchtimeout":
case "batchTimeout": return
target.getConfiguration().getBatchTimeout();
+ case "chunkingincluderawtext":
+ case "chunkingIncludeRawText": return
target.getConfiguration().getChunkingIncludeRawText();
+ case "chunkingmaxtokens":
+ case "chunkingMaxTokens": return
target.getConfiguration().getChunkingMaxTokens();
+ case "chunkingmergepeers":
+ case "chunkingMergePeers": return
target.getConfiguration().getChunkingMergePeers();
+ case "chunkingtokenizer":
+ case "chunkingTokenizer": return
target.getConfiguration().getChunkingTokenizer();
+ case "chunkingusemarkdowntables":
+ case "chunkingUseMarkdownTables": return
target.getConfiguration().getChunkingUseMarkdownTables();
case "contentinbody":
case "contentInBody": return
target.getConfiguration().isContentInBody();
case "docodeenrichment":
diff --git
a/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingEndpointUriFactory.java
b/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingEndpointUriFactory.java
index d0b451ff909d..e1c8dc113d21 100644
---
a/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingEndpointUriFactory.java
+++
b/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingEndpointUriFactory.java
@@ -23,7 +23,7 @@ public class DoclingEndpointUriFactory extends
org.apache.camel.support.componen
private static final Set<String> SECRET_PROPERTY_NAMES;
private static final Map<String, String> MULTI_VALUE_PREFIXES;
static {
- Set<String> props = new HashSet<>(45);
+ Set<String> props = new HashSet<>(50);
props.add("abortOnError");
props.add("apiKeyHeader");
props.add("asyncPollInterval");
@@ -34,6 +34,11 @@ public class DoclingEndpointUriFactory extends
org.apache.camel.support.componen
props.add("batchParallelism");
props.add("batchSize");
props.add("batchTimeout");
+ props.add("chunkingIncludeRawText");
+ props.add("chunkingMaxTokens");
+ props.add("chunkingMergePeers");
+ props.add("chunkingTokenizer");
+ props.add("chunkingUseMarkdownTables");
props.add("contentInBody");
props.add("doCodeEnrichment");
props.add("doFormulaEnrichment");
diff --git
a/components/camel-ai/camel-docling/src/generated/resources/META-INF/org/apache/camel/component/docling/docling.json
b/components/camel-ai/camel-docling/src/generated/resources/META-INF/org/apache/camel/component/docling/docling.json
index ba8c86f7f589..1366dbfdaf55 100644
---
a/components/camel-ai/camel-docling/src/generated/resources/META-INF/org/apache/camel/component/docling/docling.json
+++
b/components/camel-ai/camel-docling/src/generated/resources/META-INF/org/apache/camel/component/docling/docling.json
@@ -31,7 +31,7 @@
"includeLayoutInfo": { "index": 4, "kind": "property", "displayName":
"Include Layout Info", "group": "producer", "label": "", "required": false,
"type": "boolean", "javaType": "boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Show layout information
with bounding boxes" },
"lazyStartProducer": { "index": 5, "kind": "property", "displayName":
"Lazy Start Producer", "group": "producer", "label": "producer", "required":
false, "type": "boolean", "javaType": "boolean", "deprecated": false,
"autowired": false, "secret": false, "defaultValue": false, "description":
"Whether the producer should be started lazy (on the first message). By
starting lazy you can use this to allow CamelContext and routes to startup in
situations where a producer may otherwise fail [...]
"ocrLanguage": { "index": 6, "kind": "property", "displayName": "Ocr
Language", "group": "producer", "label": "", "required": false, "type":
"string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
"en", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Language code for OCR
processing" },
- "operation": { "index": 7, "kind": "property", "displayName": "Operation",
"group": "producer", "label": "", "required": true, "type": "enum", "javaType":
"org.apache.camel.component.docling.DoclingOperations", "enum": [
"CONVERT_TO_MARKDOWN", "CONVERT_TO_HTML", "CONVERT_TO_JSON", "EXTRACT_TEXT",
"EXTRACT_STRUCTURED_DATA", "SUBMIT_ASYNC_CONVERSION",
"CHECK_CONVERSION_STATUS", "BATCH_CONVERT_TO_MARKDOWN",
"BATCH_CONVERT_TO_HTML", "BATCH_CONVERT_TO_JSON", "BATCH_EXTRACT_TEXT",
"BATCH_E [...]
+ "operation": { "index": 7, "kind": "property", "displayName": "Operation",
"group": "producer", "label": "", "required": true, "type": "enum", "javaType":
"org.apache.camel.component.docling.DoclingOperations", "enum": [
"CONVERT_TO_MARKDOWN", "CONVERT_TO_HTML", "CONVERT_TO_JSON", "EXTRACT_TEXT",
"EXTRACT_STRUCTURED_DATA", "SUBMIT_ASYNC_CONVERSION",
"CHECK_CONVERSION_STATUS", "BATCH_CONVERT_TO_MARKDOWN",
"BATCH_CONVERT_TO_HTML", "BATCH_CONVERT_TO_JSON", "BATCH_EXTRACT_TEXT",
"BATCH_E [...]
"outputFormat": { "index": 8, "kind": "property", "displayName": "Output
Format", "group": "producer", "label": "", "required": false, "type": "string",
"javaType": "java.lang.String", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": "markdown",
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Output format for
document conversion" },
"useDoclingServe": { "index": 9, "kind": "property", "displayName": "Use
Docling Serve", "group": "producer", "label": "", "required": false, "type":
"boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": false,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Use docling-serve API
instead of CLI command" },
"abortOnError": { "index": 10, "kind": "property", "displayName": "Abort
On Error", "group": "advanced", "label": "advanced", "required": false, "type":
"boolean", "javaType": "java.lang.Boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Abort processing on
error" },
@@ -64,12 +64,17 @@
"batchSize": { "index": 37, "kind": "property", "displayName": "Batch
Size", "group": "batch", "label": "batch", "required": false, "type":
"integer", "javaType": "int", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": 10, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum number of
documents to process in a single batch (batch operations only)" },
"batchTimeout": { "index": 38, "kind": "property", "displayName": "Batch
Timeout", "group": "batch", "label": "batch", "required": false, "type":
"integer", "javaType": "long", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": 300000,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum time to wait for
batch completion in milliseconds" },
"splitBatchResults": { "index": 39, "kind": "property", "displayName":
"Split Batch Results", "group": "batch", "label": "batch", "required": false,
"type": "boolean", "javaType": "boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Split batch results into
individual exchanges (one per do [...]
- "includeMetadataInHeaders": { "index": 40, "kind": "property",
"displayName": "Include Metadata In Headers", "group": "metadata", "label":
"metadata", "required": false, "type": "boolean", "javaType": "boolean",
"deprecated": false, "deprecationNote": "", "autowired": false, "secret":
false, "defaultValue": true, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include metadata in
message headers w [...]
- "includeRawMetadata": { "index": 41, "kind": "property", "displayName":
"Include Raw Metadata", "group": "metadata", "label": "metadata", "required":
false, "type": "boolean", "javaType": "boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include raw metadata as
returned by the parser" },
- "apiKeyHeader": { "index": 42, "kind": "property", "displayName": "Api Key
Header", "group": "security", "label": "security", "required": false, "type":
"string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
"X-API-Key", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Header name for API key
authentication" },
- "authenticationScheme": { "index": 43, "kind": "property", "displayName":
"Authentication Scheme", "group": "security", "label": "security", "required":
false, "type": "enum", "javaType":
"org.apache.camel.component.docling.AuthenticationScheme", "enum": [ "NONE",
"BEARER", "API_KEY" ], "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": "NONE", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration", "configuration [...]
- "authenticationToken": { "index": 44, "kind": "property", "displayName":
"Authentication Token", "group": "security", "label": "security", "required":
false, "type": "string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": true,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Authentication token for
docling-serve API (Bearer token or API [...]
- "maxFileSize": { "index": 45, "kind": "property", "displayName": "Max File
Size", "group": "security", "label": "security", "required": false, "type":
"integer", "javaType": "long", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": 52428800,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum file size in
bytes for processing" }
+ "chunkingIncludeRawText": { "index": 40, "kind": "property",
"displayName": "Chunking Include Raw Text", "group": "chunking", "label":
"chunking", "required": false, "type": "boolean", "javaType":
"java.lang.Boolean", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include raw text in
chunk output" },
+ "chunkingMaxTokens": { "index": 41, "kind": "property", "displayName":
"Chunking Max Tokens", "group": "chunking", "label": "chunking", "required":
false, "type": "integer", "javaType": "java.lang.Integer", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum number of tokens
per chunk for hybrid chunking" },
+ "chunkingMergePeers": { "index": 42, "kind": "property", "displayName":
"Chunking Merge Peers", "group": "chunking", "label": "chunking", "required":
false, "type": "boolean", "javaType": "java.lang.Boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
true, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Whether to merge peer
chunks in hybrid c [...]
+ "chunkingTokenizer": { "index": 43, "kind": "property", "displayName":
"Chunking Tokenizer", "group": "chunking", "label": "chunking", "required":
false, "type": "string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Tokenizer model for
hybrid chunking (e.g. sentence-transformers\/al [...]
+ "chunkingUseMarkdownTables": { "index": 44, "kind": "property",
"displayName": "Chunking Use Markdown Tables", "group": "chunking", "label":
"chunking", "required": false, "type": "boolean", "javaType":
"java.lang.Boolean", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Use markdown format for
[...]
+ "includeMetadataInHeaders": { "index": 45, "kind": "property",
"displayName": "Include Metadata In Headers", "group": "metadata", "label":
"metadata", "required": false, "type": "boolean", "javaType": "boolean",
"deprecated": false, "deprecationNote": "", "autowired": false, "secret":
false, "defaultValue": true, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include metadata in
message headers w [...]
+ "includeRawMetadata": { "index": 46, "kind": "property", "displayName":
"Include Raw Metadata", "group": "metadata", "label": "metadata", "required":
false, "type": "boolean", "javaType": "boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include raw metadata as
returned by the parser" },
+ "apiKeyHeader": { "index": 47, "kind": "property", "displayName": "Api Key
Header", "group": "security", "label": "security", "required": false, "type":
"string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
"X-API-Key", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Header name for API key
authentication" },
+ "authenticationScheme": { "index": 48, "kind": "property", "displayName":
"Authentication Scheme", "group": "security", "label": "security", "required":
false, "type": "enum", "javaType":
"org.apache.camel.component.docling.AuthenticationScheme", "enum": [ "NONE",
"BEARER", "API_KEY" ], "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": "NONE", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration", "configuration [...]
+ "authenticationToken": { "index": 49, "kind": "property", "displayName":
"Authentication Token", "group": "security", "label": "security", "required":
false, "type": "string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": true,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Authentication token for
docling-serve API (Bearer token or API [...]
+ "maxFileSize": { "index": 50, "kind": "property", "displayName": "Max File
Size", "group": "security", "label": "security", "required": false, "type":
"integer", "javaType": "long", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": 52428800,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum file size in
bytes for processing" }
},
"headers": {
"CamelDoclingOperation": { "index": 0, "kind": "header", "displayName":
"", "group": "producer", "label": "", "required": false, "javaType":
"DoclingOperations", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "The operation to perform",
"constantName": "org.apache.camel.component.docling.DoclingHeaders#OPERATION" },
@@ -99,7 +104,10 @@
"CamelDoclingMetadataFormat": { "index": 24, "kind": "header",
"displayName": "", "group": "producer", "label": "", "required": false,
"javaType": "String", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "Document format (MIME type)",
"constantName":
"org.apache.camel.component.docling.DoclingHeaders#METADATA_FORMAT" },
"CamelDoclingMetadataFileSize": { "index": 25, "kind": "header",
"displayName": "", "group": "producer", "label": "", "required": false,
"javaType": "Long", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "File size in bytes", "constantName":
"org.apache.camel.component.docling.DoclingHeaders#METADATA_FILE_SIZE" },
"CamelDoclingMetadataFileName": { "index": 26, "kind": "header",
"displayName": "", "group": "producer", "label": "", "required": false,
"javaType": "String", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "File name", "constantName":
"org.apache.camel.component.docling.DoclingHeaders#METADATA_FILE_NAME" },
- "CamelDoclingMetadataRaw": { "index": 27, "kind": "header", "displayName":
"", "group": "producer", "label": "", "required": false, "javaType":
"Map<String, Object>", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "Raw metadata fields as a Map",
"constantName":
"org.apache.camel.component.docling.DoclingHeaders#METADATA_RAW" }
+ "CamelDoclingMetadataRaw": { "index": 27, "kind": "header", "displayName":
"", "group": "producer", "label": "", "required": false, "javaType":
"Map<String, Object>", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "Raw metadata fields as a Map",
"constantName":
"org.apache.camel.component.docling.DoclingHeaders#METADATA_RAW" },
+ "CamelDoclingChunkingTokenizer": { "index": 28, "kind": "header",
"displayName": "", "group": "producer", "label": "", "required": false,
"javaType": "String", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "Tokenizer for hybrid chunking (e.g.
sentence-transformers\/all-MiniLM-L6-v2)", "constantName":
"org.apache.camel.component.docling.DoclingHeaders#CHUNKING_TOKENIZER" },
+ "CamelDoclingChunkingMaxTokens": { "index": 29, "kind": "header",
"displayName": "", "group": "producer", "label": "", "required": false,
"javaType": "Integer", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "Maximum tokens per chunk for hybrid
chunking", "constantName":
"org.apache.camel.component.docling.DoclingHeaders#CHUNKING_MAX_TOKENS" },
+ "CamelDoclingChunkingMergePeers": { "index": 30, "kind": "header",
"displayName": "", "group": "producer", "label": "", "required": false,
"javaType": "Boolean", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "description": "Whether to merge peer chunks in hybrid
chunking", "constantName":
"org.apache.camel.component.docling.DoclingHeaders#CHUNKING_MERGE_PEERS" }
},
"properties": {
"operationId": { "index": 0, "kind": "path", "displayName": "Operation
Id", "group": "producer", "label": "", "required": true, "type": "string",
"javaType": "java.lang.String", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "description": "The operation identifier"
},
@@ -108,7 +116,7 @@
"enableOCR": { "index": 3, "kind": "parameter", "displayName": "Enable
OCR", "group": "producer", "label": "", "required": false, "type": "boolean",
"javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": true, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Enable OCR processing
for scanned documents" },
"includeLayoutInfo": { "index": 4, "kind": "parameter", "displayName":
"Include Layout Info", "group": "producer", "label": "", "required": false,
"type": "boolean", "javaType": "boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Show layout information
with bounding boxes" },
"ocrLanguage": { "index": 5, "kind": "parameter", "displayName": "Ocr
Language", "group": "producer", "label": "", "required": false, "type":
"string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
"en", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Language code for OCR
processing" },
- "operation": { "index": 6, "kind": "parameter", "displayName":
"Operation", "group": "producer", "label": "", "required": true, "type":
"enum", "javaType": "org.apache.camel.component.docling.DoclingOperations",
"enum": [ "CONVERT_TO_MARKDOWN", "CONVERT_TO_HTML", "CONVERT_TO_JSON",
"EXTRACT_TEXT", "EXTRACT_STRUCTURED_DATA", "SUBMIT_ASYNC_CONVERSION",
"CHECK_CONVERSION_STATUS", "BATCH_CONVERT_TO_MARKDOWN",
"BATCH_CONVERT_TO_HTML", "BATCH_CONVERT_TO_JSON", "BATCH_EXTRACT_TEXT", "BATCH_
[...]
+ "operation": { "index": 6, "kind": "parameter", "displayName":
"Operation", "group": "producer", "label": "", "required": true, "type":
"enum", "javaType": "org.apache.camel.component.docling.DoclingOperations",
"enum": [ "CONVERT_TO_MARKDOWN", "CONVERT_TO_HTML", "CONVERT_TO_JSON",
"EXTRACT_TEXT", "EXTRACT_STRUCTURED_DATA", "SUBMIT_ASYNC_CONVERSION",
"CHECK_CONVERSION_STATUS", "BATCH_CONVERT_TO_MARKDOWN",
"BATCH_CONVERT_TO_HTML", "BATCH_CONVERT_TO_JSON", "BATCH_EXTRACT_TEXT", "BATCH_
[...]
"outputFormat": { "index": 7, "kind": "parameter", "displayName": "Output
Format", "group": "producer", "label": "", "required": false, "type": "string",
"javaType": "java.lang.String", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": "markdown",
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Output format for
document conversion" },
"useDoclingServe": { "index": 8, "kind": "parameter", "displayName": "Use
Docling Serve", "group": "producer", "label": "", "required": false, "type":
"boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": false,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Use docling-serve API
instead of CLI command" },
"lazyStartProducer": { "index": 9, "kind": "parameter", "displayName":
"Lazy Start Producer", "group": "producer (advanced)", "label":
"producer,advanced", "required": false, "type": "boolean", "javaType":
"boolean", "deprecated": false, "autowired": false, "secret": false,
"defaultValue": false, "description": "Whether the producer should be started
lazy (on the first message). By starting lazy you can use this to allow
CamelContext and routes to startup in situations where a produc [...]
@@ -141,11 +149,16 @@
"batchSize": { "index": 36, "kind": "parameter", "displayName": "Batch
Size", "group": "batch", "label": "batch", "required": false, "type":
"integer", "javaType": "int", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": 10, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum number of
documents to process in a single batch (batch operations only)" },
"batchTimeout": { "index": 37, "kind": "parameter", "displayName": "Batch
Timeout", "group": "batch", "label": "batch", "required": false, "type":
"integer", "javaType": "long", "deprecated": false, "deprecationNote": "",
"autowired": false, "secret": false, "defaultValue": 300000,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum time to wait for
batch completion in milliseconds" },
"splitBatchResults": { "index": 38, "kind": "parameter", "displayName":
"Split Batch Results", "group": "batch", "label": "batch", "required": false,
"type": "boolean", "javaType": "boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Split batch results into
individual exchanges (one per d [...]
- "includeMetadataInHeaders": { "index": 39, "kind": "parameter",
"displayName": "Include Metadata In Headers", "group": "metadata", "label":
"metadata", "required": false, "type": "boolean", "javaType": "boolean",
"deprecated": false, "deprecationNote": "", "autowired": false, "secret":
false, "defaultValue": true, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include metadata in
message headers [...]
- "includeRawMetadata": { "index": 40, "kind": "parameter", "displayName":
"Include Raw Metadata", "group": "metadata", "label": "metadata", "required":
false, "type": "boolean", "javaType": "boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include raw metadata as
returned by the parser" },
- "apiKeyHeader": { "index": 41, "kind": "parameter", "displayName": "Api
Key Header", "group": "security", "label": "security", "required": false,
"type": "string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
"X-API-Key", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Header name for API key
authentication" },
- "authenticationScheme": { "index": 42, "kind": "parameter", "displayName":
"Authentication Scheme", "group": "security", "label": "security", "required":
false, "type": "enum", "javaType":
"org.apache.camel.component.docling.AuthenticationScheme", "enum": [ "NONE",
"BEARER", "API_KEY" ], "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": "NONE", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration", "configuratio [...]
- "authenticationToken": { "index": 43, "kind": "parameter", "displayName":
"Authentication Token", "group": "security", "label": "security", "required":
false, "type": "string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": true,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Authentication token for
docling-serve API (Bearer token or API [...]
- "maxFileSize": { "index": 44, "kind": "parameter", "displayName": "Max
File Size", "group": "security", "label": "security", "required": false,
"type": "integer", "javaType": "long", "deprecated": false, "deprecationNote":
"", "autowired": false, "secret": false, "defaultValue": 52428800,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum file size in
bytes for processing" }
+ "chunkingIncludeRawText": { "index": 39, "kind": "parameter",
"displayName": "Chunking Include Raw Text", "group": "chunking", "label":
"chunking", "required": false, "type": "boolean", "javaType":
"java.lang.Boolean", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include raw text in
chunk output" },
+ "chunkingMaxTokens": { "index": 40, "kind": "parameter", "displayName":
"Chunking Max Tokens", "group": "chunking", "label": "chunking", "required":
false, "type": "integer", "javaType": "java.lang.Integer", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum number of tokens
per chunk for hybrid chunking" },
+ "chunkingMergePeers": { "index": 41, "kind": "parameter", "displayName":
"Chunking Merge Peers", "group": "chunking", "label": "chunking", "required":
false, "type": "boolean", "javaType": "java.lang.Boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
true, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Whether to merge peer
chunks in hybrid [...]
+ "chunkingTokenizer": { "index": 42, "kind": "parameter", "displayName":
"Chunking Tokenizer", "group": "chunking", "label": "chunking", "required":
false, "type": "string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Tokenizer model for
hybrid chunking (e.g. sentence-transformers\/a [...]
+ "chunkingUseMarkdownTables": { "index": 43, "kind": "parameter",
"displayName": "Chunking Use Markdown Tables", "group": "chunking", "label":
"chunking", "required": false, "type": "boolean", "javaType":
"java.lang.Boolean", "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Use markdown format for
[...]
+ "includeMetadataInHeaders": { "index": 44, "kind": "parameter",
"displayName": "Include Metadata In Headers", "group": "metadata", "label":
"metadata", "required": false, "type": "boolean", "javaType": "boolean",
"deprecated": false, "deprecationNote": "", "autowired": false, "secret":
false, "defaultValue": true, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include metadata in
message headers [...]
+ "includeRawMetadata": { "index": 45, "kind": "parameter", "displayName":
"Include Raw Metadata", "group": "metadata", "label": "metadata", "required":
false, "type": "boolean", "javaType": "boolean", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
false, "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Include raw metadata as
returned by the parser" },
+ "apiKeyHeader": { "index": 46, "kind": "parameter", "displayName": "Api
Key Header", "group": "security", "label": "security", "required": false,
"type": "string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": false, "defaultValue":
"X-API-Key", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Header name for API key
authentication" },
+ "authenticationScheme": { "index": 47, "kind": "parameter", "displayName":
"Authentication Scheme", "group": "security", "label": "security", "required":
false, "type": "enum", "javaType":
"org.apache.camel.component.docling.AuthenticationScheme", "enum": [ "NONE",
"BEARER", "API_KEY" ], "deprecated": false, "deprecationNote": "", "autowired":
false, "secret": false, "defaultValue": "NONE", "configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration", "configuratio [...]
+ "authenticationToken": { "index": 48, "kind": "parameter", "displayName":
"Authentication Token", "group": "security", "label": "security", "required":
false, "type": "string", "javaType": "java.lang.String", "deprecated": false,
"deprecationNote": "", "autowired": false, "secret": true,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Authentication token for
docling-serve API (Bearer token or API [...]
+ "maxFileSize": { "index": 49, "kind": "parameter", "displayName": "Max
File Size", "group": "security", "label": "security", "required": false,
"type": "integer", "javaType": "long", "deprecated": false, "deprecationNote":
"", "autowired": false, "secret": false, "defaultValue": 52428800,
"configurationClass":
"org.apache.camel.component.docling.DoclingConfiguration",
"configurationField": "configuration", "description": "Maximum file size in
bytes for processing" }
}
}
diff --git
a/components/camel-ai/camel-docling/src/main/docs/docling-component.adoc
b/components/camel-ai/camel-docling/src/main/docs/docling-component.adoc
index 19762d7ce91d..c227d8bd26e6 100644
--- a/components/camel-ai/camel-docling/src/main/docs/docling-component.adoc
+++ b/components/camel-ai/camel-docling/src/main/docs/docling-component.adoc
@@ -99,6 +99,12 @@ The component supports the following operations:
| `CHECK_CONVERSION_STATUS`
| Check the status of an async conversion task (docling-serve only)
+| `CHUNK_HYBRID`
+| Chunk document using HybridChunker — token-aware and structure-aware
(docling-serve only)
+
+| `CHUNK_HIERARCHICAL`
+| Chunk document using HierarchicalChunker — structure-aware (docling-serve
only)
+
|===
// component options: START
@@ -897,6 +903,7 @@ YAML::
steps:
- to:
"file:///data/failed?fileName=${body.documentId}.error"
----
+====
NOTE: For the aggregation example above, you can also use a custom processor.
Create a Java class:
@@ -923,7 +930,6 @@ Then reference it in the YAML:
- name: documentListProcessor
type: "com.example.DocumentListProcessor"
----
-====
=== Batch Processing with Error Handling
@@ -1308,6 +1314,181 @@ YAML::
- `CamelDoclingBatchFailureCount`
- `CamelDoclingBatchProcessingTime`
+== Document Chunking
+
+The component supports document chunking via docling-serve, which splits
documents into semantically meaningful chunks suitable for RAG
(Retrieval-Augmented Generation) pipelines, vector databases, and other NLP
workflows. Unlike naive text splitting, docling chunking is structure-aware —
it respects document headings, paragraphs, and tables — and the hybrid chunker
is also token-aware, ensuring chunks fit within model token limits.
+
+NOTE: Both chunking operations require `useDoclingServe=true`. The Docling CLI
does not expose chunking as a command — chunking is only available through the
docling-serve REST API.
+
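+`CHUNK_HYBRID` uses the HybridChunker, which is both structure-aware and
token-aware. It respects document structure while ensuring each chunk stays
within a configurable token limit. Best for RAG pipelines with embedding models
that have fixed token windows. Configure it with the `chunkingTokenizer`,
`chunkingMaxTokens`, and `chunkingMergePeers` options (see the endpoint options
table above). Each of these can also be set per message with the
`CamelDoclingChunkingTokenizer`, `CamelDoclingChunkingMaxTokens`, and
`CamelDoclingChunkingMergePeers` headers, which take precedence over the
endpoint options.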
+
+`CHUNK_HIERARCHICAL` uses the HierarchicalChunker, which is structure-aware
only. It splits at document structure boundaries (sections, paragraphs) without
enforcing token limits. Best when chunk size is less important than preserving
complete structural units.
+
+When `contentInBody=true`, the exchange body is set to a `List<Chunk>`
(`ai.docling.serve.api.chunk.response.Chunk`), ready for use with Camel's
`.split(body())` EIP. Each `Chunk` object provides:
+
+- `text` — the chunk text content
+- `chunkIndex` — zero-based position in the document
+- `filename` — source document filename
+- `headings` — list of heading strings leading to this chunk
+- `pageNumbers` — list of page numbers this chunk spans
+- `captions` — list of captions (e.g., table or figure captions)
+- `numTokens` — token count (hybrid chunker only)
+
+When `contentInBody=false`, the exchange body is set to the full
`ChunkDocumentResponse` (`ai.docling.serve.api.chunk.response.ChunkDocumentResponse`).
+
+=== Basic Chunking
+
+[tabs]
+====
+Java::
++
+[source,java]
+----
+from("file:///data/documents?include=.*\\.pdf")
+ .setHeader("CamelDoclingInputFilePath", simple("${file:absolute.path}"))
+ .to("docling:CHUNK_HYBRID?" +
+ "useDoclingServe=true&" +
+ "contentInBody=true&" +
+ "chunkingTokenizer=sentence-transformers/all-MiniLM-L6-v2&" +
+ "chunkingMaxTokens=128&" +
+ "chunkingMergePeers=true")
+ .split(body())
+ .log("Chunk ${body.chunkIndex}: ${body.text}")
+ .end();
+----
+
+YAML::
++
+[source,yaml]
+----
+- route:
+ from:
+ uri: "file:///data/documents"
+ parameters:
+ include: ".*\\.pdf"
+ steps:
+ - setHeader:
+ name: CamelDoclingInputFilePath
+ simple: "${file:absolute.path}"
+ - to:
+ uri: "docling:CHUNK_HYBRID"
+ parameters:
+ useDoclingServe: true
+ contentInBody: true
+ chunkingTokenizer: "sentence-transformers/all-MiniLM-L6-v2"
+ chunkingMaxTokens: 128
+ chunkingMergePeers: true
+ - split:
+ simple: "${body}"
+ steps:
+ - log: "Chunk ${body.chunkIndex}: ${body.text}"
+----
+====
+
+=== Chunking for RAG Pipelines
+
+A common use case is to chunk documents, generate embeddings, and store them
in a vector database:
+
+[tabs]
+====
+Java::
++
+[source,java]
+----
+from("direct:ingest-pdf")
+ .setHeader("CamelDoclingInputFilePath", header("pdfFilePath"))
+ .to("docling:CHUNK_HYBRID?" +
+ "useDoclingServe=true&" +
+ "contentInBody=true&" +
+ "chunkingTokenizer={{embedding.tokenizer}}&" +
+ "chunkingMaxTokens={{embedding.max-tokens}}&" +
+ "chunkingMergePeers=true")
+ .split(body())
+ .setBody(simple("${body.text}"))
+ .to("openai:embeddings?embeddingModel={{embedding.model}}")
+ // store embedding in vector database
+ .to("direct:store-embedding")
+ .end();
+----
+
+YAML::
++
+[source,yaml]
+----
+- route:
+ id: ingest-pdf
+ from:
+ uri: "direct:ingest-pdf"
+ steps:
+ - setHeader:
+ name: CamelDoclingInputFilePath
+ simple: "${header.pdfFilePath}"
+ - to:
+ uri: "docling:CHUNK_HYBRID"
+ parameters:
+ useDoclingServe: true
+ contentInBody: true
+ chunkingTokenizer: "{{embedding.tokenizer}}"
+ chunkingMaxTokens: "{{embedding.max-tokens}}"
+ chunkingMergePeers: true
+ - split:
+ simple: "${body}"
+ steps:
+ - setBody:
+ simple: "${body.text}"
+ - to:
+ uri: "openai:embeddings"
+ parameters:
+ embeddingModel: "{{embedding.model}}"
+ - to: "direct:store-embedding"
+----
+====
+
+=== Hierarchical Chunking
+
+Use hierarchical chunking when you want to preserve complete structural units
without token limits:
+
+[tabs]
+====
+Java::
++
+[source,java]
+----
+from("file:///data/documents?include=.*\\.pdf")
+ .setHeader("CamelDoclingInputFilePath", simple("${file:absolute.path}"))
+ .to("docling:CHUNK_HIERARCHICAL?" +
+ "useDoclingServe=true&" +
+ "contentInBody=true")
+ .split(body())
+ .log("Section [${body.headings}] page ${body.pageNumbers}:
${body.text}")
+ .end();
+----
+
+YAML::
++
+[source,yaml]
+----
+- route:
+ from:
+ uri: "file:///data/documents"
+ parameters:
+ include: ".*\\.pdf"
+ steps:
+ - setHeader:
+ name: CamelDoclingInputFilePath
+ simple: "${file:absolute.path}"
+ - to:
+ uri: "docling:CHUNK_HIERARCHICAL"
+ parameters:
+ useDoclingServe: true
+ contentInBody: true
+ - split:
+ simple: "${body}"
+ steps:
+ - log: "Section [${body.headings}] page ${body.pageNumbers}:
${body.text}"
+----
+====
+
== Asynchronous Processing
The component supports asynchronous document conversion when using
docling-serve API mode. This is particularly useful for:
diff --git
a/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingConfiguration.java
b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingConfiguration.java
index 0a965bbef4bb..76f4506d6ba8 100644
---
a/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingConfiguration.java
+++
b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingConfiguration.java
@@ -31,7 +31,7 @@ public class DoclingConfiguration implements Cloneable {
@UriParam
@Metadata(required = true, defaultValue = "CONVERT_TO_MARKDOWN",
description = "The operation to perform",
- enums =
"CONVERT_TO_MARKDOWN,CONVERT_TO_HTML,CONVERT_TO_JSON,EXTRACT_TEXT,EXTRACT_STRUCTURED_DATA,SUBMIT_ASYNC_CONVERSION,CHECK_CONVERSION_STATUS,BATCH_CONVERT_TO_MARKDOWN,BATCH_CONVERT_TO_HTML,BATCH_CONVERT_TO_JSON,BATCH_EXTRACT_TEXT,BATCH_EXTRACT_STRUCTURED_DATA,EXTRACT_METADATA")
+ enums =
"CONVERT_TO_MARKDOWN,CONVERT_TO_HTML,CONVERT_TO_JSON,EXTRACT_TEXT,EXTRACT_STRUCTURED_DATA,SUBMIT_ASYNC_CONVERSION,CHECK_CONVERSION_STATUS,BATCH_CONVERT_TO_MARKDOWN,BATCH_CONVERT_TO_HTML,BATCH_CONVERT_TO_JSON,BATCH_EXTRACT_TEXT,BATCH_EXTRACT_STRUCTURED_DATA,EXTRACT_METADATA,CHUNK_HYBRID,CHUNK_HIERARCHICAL")
private DoclingOperations operation =
DoclingOperations.CONVERT_TO_MARKDOWN;
@UriParam(label = "advanced")
@@ -209,6 +209,26 @@ public class DoclingConfiguration implements Cloneable {
@Metadata(description = "Placeholder string for page breaks in markdown
output")
private String mdPageBreakPlaceholder;
+ @UriParam(label = "chunking")
+ @Metadata(description = "Tokenizer model for hybrid chunking (e.g.
sentence-transformers/all-MiniLM-L6-v2)")
+ private String chunkingTokenizer; // no initializer: null until setChunkingTokenizer is called
+ // None of the chunking fields below has an initializer either, so each one stays null until its setter is called.
+ @UriParam(label = "chunking")
+ @Metadata(description = "Maximum number of tokens per chunk for hybrid
chunking")
+ private Integer chunkingMaxTokens;
+ // The defaultValue attribute is annotation metadata; nothing in this hunk assigns "true" to the field itself.
+ @UriParam(label = "chunking")
+ @Metadata(description = "Whether to merge peer chunks in hybrid chunking",
defaultValue = "true")
+ private Boolean chunkingMergePeers;
+ // Same pattern: declared default "false", field value null until set.
+ @UriParam(label = "chunking")
+ @Metadata(description = "Include raw text in chunk output", defaultValue =
"false")
+ private Boolean chunkingIncludeRawText;
+ // Same pattern: declared default "false", field value null until set.
+ @UriParam(label = "chunking")
+ @Metadata(description = "Use markdown format for tables in chunk output",
defaultValue = "false")
+ private Boolean chunkingUseMarkdownTables;
+
public DoclingOperations getOperation() {
return operation;
}
@@ -553,6 +573,46 @@ public class DoclingConfiguration implements Cloneable {
this.mdPageBreakPlaceholder = mdPageBreakPlaceholder;
}
+ public String getChunkingTokenizer() { // tokenizer option for hybrid chunking; may be null when not configured
+ return chunkingTokenizer;
+ }
+
+ public void setChunkingTokenizer(String chunkingTokenizer) {
+ this.chunkingTokenizer = chunkingTokenizer;
+ }
+ // Boxed Integer so that "not configured" can be represented as null.
+ public Integer getChunkingMaxTokens() {
+ return chunkingMaxTokens;
+ }
+
+ public void setChunkingMaxTokens(Integer chunkingMaxTokens) {
+ this.chunkingMaxTokens = chunkingMaxTokens;
+ }
+ // Boxed Boolean: may be null when not configured.
+ public Boolean getChunkingMergePeers() {
+ return chunkingMergePeers;
+ }
+
+ public void setChunkingMergePeers(Boolean chunkingMergePeers) {
+ this.chunkingMergePeers = chunkingMergePeers;
+ }
+ // Boxed Boolean: may be null when not configured.
+ public Boolean getChunkingIncludeRawText() {
+ return chunkingIncludeRawText;
+ }
+
+ public void setChunkingIncludeRawText(Boolean chunkingIncludeRawText) {
+ this.chunkingIncludeRawText = chunkingIncludeRawText;
+ }
+ // Boxed Boolean: may be null when not configured.
+ public Boolean getChunkingUseMarkdownTables() {
+ return chunkingUseMarkdownTables;
+ }
+
+ public void setChunkingUseMarkdownTables(Boolean
chunkingUseMarkdownTables) {
+ this.chunkingUseMarkdownTables = chunkingUseMarkdownTables;
+ }
+
public DoclingConfiguration copy() {
try {
return (DoclingConfiguration) super.clone();
diff --git
a/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingHeaders.java
b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingHeaders.java
index ef9b49cb4b14..5f19834daa06 100644
---
a/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingHeaders.java
+++
b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingHeaders.java
@@ -105,6 +105,15 @@ public final class DoclingHeaders {
@Metadata(description = "Raw metadata fields as a Map", javaType =
"Map<String, Object>")
public static final String METADATA_RAW = "CamelDoclingMetadataRaw";
+ @Metadata(description = "Tokenizer for hybrid chunking (e.g.
sentence-transformers/all-MiniLM-L6-v2)", javaType = "String")
+ public static final String CHUNKING_TOKENIZER =
"CamelDoclingChunkingTokenizer";
+
+ @Metadata(description = "Maximum tokens per chunk for hybrid chunking",
javaType = "Integer")
+ public static final String CHUNKING_MAX_TOKENS =
"CamelDoclingChunkingMaxTokens";
+
+ @Metadata(description = "Whether to merge peer chunks in hybrid chunking",
javaType = "Boolean")
+ public static final String CHUNKING_MERGE_PEERS =
"CamelDoclingChunkingMergePeers";
+
private DoclingHeaders() {
}
diff --git
a/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingOperations.java
b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingOperations.java
index afe4151a598a..4f1836256d73 100644
---
a/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingOperations.java
+++
b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingOperations.java
@@ -87,6 +87,16 @@ public enum DoclingOperations {
/**
* Extract document metadata (author, title, page count, creation date,
etc.)
*/
- EXTRACT_METADATA
+ EXTRACT_METADATA,
+
+ /**
+ * Chunk document using HybridChunker (token-aware, structure-aware)
(docling-serve only)
+ */
+ CHUNK_HYBRID,
+
+ /**
+ * Chunk document using HierarchicalChunker (structure-aware)
(docling-serve only)
+ */
+ CHUNK_HIERARCHICAL
}
diff --git
a/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingProducer.java
b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingProducer.java
index 38b1b7b720ea..3e7d1b15d4b7 100644
---
a/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingProducer.java
+++
b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingProducer.java
@@ -44,6 +44,11 @@ import java.util.stream.Stream;
import ai.docling.core.DoclingDocument;
import ai.docling.core.DoclingDocument.DocumentOrigin;
import ai.docling.serve.api.DoclingServeApi;
+import ai.docling.serve.api.chunk.request.HierarchicalChunkDocumentRequest;
+import ai.docling.serve.api.chunk.request.HybridChunkDocumentRequest;
+import ai.docling.serve.api.chunk.request.options.HierarchicalChunkerOptions;
+import ai.docling.serve.api.chunk.request.options.HybridChunkerOptions;
+import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
import ai.docling.serve.api.convert.request.ConvertDocumentRequest;
import ai.docling.serve.api.convert.request.options.ConvertDocumentOptions;
import ai.docling.serve.api.convert.request.options.ImageRefMode;
@@ -173,6 +178,12 @@ public class DoclingProducer extends DefaultProducer {
case EXTRACT_METADATA:
processExtractMetadata(exchange);
break;
+ case CHUNK_HYBRID:
+ processChunkHybrid(exchange);
+ break;
+ case CHUNK_HIERARCHICAL:
+ processChunkHierarchical(exchange);
+ break;
default:
throw new IllegalArgumentException("Unsupported operation: " +
operation);
}
@@ -378,6 +389,130 @@ public class DoclingProducer extends DefaultProducer {
LOG.debug("Metadata extraction completed for: {}", inputPath);
}
+ private void processChunkHybrid(Exchange exchange) throws Exception {
+ LOG.debug("DoclingProducer chunking with HybridChunker");
+ // Handles CHUNK_HYBRID: builds a HybridChunkDocumentRequest for the input document, sends it through doclingServeApi and stores the result in the exchange body.
+ if (!configuration.isUseDoclingServe()) {
+ throw new IllegalStateException(
+ "CHUNK_HYBRID operation requires docling-serve mode
(useDoclingServe=true)");
+ }
+ // inputPath is later treated as either an http(s) URL or a local file path (see addSourceToChunkRequest).
+ String inputPath = getInputPath(exchange);
+
+ // Build HybridChunkerOptions: for tokenizer, maxTokens and mergePeers a message header wins over the endpoint configuration; a value that is null in both places is not set on the builder
+ HybridChunkerOptions.Builder chunkerOptionsBuilder =
HybridChunkerOptions.builder();
+ // Tokenizer: DoclingHeaders.CHUNKING_TOKENIZER header first, configuration as fallback.
+ String tokenizer =
exchange.getIn().getHeader(DoclingHeaders.CHUNKING_TOKENIZER, String.class);
+ if (tokenizer == null) {
+ tokenizer = configuration.getChunkingTokenizer();
+ }
+ if (tokenizer != null) {
+ chunkerOptionsBuilder.tokenizer(tokenizer);
+ }
+ // Max tokens: DoclingHeaders.CHUNKING_MAX_TOKENS header first, configuration as fallback.
+ Integer maxTokens =
exchange.getIn().getHeader(DoclingHeaders.CHUNKING_MAX_TOKENS, Integer.class);
+ if (maxTokens == null) {
+ maxTokens = configuration.getChunkingMaxTokens();
+ }
+ if (maxTokens != null) {
+ chunkerOptionsBuilder.maxTokens(maxTokens);
+ }
+ // Merge peers: DoclingHeaders.CHUNKING_MERGE_PEERS header first, configuration as fallback.
+ Boolean mergePeers =
exchange.getIn().getHeader(DoclingHeaders.CHUNKING_MERGE_PEERS, Boolean.class);
+ if (mergePeers == null) {
+ mergePeers = configuration.getChunkingMergePeers();
+ }
+ if (mergePeers != null) {
+ chunkerOptionsBuilder.mergePeers(mergePeers);
+ }
+ // includeRawText and useMarkdownTables are taken from configuration only; no header is consulted for them.
+ if (configuration.getChunkingIncludeRawText() != null) {
+
chunkerOptionsBuilder.includeRawText(configuration.getChunkingIncludeRawText());
+ }
+ if (configuration.getChunkingUseMarkdownTables() != null) {
+
chunkerOptionsBuilder.useMarkdownTables(configuration.getChunkingUseMarkdownTables());
+ }
+
+ // Build the request
+ HybridChunkDocumentRequest.Builder requestBuilder =
HybridChunkDocumentRequest.builder();
+ addSourceToChunkRequest(requestBuilder, inputPath);
+ requestBuilder.chunkingOptions(chunkerOptionsBuilder.build());
+
+ HybridChunkDocumentRequest request = requestBuilder.build();
+ ChunkDocumentResponse response =
doclingServeApi.chunkSourceWithHybridChunker(request);
+ // contentInBody=true puts only the chunk list in the body; otherwise the whole ChunkDocumentResponse becomes the body.
+ if (configuration.isContentInBody()) {
+ exchange.getIn().setBody(response.getChunks());
+ } else {
+ exchange.getIn().setBody(response);
+ }
+ // NOTE(review): getChunks() is null-checked here for logging only; the body assignment above has no such check - confirm whether getChunks() can return null.
+ LOG.debug("HybridChunker produced {} chunks", response.getChunks() !=
null ? response.getChunks().size() : 0);
+ }
+
+ private void processChunkHierarchical(Exchange exchange) throws Exception {
+ LOG.debug("DoclingProducer chunking with HierarchicalChunker");
+ // Handles CHUNK_HIERARCHICAL: builds a HierarchicalChunkDocumentRequest for the input document, sends it through doclingServeApi and stores the result in the exchange body.
+ if (!configuration.isUseDoclingServe()) {
+ throw new IllegalStateException(
+ "CHUNK_HIERARCHICAL operation requires docling-serve mode
(useDoclingServe=true)");
+ }
+ // inputPath is later treated as either an http(s) URL or a local file path (see addSourceToChunkRequest).
+ String inputPath = getInputPath(exchange);
+
+ // Build HierarchicalChunkerOptions from configuration only; this method reads no chunking headers
+ HierarchicalChunkerOptions.Builder chunkerOptionsBuilder =
HierarchicalChunkerOptions.builder();
+ // Options left null in the configuration are not set on the builder.
+ if (configuration.getChunkingIncludeRawText() != null) {
+
chunkerOptionsBuilder.includeRawText(configuration.getChunkingIncludeRawText());
+ }
+ if (configuration.getChunkingUseMarkdownTables() != null) {
+
chunkerOptionsBuilder.useMarkdownTables(configuration.getChunkingUseMarkdownTables());
+ }
+
+ // Build the request
+ HierarchicalChunkDocumentRequest.Builder requestBuilder =
HierarchicalChunkDocumentRequest.builder();
+ addSourceToChunkRequest(requestBuilder, inputPath);
+ requestBuilder.chunkingOptions(chunkerOptionsBuilder.build());
+
+ HierarchicalChunkDocumentRequest request = requestBuilder.build();
+ ChunkDocumentResponse response =
doclingServeApi.chunkSourceWithHierarchicalChunker(request);
+ // contentInBody=true puts only the chunk list in the body; otherwise the whole ChunkDocumentResponse becomes the body.
+ if (configuration.isContentInBody()) {
+ exchange.getIn().setBody(response.getChunks());
+ } else {
+ exchange.getIn().setBody(response);
+ }
+ // NOTE(review): getChunks() is null-checked here for logging only; the body assignment above has no such check - confirm whether getChunks() can return null.
+ LOG.debug("HierarchicalChunker produced {} chunks",
+ response.getChunks() != null ? response.getChunks().size() :
0);
+ }
+
+ private void addSourceToChunkRequest( // sets the document source on a chunk request builder: HttpSource for http(s) URLs, FileSource otherwise
+ ai.docling.serve.api.chunk.request.ChunkDocumentRequest.Builder
requestBuilder, String inputSource)
+ throws IOException { // thrown explicitly when the local file does not exist; Files.readAllBytes below can also throw it
+ if (inputSource.startsWith("http://") ||
inputSource.startsWith("https://")) {
+ requestBuilder.source(
+ HttpSource.builder()
+ .url(URI.create(inputSource))
+ .build());
+ } else { // anything without an http:// or https:// prefix is treated as a local file path
+ File file = new File(inputSource);
+ if (!file.exists()) {
+ throw new IOException("File not found: " + inputSource);
+ }
+ // The whole file is read into memory and sent inline as a base64 string, with only the file name (not the full path) attached.
+ byte[] fileBytes = Files.readAllBytes(file.toPath());
+ String base64Content =
Base64.getEncoder().encodeToString(fileBytes);
+
+ requestBuilder.source(
+ FileSource.builder()
+ .filename(file.getName())
+ .base64String(base64Content)
+ .build());
+ }
+ }
+
private DocumentMetadata extractMetadataUsingApi(String inputPath) throws
IOException {
LOG.debug("Extracting metadata using docling-java: {}", inputPath);
diff --git
a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/ChunkingIT.java
b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/ChunkingIT.java
new file mode 100644
index 000000000000..96ebd2df9b54
--- /dev/null
+++
b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/ChunkingIT.java
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.docling.integration;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.util.List;
+import java.util.Map;
+
+import ai.docling.serve.api.chunk.response.Chunk;
+import ai.docling.serve.api.chunk.response.ChunkDocumentResponse;
+import org.apache.camel.Exchange;
+import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.component.docling.DoclingHeaders;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.DisabledIfSystemProperty;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+@DisabledIfSystemProperty(named = "ci.env.name", matches = ".*",
disabledReason = "Too much resources on GitHub Actions")
+class ChunkingIT extends DoclingITestSupport {
+
+ @Test
+ void chunkWithHybridChunker() throws Exception {
+ Path testFile = createTestFile();
+
+ Exchange result = template.request("direct:chunk-hybrid",
+ e -> e.getIn().setHeader(DoclingHeaders.INPUT_FILE_PATH,
testFile.toString()));
+
+ @SuppressWarnings("unchecked")
+ List<Chunk> chunks = result.getIn().getBody(List.class);
+
+ assertThat(chunks).isNotEmpty();
+ assertThat(chunks).allSatisfy(chunk -> {
+ assertThat(chunk.getText()).isNotBlank();
+ assertThat(chunk.getChunkIndex()).isGreaterThanOrEqualTo(0);
+ });
+
+ LOG.info("HybridChunker produced {} chunks from markdown",
chunks.size());
+ }
+
+ @Test
+ void chunkWithHierarchicalChunker() throws Exception {
+ Path testFile = createTestFile();
+
+ Exchange result = template.request("direct:chunk-hierarchical",
+ e -> e.getIn().setHeader(DoclingHeaders.INPUT_FILE_PATH,
testFile.toString()));
+
+ @SuppressWarnings("unchecked")
+ List<Chunk> chunks = result.getIn().getBody(List.class);
+
+ assertThat(chunks).isNotEmpty();
+ assertThat(chunks).allSatisfy(chunk -> {
+ assertThat(chunk.getText()).isNotBlank();
+ });
+
+ LOG.info("HierarchicalChunker produced {} chunks from markdown",
chunks.size());
+ }
+
+ @Test
+ void chunkHybridReturnsFullResponse() throws Exception {
+ Path testFile = createTestFile();
+
+ Exchange result = template.request("direct:chunk-hybrid-full-response",
+ e -> e.getIn().setHeader(DoclingHeaders.INPUT_FILE_PATH,
testFile.toString()));
+
+ ChunkDocumentResponse response =
result.getIn().getBody(ChunkDocumentResponse.class);
+
+ assertThat(response).isNotNull();
+ assertThat(response.getChunks()).isNotEmpty();
+ }
+
+ @Test
+ void chunkHybridWithHeaderOverrides() throws Exception {
+ Path testFile = createTestFile();
+
+ Exchange result = template.request("direct:chunk-hybrid", e -> {
+ e.getIn().setHeader(DoclingHeaders.INPUT_FILE_PATH,
testFile.toString());
+ e.getIn().setHeader(DoclingHeaders.CHUNKING_TOKENIZER,
"sentence-transformers/all-MiniLM-L6-v2");
+ e.getIn().setHeader(DoclingHeaders.CHUNKING_MAX_TOKENS, 64);
+ e.getIn().setHeader(DoclingHeaders.CHUNKING_MERGE_PEERS, true);
+ });
+
+ @SuppressWarnings("unchecked")
+ List<Chunk> chunks = result.getIn().getBody(List.class);
+
+ assertThat(chunks).isNotEmpty();
+
+ LOG.info("HybridChunker with header overrides produced {} chunks",
chunks.size());
+ }
+
+ @Test
+ void chunkHybridFromPdf() throws Exception {
+ Path testFile = createMultiChapterPdf();
+
+ Exchange result = template.request("direct:chunk-hybrid",
+ e -> e.getIn().setHeader(DoclingHeaders.INPUT_FILE_PATH,
testFile.toString()));
+
+ @SuppressWarnings("unchecked")
+ List<Chunk> chunks = result.getIn().getBody(List.class);
+
+ assertThat(chunks).hasSizeGreaterThan(1);
+ assertThat(chunks).allSatisfy(chunk -> {
+ assertThat(chunk.getText()).isNotBlank();
+ assertThat(chunk.getFilename()).isNotBlank();
+ });
+
+ LOG.info("HybridChunker produced {} chunks from multi-chapter PDF",
chunks.size());
+ }
+
+ @Test
+ void chunkHybridWithOperationHeader() throws Exception {
+ Path testFile = createTestFile();
+
+ Map<String, Object> headers = Map.of(
+ DoclingHeaders.INPUT_FILE_PATH, testFile.toString(),
+ DoclingHeaders.OPERATION, "CHUNK_HYBRID");
+
+ Exchange result = template.request("direct:chunk-via-header",
+ e -> e.getIn().setHeaders(headers));
+
+ @SuppressWarnings("unchecked")
+ List<Chunk> chunks = result.getIn().getBody(List.class);
+
+ assertThat(chunks).isNotEmpty();
+ }
+
+ private Path createTestFile() throws Exception {
+ Path tempFile = Files.createTempFile("docling-chunk-test-", ".md");
+ String content = """
+ # Apache Camel Overview
+
+ Apache Camel is an open-source integration framework based on
known
+ Enterprise Integration Patterns. It provides a routing and
mediation
+ engine that allows developers to define routing rules.
+
+ ## Components
+
+ Camel provides over 300 components for connecting to external
systems
+ including databases, message brokers, cloud services, and APIs.
+
+ ### Kafka Component
+
+ The Kafka component enables integration with Apache Kafka for
+ high-throughput messaging and event streaming.
+
+ ### HTTP Component
+
+ The HTTP component allows sending and receiving HTTP requests,
+ supporting both synchronous and asynchronous communication.
+
+ ## Architecture
+
+ Camel uses a pipeline architecture where messages flow through
a
+ series of processors. Each processor can transform, filter, or
+ route messages based on content or headers.
+ """;
+ Files.write(tempFile, content.getBytes());
+ return tempFile;
+ }
+
+ private Path createMultiChapterPdf() throws IOException {
+ try (InputStream is =
getClass().getClassLoader().getResourceAsStream("multi_chapter_lorem.pdf")) {
+ Path tempFile =
Files.createTempFile("docling-chunk-test-multichapter", ".pdf");
+ Files.copy(is, tempFile.toAbsolutePath(),
StandardCopyOption.REPLACE_EXISTING);
+ return tempFile;
+ }
+ }
+
+ @Override
+ protected RouteBuilder createRouteBuilder() throws Exception {
+ return new RouteBuilder() {
+ @Override
+ public void configure() throws Exception {
+ from("direct:chunk-hybrid")
+
.to("docling:convert?operation=CHUNK_HYBRID&contentInBody=true");
+
+ from("direct:chunk-hierarchical")
+
.to("docling:convert?operation=CHUNK_HIERARCHICAL&contentInBody=true");
+
+ from("direct:chunk-hybrid-full-response")
+
.to("docling:convert?operation=CHUNK_HYBRID&contentInBody=false");
+
+ from("direct:chunk-via-header")
+ .to("docling:convert?contentInBody=true");
+ }
+ };
+ }
+}
diff --git
a/components/camel-ai/camel-docling/src/test/resources/multi_chapter_lorem.pdf
b/components/camel-ai/camel-docling/src/test/resources/multi_chapter_lorem.pdf
new file mode 100644
index 000000000000..6d4ce1f252af
--- /dev/null
+++
b/components/camel-ai/camel-docling/src/test/resources/multi_chapter_lorem.pdf
@@ -0,0 +1,150 @@
+%PDF-1.4
+%���� ReportLab Generated PDF document (opensource)
+1 0 obj
+<<
+/F1 2 0 R /F2 3 0 R
+>>
+endobj
+2 0 obj
+<<
+/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1
/Type /Font
+>>
+endobj
+3 0 obj
+<<
+/BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1
/Type /Font
+>>
+endobj
+4 0 obj
+<<
+/Contents 12 0 R /MediaBox [ 0 0 612 792 ] /Parent 11 0 R /Resources <<
+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
+>> /Rotate 0 /Trans <<
+
+>>
+ /Type /Page
+>>
+endobj
+5 0 obj
+<<
+/Contents 13 0 R /MediaBox [ 0 0 612 792 ] /Parent 11 0 R /Resources <<
+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
+>> /Rotate 0 /Trans <<
+
+>>
+ /Type /Page
+>>
+endobj
+6 0 obj
+<<
+/Contents 14 0 R /MediaBox [ 0 0 612 792 ] /Parent 11 0 R /Resources <<
+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
+>> /Rotate 0 /Trans <<
+
+>>
+ /Type /Page
+>>
+endobj
+7 0 obj
+<<
+/Contents 15 0 R /MediaBox [ 0 0 612 792 ] /Parent 11 0 R /Resources <<
+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
+>> /Rotate 0 /Trans <<
+
+>>
+ /Type /Page
+>>
+endobj
+8 0 obj
+<<
+/Contents 16 0 R /MediaBox [ 0 0 612 792 ] /Parent 11 0 R /Resources <<
+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
+>> /Rotate 0 /Trans <<
+
+>>
+ /Type /Page
+>>
+endobj
+9 0 obj
+<<
+/PageMode /UseNone /Pages 11 0 R /Type /Catalog
+>>
+endobj
+10 0 obj
+<<
+/Author (\(anonymous\)) /CreationDate (D:20260223165429+01'00') /Creator
(\(unspecified\)) /Keywords () /ModDate (D:20260223165429+01'00') /Producer
(ReportLab PDF Library - \(opensource\))
+ /Subject (\(unspecified\)) /Title (\(anonymous\)) /Trapped /False
+>>
+endobj
+11 0 obj
+<<
+/Count 5 /Kids [ 4 0 R 5 0 R 6 0 R 7 0 R 8 0 R ] /Type /Pages
+>>
+endobj
+12 0 obj
+<<
+/Filter [ /ASCII85Decode /FlateDecode ] /Length 389
+>>
+stream
+Gas2Dbtc/1&;9LtME)FT-'#!'E<B/O&dONh![cbLMeY?H:Bb[_-7oCI5aJ.pGWbS7'M:B#qDK,!X)[g*TISGCe7--WrJYl)C^YB)?E^S<Xi)bo3GG0;jI&%U<V"4'9SKJdA#a8PRB9bh0EdDh-Q0Oh'75KNSO@\l#.J6Dhbg"UG44f0W$`C,7sN6okke+W?KM+H=\LrUC:X8/Ek/*4ALC*XVlY6*0YYYp>f%'M:<Ffg"1q2n6g?rZ_o<e^I1,o)Mgt+O*;6s/j*)3iFg)RTmUS@2g1G!ifVCH8$pLT:(R&B+a!@+40"*srI;5LRGJ)2\Q$*B%1FBE3hJr*bM&le'2n_K>8EO$n:0^8E?s!d2/LN=#U$5]H;[sN)-U/Kf(GTj&K`~>endstream
+endobj
+13 0 obj
+<<
+/Filter [ /ASCII85Decode /FlateDecode ] /Length 1597
+>>
+stream
+Gat=*?#SIe&:F5UfJf^^XIA^hJYI*h#=ofCG,bUl4WR>X7H>J;+&^d!hq=bq9!6Eo^dqogG<<FC1V9"$.=SSp\#bC7T$n2l1Wj;d?W)&24fA*gHC,ef*+caG=4_PZnR6f,:#Fak#hQ@G'(/eV18EMO8"%U]`eXFXe3Q+;DWt#D036f.k[QH[W0@U_o\\'o/'YhAZtK:V+C=%<&79oM^E1SYXNMj#8C^tC4L/\090a)01.I6j?X-lsJR"_R.,ZXVS$gf';74<'/I#obJkEZ';Nt.=:i&QfTl;om$JH8!(>T^$_2/W(/=in^aX`R-\7"j=;c;k>EmlZ&%p]X/.qrL!.KiaN4.A`s>:Bc+Ok0;0.5rDIQ?LI<.fiJ"[C($!.+5o1)9dc>RF3J3K@EZ;TV\*i/dk*.>c`J!D,`Mac_7s\$YS)6H1DPD_UDd"9LZ^Qj]Y@Sn6a+CTu5p^KWOL"g];Ah*hTA&^hL6.\?:h:5uFeO9B
[...]
+endobj
+14 0 obj
+<<
+/Filter [ /ASCII85Decode /FlateDecode ] /Length 1997
+>>
+stream
+Gat=+9lo&I&A@sBm&a#Q<[C18SD9VkR7ScCdo`%&+W#rf7+<?p!2*orfCpj^Js&@NR>g_\;ZM"2p:[rY3R.%LO8ftQdG$PWnmP;b"g,6j?VZrpr^?5,BA_%th9&f#oP2[0nSAu5\_S.3c1=NZ>N:qO/S9CQ@_l]I^T0a9UnbmJqH\D#G*9Sl2CPnY60j#6rA06)$U6#4<qqSL&"bJX`Y*OL-SW;of=6\E/>dK7B2+I7>eCeXQ8-*qA)T)E`WKMC8,t2IFWR`40q6HhZdn=UXnQpd9gok?'8*k5PiTNVQ'%Y)-2SEf[j4C>;;D6u9M)($Z3[%$TO?epd59-,:5OLlPqY\mrXL^o)<5'U=,Rn[7C@jg-VM!=9]XQ8l)1nkPc$tRjC8H;\#h)JNC8H^=:.XeS-TIk^""]an?fBpPK7LN`W+#ibOm-5o5JA^2(-/Jl=;U^0<cKdkI;7Ipg=aNNKt75Q`M]gcI"\&n3PW)X'HGQG^
[...]
+endobj
+15 0 obj
+<<
+/Filter [ /ASCII85Decode /FlateDecode ] /Length 1556
+>>
+stream
+Gat=*bAu>q']&X:maC>JWZH>XA='mq9J]F))/H*PA-7rq'KRAP!2(&!lZ4[DOcipiiGi>M0Q8,sSGS]&$t&Vu]r6d:>LTd*b\=eaop4`!di2ITd/Eh+qpi?kq0#2FkAhGQZZP?1.m0<qQLM>;Pm&aO@V)eU)qT9qeO9,<Hr$@o"B@1Y]s;)4I'',dp(a_0U%L(um0gW\4MJX]G^Og`ZPna3H&?*\S$H1n30r%hAH51gkJs+MY/];!Z6,a*nt"rkf/ok@o+QI8e>NWe//?IufuH@X^U<a;->2il5sit]Z7bRt:MIMkJaa1Ofg`(PkoR'ED'N+GJOlJBX1WV,%X?&olKo?OD5;+ZOm>8L3'Sup<91U[/;B@J0Vo<C+W"m4eV-^:NX'/h?U$[raHA_Nrq1P'[;'BUfp#o9-58PEeQFX@K$AP7ra`#&d!4/ebu+:H#=M*(6H"*09>ZDH$>74tPdKQ3-)>Zd$^+%"_,8*V_Vc'BBj
[...]
+endobj
+16 0 obj
+<<
+/Filter [ /ASCII85Decode /FlateDecode ] /Length 1947
+>>
+stream
+Gat=+9lo&I&A@sBlnR`QXshJR-b%SsVjYP[2X9@d$V7EGe0)O,TG$qLpFX/kagKfiN5m!-!qtrnICh%DM4Z!jXW!p@2Y?UE"ViQACoLNDY%dKihJY$\?53@[f=1C`OMc=O9[EY)!nW=W'(.Z^ZD,lIXK5H3j2Kh\[?3VShpA92Ltp@jZ>.DhgITK<FRQ;kc>1%=8XmJ/o98tJ"$^G4Dt".lO.WRnk9@JU+tB`o?FMAXojpF][:e3lG0]4BmC_uDC`QMKq%c!Jc(=0h=HBJY<^l(e.BooV.B6X",e#(#eXdZh!JR31n4hi`fin1r<I8:/)qm>KbugG=)7S@s&'b\_^JJ4<,9Bb;q:5]ZE#n\U6Y3`2Y0pA`!OZ5m@XE()2=B3FD>$#PFnV*/d<@kuQrMgd-c%6a,@P]T>4sF\B*1S'9ou#R);*tEaKUuC<sO_X[3a/A:t,r8nMIE"gZ&hU*:b^$Pe;*\RZq\nPs^=*<2!0til
[...]
+endobj
+xref
+0 17
+0000000000 65535 f
+0000000061 00000 n
+0000000102 00000 n
+0000000209 00000 n
+0000000321 00000 n
+0000000516 00000 n
+0000000711 00000 n
+0000000906 00000 n
+0000001101 00000 n
+0000001296 00000 n
+0000001365 00000 n
+0000001646 00000 n
+0000001730 00000 n
+0000002210 00000 n
+0000003899 00000 n
+0000005988 00000 n
+0000007636 00000 n
+trailer
+<<
+/ID
+[<ee92b1129eb0e4edbaa13db24fe8b39f><ee92b1129eb0e4edbaa13db24fe8b39f>]
+% ReportLab generated PDF document -- digest (opensource)
+
+/Info 10 0 R
+/Root 9 0 R
+/Size 17
+>>
+startxref
+9675
+%%EOF
diff --git
a/dsl/camel-componentdsl/src/generated/java/org/apache/camel/builder/component/dsl/DoclingComponentBuilderFactory.java
b/dsl/camel-componentdsl/src/generated/java/org/apache/camel/builder/component/dsl/DoclingComponentBuilderFactory.java
index 780a2a8bf4a0..0f465090b60f 100644
---
a/dsl/camel-componentdsl/src/generated/java/org/apache/camel/builder/component/dsl/DoclingComponentBuilderFactory.java
+++
b/dsl/camel-componentdsl/src/generated/java/org/apache/camel/builder/component/dsl/DoclingComponentBuilderFactory.java
@@ -730,6 +730,88 @@ public interface DoclingComponentBuilderFactory {
}
+ /**
+ * Include raw text in chunk output.
+ *
+ * Hands the value to <code>doSetProperty</code> under the key
+ * <code>chunkingIncludeRawText</code> and returns this builder so
+ * calls can be chained.
+ *
+ * The option is a: <code>java.lang.Boolean</code> type.
+ *
+ * Default: false
+ * Group: chunking
+ *
+ * @param chunkingIncludeRawText the value to set
+ * @return the dsl builder
+ */
+ default DoclingComponentBuilder
chunkingIncludeRawText(java.lang.Boolean chunkingIncludeRawText) {
+ doSetProperty("chunkingIncludeRawText", chunkingIncludeRawText);
+ return this;
+ }
+
+ /**
+ * Maximum number of tokens per chunk for hybrid chunking.
+ *
+ * Hands the value to <code>doSetProperty</code> under the key
+ * <code>chunkingMaxTokens</code> and returns this builder so calls can
+ * be chained.
+ *
+ * The option is a: <code>java.lang.Integer</code> type.
+ *
+ * Group: chunking
+ *
+ * @param chunkingMaxTokens the value to set
+ * @return the dsl builder
+ */
+ default DoclingComponentBuilder chunkingMaxTokens(java.lang.Integer
chunkingMaxTokens) {
+ doSetProperty("chunkingMaxTokens", chunkingMaxTokens);
+ return this;
+ }
+
+
+ /**
+ * Whether to merge peer chunks in hybrid chunking.
+ *
+ * Hands the value to <code>doSetProperty</code> under the key
+ * <code>chunkingMergePeers</code> and returns this builder so calls
+ * can be chained.
+ *
+ * The option is a: <code>java.lang.Boolean</code> type.
+ *
+ * Default: true
+ * Group: chunking
+ *
+ * @param chunkingMergePeers the value to set
+ * @return the dsl builder
+ */
+ default DoclingComponentBuilder chunkingMergePeers(java.lang.Boolean
chunkingMergePeers) {
+ doSetProperty("chunkingMergePeers", chunkingMergePeers);
+ return this;
+ }
+
+ /**
+ * Tokenizer model for hybrid chunking (e.g.
+ * sentence-transformers/all-MiniLM-L6-v2).
+ *
+ * Hands the value to <code>doSetProperty</code> under the key
+ * <code>chunkingTokenizer</code> and returns this builder so calls can
+ * be chained.
+ *
+ * The option is a: <code>java.lang.String</code> type.
+ *
+ * Group: chunking
+ *
+ * @param chunkingTokenizer the value to set
+ * @return the dsl builder
+ */
+ default DoclingComponentBuilder chunkingTokenizer(java.lang.String
chunkingTokenizer) {
+ doSetProperty("chunkingTokenizer", chunkingTokenizer);
+ return this;
+ }
+
+
+ /**
+ * Use markdown format for tables in chunk output.
+ *
+ * Hands the value to <code>doSetProperty</code> under the key
+ * <code>chunkingUseMarkdownTables</code> and returns this builder so
+ * calls can be chained.
+ *
+ * The option is a: <code>java.lang.Boolean</code> type.
+ *
+ * Default: false
+ * Group: chunking
+ *
+ * @param chunkingUseMarkdownTables the value to set
+ * @return the dsl builder
+ */
+ default DoclingComponentBuilder
chunkingUseMarkdownTables(java.lang.Boolean chunkingUseMarkdownTables) {
+ doSetProperty("chunkingUseMarkdownTables",
chunkingUseMarkdownTables);
+ return this;
+ }
+
+
/**
* Include metadata in message headers when extracting metadata.
*
@@ -890,6 +972,11 @@ public interface DoclingComponentBuilderFactory {
case "batchSize": getOrCreateConfiguration((DoclingComponent)
component).setBatchSize((int) value); return true;
case "batchTimeout": getOrCreateConfiguration((DoclingComponent)
component).setBatchTimeout((long) value); return true;
case "splitBatchResults":
getOrCreateConfiguration((DoclingComponent)
component).setSplitBatchResults((boolean) value); return true;
+ case "chunkingIncludeRawText":
getOrCreateConfiguration((DoclingComponent)
component).setChunkingIncludeRawText((java.lang.Boolean) value); return true;
+ case "chunkingMaxTokens":
getOrCreateConfiguration((DoclingComponent)
component).setChunkingMaxTokens((java.lang.Integer) value); return true;
+ case "chunkingMergePeers":
getOrCreateConfiguration((DoclingComponent)
component).setChunkingMergePeers((java.lang.Boolean) value); return true;
+ case "chunkingTokenizer":
getOrCreateConfiguration((DoclingComponent)
component).setChunkingTokenizer((java.lang.String) value); return true;
+ case "chunkingUseMarkdownTables":
getOrCreateConfiguration((DoclingComponent)
component).setChunkingUseMarkdownTables((java.lang.Boolean) value); return true;
case "includeMetadataInHeaders":
getOrCreateConfiguration((DoclingComponent)
component).setIncludeMetadataInHeaders((boolean) value); return true;
case "includeRawMetadata":
getOrCreateConfiguration((DoclingComponent)
component).setIncludeRawMetadata((boolean) value); return true;
case "apiKeyHeader": getOrCreateConfiguration((DoclingComponent)
component).setApiKeyHeader((java.lang.String) value); return true;
diff --git
a/dsl/camel-endpointdsl/src/generated/java/org/apache/camel/builder/endpoint/dsl/DoclingEndpointBuilderFactory.java
b/dsl/camel-endpointdsl/src/generated/java/org/apache/camel/builder/endpoint/dsl/DoclingEndpointBuilderFactory.java
index 24f6fcd98b13..287d40952b66 100644
---
a/dsl/camel-endpointdsl/src/generated/java/org/apache/camel/builder/endpoint/dsl/DoclingEndpointBuilderFactory.java
+++
b/dsl/camel-endpointdsl/src/generated/java/org/apache/camel/builder/endpoint/dsl/DoclingEndpointBuilderFactory.java
@@ -403,6 +403,143 @@ public interface DoclingEndpointBuilderFactory {
doSetProperty("splitBatchResults", splitBatchResults);
return this;
}
+ /**
+ * Include raw text in chunk output.
+ *
+ * Typed overload: hands the <code>Boolean</code> to
+ * <code>doSetProperty</code> under the key
+ * <code>chunkingIncludeRawText</code> and returns this builder.
+ *
+ * The option is a: <code>java.lang.Boolean</code> type.
+ *
+ * Default: false
+ * Group: chunking
+ *
+ * @param chunkingIncludeRawText the value to set
+ * @return the dsl builder
+ */
+ default DoclingEndpointBuilder chunkingIncludeRawText(Boolean
chunkingIncludeRawText) {
+ doSetProperty("chunkingIncludeRawText", chunkingIncludeRawText);
+ return this;
+ }
+ /**
+ * Include raw text in chunk output.
+ *
+ * String overload: the text is handed unchanged to
+ * <code>doSetProperty</code> under the key
+ * <code>chunkingIncludeRawText</code>; this method itself performs no
+ * conversion.
+ *
+ * The option will be converted to a <code>java.lang.Boolean</code>
+ * type.
+ *
+ * Default: false
+ * Group: chunking
+ *
+ * @param chunkingIncludeRawText the value to set
+ * @return the dsl builder
+ */
+ default DoclingEndpointBuilder chunkingIncludeRawText(String
chunkingIncludeRawText) {
+ doSetProperty("chunkingIncludeRawText", chunkingIncludeRawText);
+ return this;
+ }
+ /**
+ * Maximum number of tokens per chunk for hybrid chunking.
+ *
+ * Typed overload: hands the <code>Integer</code> to
+ * <code>doSetProperty</code> under the key
+ * <code>chunkingMaxTokens</code> and returns this builder.
+ *
+ * The option is a: <code>java.lang.Integer</code> type.
+ *
+ * Group: chunking
+ *
+ * @param chunkingMaxTokens the value to set
+ * @return the dsl builder
+ */
+ default DoclingEndpointBuilder chunkingMaxTokens(Integer
chunkingMaxTokens) {
+ doSetProperty("chunkingMaxTokens", chunkingMaxTokens);
+ return this;
+ }
+ /**
+ * Maximum number of tokens per chunk for hybrid chunking.
+ *
+ * String overload: the text is handed unchanged to
+ * <code>doSetProperty</code> under the key
+ * <code>chunkingMaxTokens</code>; this method itself performs no
+ * conversion.
+ *
+ * The option will be converted to a <code>java.lang.Integer</code>
+ * type.
+ *
+ * Group: chunking
+ *
+ * @param chunkingMaxTokens the value to set
+ * @return the dsl builder
+ */
+ default DoclingEndpointBuilder chunkingMaxTokens(String
chunkingMaxTokens) {
+ doSetProperty("chunkingMaxTokens", chunkingMaxTokens);
+ return this;
+ }
+ /**
+ * Whether to merge peer chunks in hybrid chunking.
+ *
+ * Typed overload: hands the <code>Boolean</code> to
+ * <code>doSetProperty</code> under the key
+ * <code>chunkingMergePeers</code> and returns this builder.
+ *
+ * The option is a: <code>java.lang.Boolean</code> type.
+ *
+ * Default: true
+ * Group: chunking
+ *
+ * @param chunkingMergePeers the value to set
+ * @return the dsl builder
+ */
+ default DoclingEndpointBuilder chunkingMergePeers(Boolean
chunkingMergePeers) {
+ doSetProperty("chunkingMergePeers", chunkingMergePeers);
+ return this;
+ }
+ /**
+ * Whether to merge peer chunks in hybrid chunking.
+ *
+ * String overload: the text is handed unchanged to
+ * <code>doSetProperty</code> under the key
+ * <code>chunkingMergePeers</code>; this method itself performs no
+ * conversion.
+ *
+ * The option will be converted to a <code>java.lang.Boolean</code>
+ * type.
+ *
+ * Default: true
+ * Group: chunking
+ *
+ * @param chunkingMergePeers the value to set
+ * @return the dsl builder
+ */
+ default DoclingEndpointBuilder chunkingMergePeers(String
chunkingMergePeers) {
+ doSetProperty("chunkingMergePeers", chunkingMergePeers);
+ return this;
+ }
+ /**
+ * Tokenizer model for hybrid chunking (e.g.
+ * sentence-transformers/all-MiniLM-L6-v2).
+ *
+ * Hands the value to <code>doSetProperty</code> under the key
+ * <code>chunkingTokenizer</code> and returns this builder so calls can
+ * be chained.
+ *
+ * The option is a: <code>java.lang.String</code> type.
+ *
+ * Group: chunking
+ *
+ * @param chunkingTokenizer the value to set
+ * @return the dsl builder
+ */
+ default DoclingEndpointBuilder chunkingTokenizer(String
chunkingTokenizer) {
+ doSetProperty("chunkingTokenizer", chunkingTokenizer);
+ return this;
+ }
+ /**
+ * Use markdown format for tables in chunk output.
+ *
+ * Typed overload: hands the <code>Boolean</code> to
+ * <code>doSetProperty</code> under the key
+ * <code>chunkingUseMarkdownTables</code> and returns this builder.
+ *
+ * The option is a: <code>java.lang.Boolean</code> type.
+ *
+ * Default: false
+ * Group: chunking
+ *
+ * @param chunkingUseMarkdownTables the value to set
+ * @return the dsl builder
+ */
+ default DoclingEndpointBuilder chunkingUseMarkdownTables(Boolean
chunkingUseMarkdownTables) {
+ doSetProperty("chunkingUseMarkdownTables",
chunkingUseMarkdownTables);
+ return this;
+ }
+ /**
+ * Use markdown format for tables in chunk output.
+ *
+ * String overload: the text is handed unchanged to
+ * <code>doSetProperty</code> under the key
+ * <code>chunkingUseMarkdownTables</code>; this method itself performs
+ * no conversion.
+ *
+ * The option will be converted to a <code>java.lang.Boolean</code>
+ * type.
+ *
+ * Default: false
+ * Group: chunking
+ *
+ * @param chunkingUseMarkdownTables the value to set
+ * @return the dsl builder
+ */
+ default DoclingEndpointBuilder chunkingUseMarkdownTables(String
chunkingUseMarkdownTables) {
+ doSetProperty("chunkingUseMarkdownTables",
chunkingUseMarkdownTables);
+ return this;
+ }
/**
* Include metadata in message headers when extracting metadata.
*
@@ -1621,6 +1758,43 @@ public interface DoclingEndpointBuilderFactory {
public String doclingMetadataRaw() {
return "CamelDoclingMetadataRaw";
}
+ /**
+ * Tokenizer for hybrid chunking (e.g.
+ * sentence-transformers/all-MiniLM-L6-v2).
+ *
+ * The option is a: {@code String} type.
+ *
+ * Group: producer
+ *
+ * @return the header name, i.e. the literal
+ * {@code CamelDoclingChunkingTokenizer}.
+ */
+ public String doclingChunkingTokenizer() {
+ return "CamelDoclingChunkingTokenizer";
+ }
+ /**
+ * Maximum tokens per chunk for hybrid chunking.
+ *
+ * The option is a: {@code Integer} type.
+ *
+ * Group: producer
+ *
+ * @return the header name, i.e. the literal
+ * {@code CamelDoclingChunkingMaxTokens}.
+ */
+ public String doclingChunkingMaxTokens() {
+ return "CamelDoclingChunkingMaxTokens";
+ }
+ /**
+ * Whether to merge peer chunks in hybrid chunking.
+ *
+ * The option is a: {@code Boolean} type.
+ *
+ * Group: producer
+ *
+ * @return the header name, i.e. the literal
+ * {@code CamelDoclingChunkingMergePeers}.
+ */
+ public String doclingChunkingMergePeers() {
+ return "CamelDoclingChunkingMergePeers";
+ }
}
static DoclingEndpointBuilder endpointBuilder(String componentName, String
path) {
class DoclingEndpointBuilderImpl extends AbstractEndpointBuilder
implements DoclingEndpointBuilder, AdvancedDoclingEndpointBuilder {