This is an automated email from the ASF dual-hosted git repository. acosentino pushed a commit to branch docling-serve in repository https://gitbox.apache.org/repos/asf/camel.git
commit 0fbdfc67124323a751230ad184069245acba2059 Author: Andrea Cosentino <[email protected]> AuthorDate: Tue Oct 7 11:07:53 2025 +0200 CAMEL-22503 - Camel-Docling: Support Docling-serve and being able to invoke the service as API Signed-off-by: Andrea Cosentino <[email protected]> --- .../apache/camel/catalog/components/docling.json | 46 +++-- components/camel-ai/camel-docling/pom.xml | 14 ++ .../docling/DoclingComponentConfigurer.java | 12 ++ .../docling/DoclingConfigurationConfigurer.java | 12 ++ .../docling/DoclingEndpointConfigurer.java | 12 ++ .../docling/DoclingEndpointUriFactory.java | 4 +- .../apache/camel/component/docling/docling.json | 46 +++-- .../src/main/docs/docling-component.adoc | 143 ++++++++++++- .../component/docling/DoclingConfiguration.java | 24 +++ .../camel/component/docling/DoclingProducer.java | 79 +++++++- .../component/docling/DoclingServeClient.java | 224 +++++++++++++++++++++ .../component/docling/DoclingServeProducerIT.java | 179 ++++++++++++++++ .../docling/DoclingServeProducerTest.java | 120 +++++++++++ .../dsl/DoclingComponentBuilderFactory.java | 36 ++++ .../dsl/DoclingEndpointBuilderFactory.java | 45 +++++ .../camel-test-infra-docling}/pom.xml | 48 ++--- .../infra/docling/common/DoclingProperties.java | 27 +++ .../docling/services/DoclingInfraService.java | 27 +++ .../DoclingLocalContainerInfraService.java | 110 ++++++++++ .../services/DoclingRemoteInfraService.java | 60 ++++++ .../infra/docling/services/container.properties | 18 ++ .../infra/docling/DoclingInfraServiceTest.java | 46 +++++ .../infra/docling/services/DoclingService.java | 26 +++ .../docling/services/DoclingServiceFactory.java | 39 ++++ 24 files changed, 1309 insertions(+), 88 deletions(-) diff --git a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/docling.json b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/docling.json index 748e2cb66f7f..fb4dfdeec45d 100644 --- 
a/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/docling.json +++ b/catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/components/docling.json @@ -26,17 +26,19 @@ "componentProperties": { "configuration": { "index": 0, "kind": "property", "displayName": "Configuration", "group": "producer", "label": "", "required": false, "type": "object", "javaType": "org.apache.camel.component.docling.DoclingConfiguration", "deprecated": false, "autowired": false, "secret": false, "description": "The configuration for the Docling Endpoint" }, "contentInBody": { "index": 1, "kind": "property", "displayName": "Content In Body", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Include the content of the output file in the exchange body and dele [...] 
- "enableOCR": { "index": 2, "kind": "property", "displayName": "Enable OCR", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": true, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Enable OCR processing for scanned documents" }, - "includeLayoutInfo": { "index": 3, "kind": "property", "displayName": "Include Layout Info", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Show layout information with bounding boxes" }, - "lazyStartProducer": { "index": 4, "kind": "property", "displayName": "Lazy Start Producer", "group": "producer", "label": "producer", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "autowired": false, "secret": false, "defaultValue": false, "description": "Whether the producer should be started lazy (on the first message). By starting lazy you can use this to allow CamelContext and routes to startup in situations where a producer may otherwise fail [...] 
- "ocrLanguage": { "index": 5, "kind": "property", "displayName": "Ocr Language", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "en", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Language code for OCR processing" }, - "operation": { "index": 6, "kind": "property", "displayName": "Operation", "group": "producer", "label": "", "required": true, "type": "enum", "javaType": "org.apache.camel.component.docling.DoclingOperations", "enum": [ "CONVERT_TO_MARKDOWN", "CONVERT_TO_HTML", "CONVERT_TO_JSON", "EXTRACT_TEXT", "EXTRACT_STRUCTURED_DATA" ], "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "CONVERT_TO_MARKDOWN", "configurationClass": "org.apache.camel.c [...] - "outputFormat": { "index": 7, "kind": "property", "displayName": "Output Format", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "markdown", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Output format for document conversion" }, - "autowiredEnabled": { "index": 8, "kind": "property", "displayName": "Autowired Enabled", "group": "advanced", "label": "advanced", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "autowired": false, "secret": false, "defaultValue": true, "description": "Whether autowiring is enabled. This is used for automatic autowiring options (the option must be marked as autowired) by looking up in the registry to find if there is a single instance of matching t [...] 
- "doclingCommand": { "index": 9, "kind": "property", "displayName": "Docling Command", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Path to Docling Python executable or command" }, - "processTimeout": { "index": 10, "kind": "property", "displayName": "Process Timeout", "group": "advanced", "label": "advanced", "required": false, "type": "integer", "javaType": "long", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": 30000, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Timeout for Docling process execution in milliseconds" }, - "workingDirectory": { "index": 11, "kind": "property", "displayName": "Working Directory", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Working directory for Docling execution" }, - "maxFileSize": { "index": 12, "kind": "property", "displayName": "Max File Size", "group": "security", "label": "security", "required": false, "type": "integer", "javaType": "long", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": 52428800, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Maximum file size in bytes for processing" } + "doclingServeUrl": { "index": 2, "kind": "property", "displayName": "Docling Serve Url", "group": 
"producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "http:\/\/localhost:5001", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Docling-serve API URL (e.g., http:\/ [...] + "enableOCR": { "index": 3, "kind": "property", "displayName": "Enable OCR", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": true, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Enable OCR processing for scanned documents" }, + "includeLayoutInfo": { "index": 4, "kind": "property", "displayName": "Include Layout Info", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Show layout information with bounding boxes" }, + "lazyStartProducer": { "index": 5, "kind": "property", "displayName": "Lazy Start Producer", "group": "producer", "label": "producer", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "autowired": false, "secret": false, "defaultValue": false, "description": "Whether the producer should be started lazy (on the first message). By starting lazy you can use this to allow CamelContext and routes to startup in situations where a producer may otherwise fail [...] 
+ "ocrLanguage": { "index": 6, "kind": "property", "displayName": "Ocr Language", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "en", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Language code for OCR processing" }, + "operation": { "index": 7, "kind": "property", "displayName": "Operation", "group": "producer", "label": "", "required": true, "type": "enum", "javaType": "org.apache.camel.component.docling.DoclingOperations", "enum": [ "CONVERT_TO_MARKDOWN", "CONVERT_TO_HTML", "CONVERT_TO_JSON", "EXTRACT_TEXT", "EXTRACT_STRUCTURED_DATA" ], "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "CONVERT_TO_MARKDOWN", "configurationClass": "org.apache.camel.c [...] + "outputFormat": { "index": 8, "kind": "property", "displayName": "Output Format", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "markdown", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Output format for document conversion" }, + "useDoclingServe": { "index": 9, "kind": "property", "displayName": "Use Docling Serve", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Use docling-serve API instead of CLI command" }, + "autowiredEnabled": { "index": 10, "kind": "property", "displayName": "Autowired Enabled", 
"group": "advanced", "label": "advanced", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "autowired": false, "secret": false, "defaultValue": true, "description": "Whether autowiring is enabled. This is used for automatic autowiring options (the option must be marked as autowired) by looking up in the registry to find if there is a single instance of matching [...] + "doclingCommand": { "index": 11, "kind": "property", "displayName": "Docling Command", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Path to Docling Python executable or command" }, + "processTimeout": { "index": 12, "kind": "property", "displayName": "Process Timeout", "group": "advanced", "label": "advanced", "required": false, "type": "integer", "javaType": "long", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": 30000, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Timeout for Docling process execution in milliseconds" }, + "workingDirectory": { "index": 13, "kind": "property", "displayName": "Working Directory", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Working directory for Docling execution" }, + "maxFileSize": { "index": 14, "kind": "property", "displayName": "Max File Size", "group": "security", "label": "security", "required": false, "type": "integer", "javaType": 
"long", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": 52428800, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Maximum file size in bytes for processing" } }, "headers": { "CamelDoclingOperation": { "index": 0, "kind": "header", "displayName": "", "group": "producer", "label": "", "required": false, "javaType": "DoclingOperations", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "description": "The operation to perform", "constantName": "org.apache.camel.component.docling.DoclingHeaders#OPERATION" }, @@ -51,15 +53,17 @@ "properties": { "operationId": { "index": 0, "kind": "path", "displayName": "Operation Id", "group": "producer", "label": "", "required": true, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "description": "The operation identifier" }, "contentInBody": { "index": 1, "kind": "parameter", "displayName": "Content In Body", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Include the content of the output file in the exchange body and del [...] 
- "enableOCR": { "index": 2, "kind": "parameter", "displayName": "Enable OCR", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": true, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Enable OCR processing for scanned documents" }, - "includeLayoutInfo": { "index": 3, "kind": "parameter", "displayName": "Include Layout Info", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Show layout information with bounding boxes" }, - "ocrLanguage": { "index": 4, "kind": "parameter", "displayName": "Ocr Language", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "en", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Language code for OCR processing" }, - "operation": { "index": 5, "kind": "parameter", "displayName": "Operation", "group": "producer", "label": "", "required": true, "type": "enum", "javaType": "org.apache.camel.component.docling.DoclingOperations", "enum": [ "CONVERT_TO_MARKDOWN", "CONVERT_TO_HTML", "CONVERT_TO_JSON", "EXTRACT_TEXT", "EXTRACT_STRUCTURED_DATA" ], "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "CONVERT_TO_MARKDOWN", "configurationClass": "org.apache.camel. [...] 
- "outputFormat": { "index": 6, "kind": "parameter", "displayName": "Output Format", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "markdown", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Output format for document conversion" }, - "lazyStartProducer": { "index": 7, "kind": "parameter", "displayName": "Lazy Start Producer", "group": "producer (advanced)", "label": "producer,advanced", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "autowired": false, "secret": false, "defaultValue": false, "description": "Whether the producer should be started lazy (on the first message). By starting lazy you can use this to allow CamelContext and routes to startup in situations where a produc [...] - "doclingCommand": { "index": 8, "kind": "parameter", "displayName": "Docling Command", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Path to Docling Python executable or command" }, - "processTimeout": { "index": 9, "kind": "parameter", "displayName": "Process Timeout", "group": "advanced", "label": "advanced", "required": false, "type": "integer", "javaType": "long", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": 30000, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Timeout for Docling process execution in milliseconds" }, - "workingDirectory": { "index": 10, "kind": "parameter", "displayName": 
"Working Directory", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Working directory for Docling execution" }, - "maxFileSize": { "index": 11, "kind": "parameter", "displayName": "Max File Size", "group": "security", "label": "security", "required": false, "type": "integer", "javaType": "long", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": 52428800, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Maximum file size in bytes for processing" } + "doclingServeUrl": { "index": 2, "kind": "parameter", "displayName": "Docling Serve Url", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "http:\/\/localhost:5001", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Docling-serve API URL (e.g., http:\ [...] 
+ "enableOCR": { "index": 3, "kind": "parameter", "displayName": "Enable OCR", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": true, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Enable OCR processing for scanned documents" }, + "includeLayoutInfo": { "index": 4, "kind": "parameter", "displayName": "Include Layout Info", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Show layout information with bounding boxes" }, + "ocrLanguage": { "index": 5, "kind": "parameter", "displayName": "Ocr Language", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "en", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Language code for OCR processing" }, + "operation": { "index": 6, "kind": "parameter", "displayName": "Operation", "group": "producer", "label": "", "required": true, "type": "enum", "javaType": "org.apache.camel.component.docling.DoclingOperations", "enum": [ "CONVERT_TO_MARKDOWN", "CONVERT_TO_HTML", "CONVERT_TO_JSON", "EXTRACT_TEXT", "EXTRACT_STRUCTURED_DATA" ], "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "CONVERT_TO_MARKDOWN", "configurationClass": "org.apache.camel. [...] 
+ "outputFormat": { "index": 7, "kind": "parameter", "displayName": "Output Format", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "markdown", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Output format for document conversion" }, + "useDoclingServe": { "index": 8, "kind": "parameter", "displayName": "Use Docling Serve", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Use docling-serve API instead of CLI command" }, + "lazyStartProducer": { "index": 9, "kind": "parameter", "displayName": "Lazy Start Producer", "group": "producer (advanced)", "label": "producer,advanced", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "autowired": false, "secret": false, "defaultValue": false, "description": "Whether the producer should be started lazy (on the first message). By starting lazy you can use this to allow CamelContext and routes to startup in situations where a produc [...] 
+ "doclingCommand": { "index": 10, "kind": "parameter", "displayName": "Docling Command", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Path to Docling Python executable or command" }, + "processTimeout": { "index": 11, "kind": "parameter", "displayName": "Process Timeout", "group": "advanced", "label": "advanced", "required": false, "type": "integer", "javaType": "long", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": 30000, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Timeout for Docling process execution in milliseconds" }, + "workingDirectory": { "index": 12, "kind": "parameter", "displayName": "Working Directory", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Working directory for Docling execution" }, + "maxFileSize": { "index": 13, "kind": "parameter", "displayName": "Max File Size", "group": "security", "label": "security", "required": false, "type": "integer", "javaType": "long", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": 52428800, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Maximum file size in bytes for processing" } } } diff --git a/components/camel-ai/camel-docling/pom.xml b/components/camel-ai/camel-docling/pom.xml 
index 90207a0192f6..6b1c7ec1fe48 100644 --- a/components/camel-ai/camel-docling/pom.xml +++ b/components/camel-ai/camel-docling/pom.xml @@ -47,6 +47,13 @@ <version>${jackson2-version}</version> </dependency> + <!-- HTTP client for docling-serve API integration --> + <dependency> + <groupId>org.apache.httpcomponents.client5</groupId> + <artifactId>httpclient5</artifactId> + <version>${httpclient-version}</version> + </dependency> + <!-- for testing --> <dependency> <groupId>org.apache.camel</groupId> @@ -58,6 +65,13 @@ <artifactId>assertj-core</artifactId> <scope>test</scope> </dependency> + <dependency> + <groupId>org.apache.camel</groupId> + <artifactId>camel-test-infra-docling</artifactId> + <version>${project.version}</version> + <type>test-jar</type> + <scope>test</scope> + </dependency> </dependencies> diff --git a/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingComponentConfigurer.java b/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingComponentConfigurer.java index f00d50c51efd..840207c9d1c5 100644 --- a/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingComponentConfigurer.java +++ b/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingComponentConfigurer.java @@ -37,6 +37,8 @@ public class DoclingComponentConfigurer extends PropertyConfigurerSupport implem case "contentInBody": getOrCreateConfiguration(target).setContentInBody(property(camelContext, boolean.class, value)); return true; case "doclingcommand": case "doclingCommand": getOrCreateConfiguration(target).setDoclingCommand(property(camelContext, java.lang.String.class, value)); return true; + case "doclingserveurl": + case "doclingServeUrl": getOrCreateConfiguration(target).setDoclingServeUrl(property(camelContext, java.lang.String.class, value)); return true; case "enableocr": case "enableOCR": 
getOrCreateConfiguration(target).setEnableOCR(property(camelContext, boolean.class, value)); return true; case "includelayoutinfo": @@ -52,6 +54,8 @@ public class DoclingComponentConfigurer extends PropertyConfigurerSupport implem case "outputFormat": getOrCreateConfiguration(target).setOutputFormat(property(camelContext, java.lang.String.class, value)); return true; case "processtimeout": case "processTimeout": getOrCreateConfiguration(target).setProcessTimeout(property(camelContext, long.class, value)); return true; + case "usedoclingserve": + case "useDoclingServe": getOrCreateConfiguration(target).setUseDoclingServe(property(camelContext, boolean.class, value)); return true; case "workingdirectory": case "workingDirectory": getOrCreateConfiguration(target).setWorkingDirectory(property(camelContext, java.lang.String.class, value)); return true; default: return false; @@ -68,6 +72,8 @@ public class DoclingComponentConfigurer extends PropertyConfigurerSupport implem case "contentInBody": return boolean.class; case "doclingcommand": case "doclingCommand": return java.lang.String.class; + case "doclingserveurl": + case "doclingServeUrl": return java.lang.String.class; case "enableocr": case "enableOCR": return boolean.class; case "includelayoutinfo": @@ -83,6 +89,8 @@ public class DoclingComponentConfigurer extends PropertyConfigurerSupport implem case "outputFormat": return java.lang.String.class; case "processtimeout": case "processTimeout": return long.class; + case "usedoclingserve": + case "useDoclingServe": return boolean.class; case "workingdirectory": case "workingDirectory": return java.lang.String.class; default: return null; @@ -100,6 +108,8 @@ public class DoclingComponentConfigurer extends PropertyConfigurerSupport implem case "contentInBody": return getOrCreateConfiguration(target).isContentInBody(); case "doclingcommand": case "doclingCommand": return getOrCreateConfiguration(target).getDoclingCommand(); + case "doclingserveurl": + case 
"doclingServeUrl": return getOrCreateConfiguration(target).getDoclingServeUrl(); case "enableocr": case "enableOCR": return getOrCreateConfiguration(target).isEnableOCR(); case "includelayoutinfo": @@ -115,6 +125,8 @@ public class DoclingComponentConfigurer extends PropertyConfigurerSupport implem case "outputFormat": return getOrCreateConfiguration(target).getOutputFormat(); case "processtimeout": case "processTimeout": return getOrCreateConfiguration(target).getProcessTimeout(); + case "usedoclingserve": + case "useDoclingServe": return getOrCreateConfiguration(target).isUseDoclingServe(); case "workingdirectory": case "workingDirectory": return getOrCreateConfiguration(target).getWorkingDirectory(); default: return null; diff --git a/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingConfigurationConfigurer.java b/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingConfigurationConfigurer.java index a5d88d14ed4e..73a53e60adea 100644 --- a/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingConfigurationConfigurer.java +++ b/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingConfigurationConfigurer.java @@ -27,6 +27,8 @@ public class DoclingConfigurationConfigurer extends org.apache.camel.support.com case "contentInBody": target.setContentInBody(property(camelContext, boolean.class, value)); return true; case "doclingcommand": case "doclingCommand": target.setDoclingCommand(property(camelContext, java.lang.String.class, value)); return true; + case "doclingserveurl": + case "doclingServeUrl": target.setDoclingServeUrl(property(camelContext, java.lang.String.class, value)); return true; case "enableocr": case "enableOCR": target.setEnableOCR(property(camelContext, boolean.class, value)); return true; case "includelayoutinfo": @@ -40,6 +42,8 @@ public class DoclingConfigurationConfigurer 
extends org.apache.camel.support.com case "outputFormat": target.setOutputFormat(property(camelContext, java.lang.String.class, value)); return true; case "processtimeout": case "processTimeout": target.setProcessTimeout(property(camelContext, long.class, value)); return true; + case "usedoclingserve": + case "useDoclingServe": target.setUseDoclingServe(property(camelContext, boolean.class, value)); return true; case "workingdirectory": case "workingDirectory": target.setWorkingDirectory(property(camelContext, java.lang.String.class, value)); return true; default: return false; @@ -53,6 +57,8 @@ public class DoclingConfigurationConfigurer extends org.apache.camel.support.com case "contentInBody": return boolean.class; case "doclingcommand": case "doclingCommand": return java.lang.String.class; + case "doclingserveurl": + case "doclingServeUrl": return java.lang.String.class; case "enableocr": case "enableOCR": return boolean.class; case "includelayoutinfo": @@ -66,6 +72,8 @@ public class DoclingConfigurationConfigurer extends org.apache.camel.support.com case "outputFormat": return java.lang.String.class; case "processtimeout": case "processTimeout": return long.class; + case "usedoclingserve": + case "useDoclingServe": return boolean.class; case "workingdirectory": case "workingDirectory": return java.lang.String.class; default: return null; @@ -80,6 +88,8 @@ public class DoclingConfigurationConfigurer extends org.apache.camel.support.com case "contentInBody": return target.isContentInBody(); case "doclingcommand": case "doclingCommand": return target.getDoclingCommand(); + case "doclingserveurl": + case "doclingServeUrl": return target.getDoclingServeUrl(); case "enableocr": case "enableOCR": return target.isEnableOCR(); case "includelayoutinfo": @@ -93,6 +103,8 @@ public class DoclingConfigurationConfigurer extends org.apache.camel.support.com case "outputFormat": return target.getOutputFormat(); case "processtimeout": case "processTimeout": return 
target.getProcessTimeout(); + case "usedoclingserve": + case "useDoclingServe": return target.isUseDoclingServe(); case "workingdirectory": case "workingDirectory": return target.getWorkingDirectory(); default: return null; diff --git a/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingEndpointConfigurer.java b/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingEndpointConfigurer.java index 3a1fd6d13bec..8cd8bc60aff3 100644 --- a/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingEndpointConfigurer.java +++ b/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingEndpointConfigurer.java @@ -27,6 +27,8 @@ public class DoclingEndpointConfigurer extends PropertyConfigurerSupport impleme case "contentInBody": target.getConfiguration().setContentInBody(property(camelContext, boolean.class, value)); return true; case "doclingcommand": case "doclingCommand": target.getConfiguration().setDoclingCommand(property(camelContext, java.lang.String.class, value)); return true; + case "doclingserveurl": + case "doclingServeUrl": target.getConfiguration().setDoclingServeUrl(property(camelContext, java.lang.String.class, value)); return true; case "enableocr": case "enableOCR": target.getConfiguration().setEnableOCR(property(camelContext, boolean.class, value)); return true; case "includelayoutinfo": @@ -42,6 +44,8 @@ public class DoclingEndpointConfigurer extends PropertyConfigurerSupport impleme case "outputFormat": target.getConfiguration().setOutputFormat(property(camelContext, java.lang.String.class, value)); return true; case "processtimeout": case "processTimeout": target.getConfiguration().setProcessTimeout(property(camelContext, long.class, value)); return true; + case "usedoclingserve": + case "useDoclingServe": target.getConfiguration().setUseDoclingServe(property(camelContext, boolean.class, value)); 
return true; case "workingdirectory": case "workingDirectory": target.getConfiguration().setWorkingDirectory(property(camelContext, java.lang.String.class, value)); return true; default: return false; @@ -55,6 +59,8 @@ public class DoclingEndpointConfigurer extends PropertyConfigurerSupport impleme case "contentInBody": return boolean.class; case "doclingcommand": case "doclingCommand": return java.lang.String.class; + case "doclingserveurl": + case "doclingServeUrl": return java.lang.String.class; case "enableocr": case "enableOCR": return boolean.class; case "includelayoutinfo": @@ -70,6 +76,8 @@ public class DoclingEndpointConfigurer extends PropertyConfigurerSupport impleme case "outputFormat": return java.lang.String.class; case "processtimeout": case "processTimeout": return long.class; + case "usedoclingserve": + case "useDoclingServe": return boolean.class; case "workingdirectory": case "workingDirectory": return java.lang.String.class; default: return null; @@ -84,6 +92,8 @@ public class DoclingEndpointConfigurer extends PropertyConfigurerSupport impleme case "contentInBody": return target.getConfiguration().isContentInBody(); case "doclingcommand": case "doclingCommand": return target.getConfiguration().getDoclingCommand(); + case "doclingserveurl": + case "doclingServeUrl": return target.getConfiguration().getDoclingServeUrl(); case "enableocr": case "enableOCR": return target.getConfiguration().isEnableOCR(); case "includelayoutinfo": @@ -99,6 +109,8 @@ public class DoclingEndpointConfigurer extends PropertyConfigurerSupport impleme case "outputFormat": return target.getConfiguration().getOutputFormat(); case "processtimeout": case "processTimeout": return target.getConfiguration().getProcessTimeout(); + case "usedoclingserve": + case "useDoclingServe": return target.getConfiguration().isUseDoclingServe(); case "workingdirectory": case "workingDirectory": return target.getConfiguration().getWorkingDirectory(); default: return null; diff --git 
a/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingEndpointUriFactory.java b/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingEndpointUriFactory.java index 96d6cbb5344d..8b6173443452 100644 --- a/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingEndpointUriFactory.java +++ b/components/camel-ai/camel-docling/src/generated/java/org/apache/camel/component/docling/DoclingEndpointUriFactory.java @@ -23,9 +23,10 @@ public class DoclingEndpointUriFactory extends org.apache.camel.support.componen private static final Set<String> SECRET_PROPERTY_NAMES; private static final Map<String, String> MULTI_VALUE_PREFIXES; static { - Set<String> props = new HashSet<>(12); + Set<String> props = new HashSet<>(14); props.add("contentInBody"); props.add("doclingCommand"); + props.add("doclingServeUrl"); props.add("enableOCR"); props.add("includeLayoutInfo"); props.add("lazyStartProducer"); @@ -35,6 +36,7 @@ public class DoclingEndpointUriFactory extends org.apache.camel.support.componen props.add("operationId"); props.add("outputFormat"); props.add("processTimeout"); + props.add("useDoclingServe"); props.add("workingDirectory"); PROPERTY_NAMES = Collections.unmodifiableSet(props); SECRET_PROPERTY_NAMES = Collections.emptySet(); diff --git a/components/camel-ai/camel-docling/src/generated/resources/META-INF/org/apache/camel/component/docling/docling.json b/components/camel-ai/camel-docling/src/generated/resources/META-INF/org/apache/camel/component/docling/docling.json index 748e2cb66f7f..fb4dfdeec45d 100644 --- a/components/camel-ai/camel-docling/src/generated/resources/META-INF/org/apache/camel/component/docling/docling.json +++ b/components/camel-ai/camel-docling/src/generated/resources/META-INF/org/apache/camel/component/docling/docling.json @@ -26,17 +26,19 @@ "componentProperties": { "configuration": { "index": 0, "kind": "property", 
"displayName": "Configuration", "group": "producer", "label": "", "required": false, "type": "object", "javaType": "org.apache.camel.component.docling.DoclingConfiguration", "deprecated": false, "autowired": false, "secret": false, "description": "The configuration for the Docling Endpoint" }, "contentInBody": { "index": 1, "kind": "property", "displayName": "Content In Body", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Include the content of the output file in the exchange body and dele [...] - "enableOCR": { "index": 2, "kind": "property", "displayName": "Enable OCR", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": true, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Enable OCR processing for scanned documents" }, - "includeLayoutInfo": { "index": 3, "kind": "property", "displayName": "Include Layout Info", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Show layout information with bounding boxes" }, - "lazyStartProducer": { "index": 4, "kind": "property", "displayName": "Lazy Start Producer", "group": "producer", "label": "producer", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "autowired": false, "secret": false, "defaultValue": 
false, "description": "Whether the producer should be started lazy (on the first message). By starting lazy you can use this to allow CamelContext and routes to startup in situations where a producer may otherwise fail [...] - "ocrLanguage": { "index": 5, "kind": "property", "displayName": "Ocr Language", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "en", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Language code for OCR processing" }, - "operation": { "index": 6, "kind": "property", "displayName": "Operation", "group": "producer", "label": "", "required": true, "type": "enum", "javaType": "org.apache.camel.component.docling.DoclingOperations", "enum": [ "CONVERT_TO_MARKDOWN", "CONVERT_TO_HTML", "CONVERT_TO_JSON", "EXTRACT_TEXT", "EXTRACT_STRUCTURED_DATA" ], "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "CONVERT_TO_MARKDOWN", "configurationClass": "org.apache.camel.c [...] - "outputFormat": { "index": 7, "kind": "property", "displayName": "Output Format", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "markdown", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Output format for document conversion" }, - "autowiredEnabled": { "index": 8, "kind": "property", "displayName": "Autowired Enabled", "group": "advanced", "label": "advanced", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "autowired": false, "secret": false, "defaultValue": true, "description": "Whether autowiring is enabled. 
This is used for automatic autowiring options (the option must be marked as autowired) by looking up in the registry to find if there is a single instance of matching t [...] - "doclingCommand": { "index": 9, "kind": "property", "displayName": "Docling Command", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Path to Docling Python executable or command" }, - "processTimeout": { "index": 10, "kind": "property", "displayName": "Process Timeout", "group": "advanced", "label": "advanced", "required": false, "type": "integer", "javaType": "long", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": 30000, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Timeout for Docling process execution in milliseconds" }, - "workingDirectory": { "index": 11, "kind": "property", "displayName": "Working Directory", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Working directory for Docling execution" }, - "maxFileSize": { "index": 12, "kind": "property", "displayName": "Max File Size", "group": "security", "label": "security", "required": false, "type": "integer", "javaType": "long", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": 52428800, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": 
"configuration", "description": "Maximum file size in bytes for processing" } + "doclingServeUrl": { "index": 2, "kind": "property", "displayName": "Docling Serve Url", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "http:\/\/localhost:5001", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Docling-serve API URL (e.g., http:\/ [...] + "enableOCR": { "index": 3, "kind": "property", "displayName": "Enable OCR", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": true, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Enable OCR processing for scanned documents" }, + "includeLayoutInfo": { "index": 4, "kind": "property", "displayName": "Include Layout Info", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Show layout information with bounding boxes" }, + "lazyStartProducer": { "index": 5, "kind": "property", "displayName": "Lazy Start Producer", "group": "producer", "label": "producer", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "autowired": false, "secret": false, "defaultValue": false, "description": "Whether the producer should be started lazy (on the first message). 
By starting lazy you can use this to allow CamelContext and routes to startup in situations where a producer may otherwise fail [...] + "ocrLanguage": { "index": 6, "kind": "property", "displayName": "Ocr Language", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "en", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Language code for OCR processing" }, + "operation": { "index": 7, "kind": "property", "displayName": "Operation", "group": "producer", "label": "", "required": true, "type": "enum", "javaType": "org.apache.camel.component.docling.DoclingOperations", "enum": [ "CONVERT_TO_MARKDOWN", "CONVERT_TO_HTML", "CONVERT_TO_JSON", "EXTRACT_TEXT", "EXTRACT_STRUCTURED_DATA" ], "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "CONVERT_TO_MARKDOWN", "configurationClass": "org.apache.camel.c [...] 
+ "outputFormat": { "index": 8, "kind": "property", "displayName": "Output Format", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "markdown", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Output format for document conversion" }, + "useDoclingServe": { "index": 9, "kind": "property", "displayName": "Use Docling Serve", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Use docling-serve API instead of CLI command" }, + "autowiredEnabled": { "index": 10, "kind": "property", "displayName": "Autowired Enabled", "group": "advanced", "label": "advanced", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "autowired": false, "secret": false, "defaultValue": true, "description": "Whether autowiring is enabled. This is used for automatic autowiring options (the option must be marked as autowired) by looking up in the registry to find if there is a single instance of matching [...] 
+ "doclingCommand": { "index": 11, "kind": "property", "displayName": "Docling Command", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Path to Docling Python executable or command" }, + "processTimeout": { "index": 12, "kind": "property", "displayName": "Process Timeout", "group": "advanced", "label": "advanced", "required": false, "type": "integer", "javaType": "long", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": 30000, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Timeout for Docling process execution in milliseconds" }, + "workingDirectory": { "index": 13, "kind": "property", "displayName": "Working Directory", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Working directory for Docling execution" }, + "maxFileSize": { "index": 14, "kind": "property", "displayName": "Max File Size", "group": "security", "label": "security", "required": false, "type": "integer", "javaType": "long", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": 52428800, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Maximum file size in bytes for processing" } }, "headers": { "CamelDoclingOperation": { "index": 0, "kind": "header", "displayName": "", "group": 
"producer", "label": "", "required": false, "javaType": "DoclingOperations", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "description": "The operation to perform", "constantName": "org.apache.camel.component.docling.DoclingHeaders#OPERATION" }, @@ -51,15 +53,17 @@ "properties": { "operationId": { "index": 0, "kind": "path", "displayName": "Operation Id", "group": "producer", "label": "", "required": true, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "description": "The operation identifier" }, "contentInBody": { "index": 1, "kind": "parameter", "displayName": "Content In Body", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Include the content of the output file in the exchange body and del [...] 
- "enableOCR": { "index": 2, "kind": "parameter", "displayName": "Enable OCR", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": true, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Enable OCR processing for scanned documents" }, - "includeLayoutInfo": { "index": 3, "kind": "parameter", "displayName": "Include Layout Info", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Show layout information with bounding boxes" }, - "ocrLanguage": { "index": 4, "kind": "parameter", "displayName": "Ocr Language", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "en", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Language code for OCR processing" }, - "operation": { "index": 5, "kind": "parameter", "displayName": "Operation", "group": "producer", "label": "", "required": true, "type": "enum", "javaType": "org.apache.camel.component.docling.DoclingOperations", "enum": [ "CONVERT_TO_MARKDOWN", "CONVERT_TO_HTML", "CONVERT_TO_JSON", "EXTRACT_TEXT", "EXTRACT_STRUCTURED_DATA" ], "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "CONVERT_TO_MARKDOWN", "configurationClass": "org.apache.camel. [...] 
- "outputFormat": { "index": 6, "kind": "parameter", "displayName": "Output Format", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "markdown", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Output format for document conversion" }, - "lazyStartProducer": { "index": 7, "kind": "parameter", "displayName": "Lazy Start Producer", "group": "producer (advanced)", "label": "producer,advanced", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "autowired": false, "secret": false, "defaultValue": false, "description": "Whether the producer should be started lazy (on the first message). By starting lazy you can use this to allow CamelContext and routes to startup in situations where a produc [...] - "doclingCommand": { "index": 8, "kind": "parameter", "displayName": "Docling Command", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Path to Docling Python executable or command" }, - "processTimeout": { "index": 9, "kind": "parameter", "displayName": "Process Timeout", "group": "advanced", "label": "advanced", "required": false, "type": "integer", "javaType": "long", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": 30000, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Timeout for Docling process execution in milliseconds" }, - "workingDirectory": { "index": 10, "kind": "parameter", "displayName": 
"Working Directory", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Working directory for Docling execution" }, - "maxFileSize": { "index": 11, "kind": "parameter", "displayName": "Max File Size", "group": "security", "label": "security", "required": false, "type": "integer", "javaType": "long", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": 52428800, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Maximum file size in bytes for processing" } + "doclingServeUrl": { "index": 2, "kind": "parameter", "displayName": "Docling Serve Url", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "http:\/\/localhost:5001", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Docling-serve API URL (e.g., http:\ [...] 
+ "enableOCR": { "index": 3, "kind": "parameter", "displayName": "Enable OCR", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": true, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Enable OCR processing for scanned documents" }, + "includeLayoutInfo": { "index": 4, "kind": "parameter", "displayName": "Include Layout Info", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Show layout information with bounding boxes" }, + "ocrLanguage": { "index": 5, "kind": "parameter", "displayName": "Ocr Language", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "en", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Language code for OCR processing" }, + "operation": { "index": 6, "kind": "parameter", "displayName": "Operation", "group": "producer", "label": "", "required": true, "type": "enum", "javaType": "org.apache.camel.component.docling.DoclingOperations", "enum": [ "CONVERT_TO_MARKDOWN", "CONVERT_TO_HTML", "CONVERT_TO_JSON", "EXTRACT_TEXT", "EXTRACT_STRUCTURED_DATA" ], "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "CONVERT_TO_MARKDOWN", "configurationClass": "org.apache.camel. [...] 
+ "outputFormat": { "index": 7, "kind": "parameter", "displayName": "Output Format", "group": "producer", "label": "", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": "markdown", "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Output format for document conversion" }, + "useDoclingServe": { "index": 8, "kind": "parameter", "displayName": "Use Docling Serve", "group": "producer", "label": "", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Use docling-serve API instead of CLI command" }, + "lazyStartProducer": { "index": 9, "kind": "parameter", "displayName": "Lazy Start Producer", "group": "producer (advanced)", "label": "producer,advanced", "required": false, "type": "boolean", "javaType": "boolean", "deprecated": false, "autowired": false, "secret": false, "defaultValue": false, "description": "Whether the producer should be started lazy (on the first message). By starting lazy you can use this to allow CamelContext and routes to startup in situations where a produc [...] 
+ "doclingCommand": { "index": 10, "kind": "parameter", "displayName": "Docling Command", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Path to Docling Python executable or command" }, + "processTimeout": { "index": 11, "kind": "parameter", "displayName": "Process Timeout", "group": "advanced", "label": "advanced", "required": false, "type": "integer", "javaType": "long", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": 30000, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Timeout for Docling process execution in milliseconds" }, + "workingDirectory": { "index": 12, "kind": "parameter", "displayName": "Working Directory", "group": "advanced", "label": "advanced", "required": false, "type": "string", "javaType": "java.lang.String", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Working directory for Docling execution" }, + "maxFileSize": { "index": 13, "kind": "parameter", "displayName": "Max File Size", "group": "security", "label": "security", "required": false, "type": "integer", "javaType": "long", "deprecated": false, "deprecationNote": "", "autowired": false, "secret": false, "defaultValue": 52428800, "configurationClass": "org.apache.camel.component.docling.DoclingConfiguration", "configurationField": "configuration", "description": "Maximum file size in bytes for processing" } } } diff --git a/components/camel-ai/camel-docling/src/main/docs/docling-component.adoc 
b/components/camel-ai/camel-docling/src/main/docs/docling-component.adoc index 4933131b7ab9..c3fc080033bf 100644 --- a/components/camel-ai/camel-docling/src/main/docs/docling-component.adoc +++ b/components/camel-ai/camel-docling/src/main/docs/docling-component.adoc @@ -32,13 +32,33 @@ Maven users will need to add the following dependency to their `pom.xml` for thi == Prerequisites -Before using this component, you need to have Docling installed on your system. You can install it using pip: +This component supports two modes of operation: +1. **CLI Mode (default)**: Requires Docling to be installed on your system via pip: ++ [source,bash] ---- pip install docling ---- +2. **API Mode**: Requires a running docling-serve instance. You can run it using: ++ +[source,bash] +---- +# Install docling-serve +pip install docling-serve + +# Run docling-serve +docling-serve --host 0.0.0.0 --port 5001 +---- ++ +Or using Docker: ++ +[source,bash] +---- +docker run -p 5001:5001 ghcr.io/docling-project/docling-serve:latest +---- + == URI format ---- @@ -439,6 +459,119 @@ public class CustomArgsBean { } ---- +== Using Docling-Serve API + +=== Basic usage with docling-serve + +[tabs] +==== +Java:: ++ +[source,java] +---- +from("file:///data/documents?include=.*\\.pdf") + .to("docling:CONVERT_TO_MARKDOWN?useDoclingServe=true&doclingServeUrl=http://localhost:5001&contentInBody=true") + .process(exchange -> { + String markdown = exchange.getIn().getBody(String.class); + log.info("Converted content: {}", markdown); + }); +---- + +YAML:: ++ +[source,yaml] +---- +- route: + from: + uri: "file:///data/documents" + parameters: + include: ".*\\.pdf" + steps: + - to: + uri: "docling:CONVERT_TO_MARKDOWN" + parameters: + useDoclingServe: true + doclingServeUrl: "http://localhost:5001" + contentInBody: true + - process: + ref: "markdownProcessor" +---- +==== + +=== Converting documents from URLs using docling-serve + +When using docling-serve API mode, you can also process documents from URLs: + 
+[tabs] +==== +Java:: ++ +[source,java] +---- +from("timer:convert?repeatCount=1") + .setBody(constant("https://arxiv.org/pdf/2501.17887")) + .to("docling:CONVERT_TO_MARKDOWN?useDoclingServe=true&contentInBody=true") + .to("file:///data/output"); +---- + +YAML:: ++ +[source,yaml] +---- +- route: + from: + uri: "timer:convert" + parameters: + repeatCount: 1 + steps: + - setBody: + constant: "https://arxiv.org/pdf/2501.17887" + - to: + uri: "docling:CONVERT_TO_MARKDOWN" + parameters: + useDoclingServe: true + contentInBody: true + - to: + uri: "file:///data/output" +---- +==== + +=== Batch processing with docling-serve + +[tabs] +==== +Java:: ++ +[source,java] +---- +from("file:///data/documents?include=.*\\.(pdf|docx)") + .to("docling:CONVERT_TO_HTML?useDoclingServe=true&doclingServeUrl=http://localhost:5001&contentInBody=true") + .to("file:///data/converted?fileName=${file:name.noext}.html"); +---- + +YAML:: ++ +[source,yaml] +---- +- route: + from: + uri: "file:///data/documents" + parameters: + include: ".*\\.(pdf|docx)" + steps: + - to: + uri: "docling:CONVERT_TO_HTML" + parameters: + useDoclingServe: true + doclingServeUrl: "http://localhost:5001" + contentInBody: true + - to: + uri: "file:///data/converted" + parameters: + fileName: "${file:name.noext}.html" +---- +==== + == Error Handling The component handles various error scenarios: @@ -446,13 +579,15 @@ The component handles various error scenarios: - **File size limit exceeded**: Files larger than `maxFileSize` are rejected - **Process timeout**: Long-running conversions are terminated after `processTimeout` milliseconds - **Invalid file formats**: Unsupported file formats result in processing errors -- **Docling not found**: Missing Docling installation causes startup failures +- **Docling not found**: Missing Docling installation causes startup failures (CLI mode) +- **Connection errors**: When using docling-serve API mode, connection failures to the API endpoint will result in errors == Performance 
Considerations -- Large documents may require increased `processTimeout` values +- Large documents may require increased `processTimeout` values (CLI mode) - OCR processing significantly increases processing time for scanned documents -- Consider using `contentInBody=false` for large outputs to avoid memory issues +- Consider using `contentInBody=true` when using docling-serve API mode to get results directly in the body - The `maxFileSize` setting helps prevent resource exhaustion +- **API Mode vs CLI Mode**: The docling-serve API mode typically offers better performance and resource utilization for high-volume document processing, as it maintains a persistent server instance include::spring-boot:partial$starter.adoc[] diff --git a/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingConfiguration.java b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingConfiguration.java index 0f818a4fd5d7..5c60f75c17b1 100644 --- a/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingConfiguration.java +++ b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingConfiguration.java @@ -73,6 +73,14 @@ public class DoclingConfiguration implements Cloneable { defaultValue = "false") private boolean contentInBody = false; + @UriParam + @Metadata(description = "Use docling-serve API instead of CLI command", defaultValue = "false") + private boolean useDoclingServe = false; + + @UriParam + @Metadata(description = "Docling-serve API URL (e.g., http://localhost:5001)", defaultValue = "http://localhost:5001") + private String doclingServeUrl = "http://localhost:5001"; + public DoclingOperations getOperation() { return operation; } @@ -153,6 +161,22 @@ public class DoclingConfiguration implements Cloneable { this.contentInBody = contentInBody; } + public boolean isUseDoclingServe() { + return useDoclingServe; + } + + public void 
setUseDoclingServe(boolean useDoclingServe) { + this.useDoclingServe = useDoclingServe; + } + + public String getDoclingServeUrl() { + return doclingServeUrl; + } + + public void setDoclingServeUrl(String doclingServeUrl) { + this.doclingServeUrl = doclingServeUrl; + } + public DoclingConfiguration copy() { try { return (DoclingConfiguration) super.clone(); diff --git a/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingProducer.java b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingProducer.java index 85c8f2050da4..b88725b8e8e3 100644 --- a/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingProducer.java +++ b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingProducer.java @@ -42,6 +42,7 @@ public class DoclingProducer extends DefaultProducer { private DoclingEndpoint endpoint; private DoclingConfiguration configuration; + private DoclingServeClient doclingServeClient; public DoclingProducer(DoclingEndpoint endpoint) { super(endpoint); @@ -49,6 +50,26 @@ public class DoclingProducer extends DefaultProducer { this.configuration = endpoint.getConfiguration(); } + @Override + protected void doStart() throws Exception { + super.doStart(); + if (configuration.isUseDoclingServe()) { + doclingServeClient = new DoclingServeClient(configuration.getDoclingServeUrl()); + LOG.info("DoclingProducer configured to use docling-serve API at: {}", configuration.getDoclingServeUrl()); + } else { + LOG.info("DoclingProducer configured to use docling CLI command"); + } + } + + @Override + protected void doStop() throws Exception { + super.doStop(); + if (doclingServeClient != null) { + doclingServeClient.close(); + doclingServeClient = null; + } + } + @Override public void process(Exchange exchange) throws Exception { LOG.debug("DoclingProducer processing exchange with message ID: {}", exchange.getExchangeId()); @@ -86,29 
+107,59 @@ public class DoclingProducer extends DefaultProducer { private void processConvertToMarkdown(Exchange exchange) throws Exception { LOG.debug("DoclingProducer converting to markdown"); - String inputPath = getInputPath(exchange); - exchange.getIn().setBody(executeDoclingCommand(inputPath, "markdown", exchange)); + if (configuration.isUseDoclingServe()) { + String inputPath = getInputPath(exchange); + String result = doclingServeClient.convertDocument(inputPath, "markdown"); + exchange.getIn().setBody(result); + } else { + String inputPath = getInputPath(exchange); + exchange.getIn().setBody(executeDoclingCommand(inputPath, "markdown", exchange)); + } } private void processConvertToHTML(Exchange exchange) throws Exception { LOG.debug("DoclingProducer converting to HTML"); - String inputPath = getInputPath(exchange); - exchange.getIn().setBody(executeDoclingCommand(inputPath, "html", exchange)); + if (configuration.isUseDoclingServe()) { + String inputPath = getInputPath(exchange); + String result = doclingServeClient.convertDocument(inputPath, "html"); + exchange.getIn().setBody(result); + } else { + String inputPath = getInputPath(exchange); + exchange.getIn().setBody(executeDoclingCommand(inputPath, "html", exchange)); + } } private void processConvertToJSON(Exchange exchange) throws Exception { - String inputPath = getInputPath(exchange); - exchange.getIn().setBody(executeDoclingCommand(inputPath, "json", exchange)); + if (configuration.isUseDoclingServe()) { + String inputPath = getInputPath(exchange); + String result = doclingServeClient.convertDocument(inputPath, "json"); + exchange.getIn().setBody(result); + } else { + String inputPath = getInputPath(exchange); + exchange.getIn().setBody(executeDoclingCommand(inputPath, "json", exchange)); + } } private void processExtractText(Exchange exchange) throws Exception { - String inputPath = getInputPath(exchange); - exchange.getIn().setBody(executeDoclingCommand(inputPath, "text", exchange)); + if 
(configuration.isUseDoclingServe()) { + String inputPath = getInputPath(exchange); + String result = doclingServeClient.convertDocument(inputPath, "text"); + exchange.getIn().setBody(result); + } else { + String inputPath = getInputPath(exchange); + exchange.getIn().setBody(executeDoclingCommand(inputPath, "text", exchange)); + } } private void processExtractStructuredData(Exchange exchange) throws Exception { - String inputPath = getInputPath(exchange); - exchange.getIn().setBody(executeDoclingCommand(inputPath, "json", exchange)); + if (configuration.isUseDoclingServe()) { + String inputPath = getInputPath(exchange); + String result = doclingServeClient.convertDocument(inputPath, "json"); + exchange.getIn().setBody(result); + } else { + String inputPath = getInputPath(exchange); + exchange.getIn().setBody(executeDoclingCommand(inputPath, "json", exchange)); + } } private String getInputPath(Exchange exchange) throws InvalidPayloadException, IOException { @@ -122,10 +173,16 @@ public class DoclingProducer extends DefaultProducer { Object body = exchange.getIn().getBody(); if (body instanceof String) { String content = (String) body; - if (content.startsWith("/") || content.contains("\\")) { + // Check if it's a URL (http:// or https://) or a file path + if (content.startsWith("http://") || content.startsWith("https://")) { + // Return URL as-is, no validation needed + return content; + } else if (content.startsWith("/") || content.contains("\\")) { + // It's a file path validateFileSize(content); return content; } else { + // Treat as content to be written to a temp file Path tempFile = Files.createTempFile("docling-", ".tmp"); Files.write(tempFile, content.getBytes()); validateFileSize(tempFile.toString()); diff --git a/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingServeClient.java b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingServeClient.java new file mode 100644 index 
000000000000..375e76c00952 --- /dev/null +++ b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingServeClient.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.camel.component.docling; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.Base64; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.hc.client5.http.classic.methods.HttpPost; +import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; +import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse; +import org.apache.hc.client5.http.impl.classic.HttpClients; +import org.apache.hc.core5.http.ContentType; +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.apache.hc.core5.http.io.entity.StringEntity; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Client for interacting with Docling-Serve API. 
+ */ +public class DoclingServeClient { + + private static final Logger LOG = LoggerFactory.getLogger(DoclingServeClient.class); + private static final String CONVERT_ENDPOINT = "/v1/convert/source"; + + private final String baseUrl; + private final ObjectMapper objectMapper; + private final CloseableHttpClient httpClient; + + public DoclingServeClient(String baseUrl) { + this.baseUrl = baseUrl.endsWith("/") ? baseUrl.substring(0, baseUrl.length() - 1) : baseUrl; + this.objectMapper = new ObjectMapper(); + this.httpClient = HttpClients.createDefault(); + } + + /** + * Convert a document using the docling-serve API. + * + * @param inputSource File path or URL to the document + * @param outputFormat Output format: markdown/md, html, json, text/txt (unknown values map to md) + * @return Converted document content + * @throws IOException If the input file does not exist or the API call fails + */ + public String convertDocument(String inputSource, String outputFormat) throws IOException { + LOG.debug("Converting document using docling-serve API: {}", inputSource); + + // Check if input is a URL or file path + if (inputSource.startsWith("http://") || inputSource.startsWith("https://")) { + return convertFromUrl(inputSource, outputFormat); + } else { + return convertFromFile(inputSource, outputFormat); + } + } + + private String convertFromUrl(String url, String outputFormat) throws IOException { + Map<String, Object> requestBody = new HashMap<>(); + Map<String, String> source = new HashMap<>(); + source.put("kind", "http"); + source.put("url", url); + requestBody.put("sources", Collections.singletonList(source)); + + // Add output format if specified + if (outputFormat != null && !outputFormat.isEmpty()) { + Map<String, Object> options = new HashMap<>(); + options.put("to_formats", Collections.singletonList(mapOutputFormat(outputFormat))); + requestBody.put("options", options); + } + + String jsonRequest = objectMapper.writeValueAsString(requestBody); + LOG.debug("Request body: {}", jsonRequest); + + HttpPost httpPost = new HttpPost(baseUrl 
+ CONVERT_ENDPOINT); + httpPost.setEntity(new StringEntity(jsonRequest, ContentType.APPLICATION_JSON)); + httpPost.setHeader("Accept", "application/json"); + + try (CloseableHttpResponse response = httpClient.execute(httpPost)) { + int statusCode = response.getCode(); + String responseBody; + try { + responseBody = EntityUtils.toString(response.getEntity()); + } catch (org.apache.hc.core5.http.ParseException e) { + throw new IOException("Failed to parse response from docling-serve API", e); + } + + if (statusCode >= 200 && statusCode < 300) { + return extractConvertedContent(responseBody, outputFormat); + } else { + throw new IOException( + "Docling-serve API request failed with status " + statusCode + ": " + responseBody); + } + } + } + + private String convertFromFile(String filePath, String outputFormat) throws IOException { + File file = new File(filePath); + if (!file.exists()) { + throw new IOException("File not found: " + filePath); + } + + // Read file and encode as base64 + byte[] fileBytes = Files.readAllBytes(file.toPath()); + String base64Content = Base64.getEncoder().encodeToString(fileBytes); + + // Build request body with base64-encoded file + Map<String, Object> requestBody = new HashMap<>(); + Map<String, String> source = new HashMap<>(); + source.put("kind", "file"); + source.put("base64_string", base64Content); + source.put("filename", file.getName()); + requestBody.put("sources", Collections.singletonList(source)); + + // Add output format if specified + if (outputFormat != null && !outputFormat.isEmpty()) { + Map<String, Object> options = new HashMap<>(); + options.put("to_formats", Collections.singletonList(mapOutputFormat(outputFormat))); + requestBody.put("options", options); + } + + String jsonRequest = objectMapper.writeValueAsString(requestBody); + LOG.debug("Request body: {}", jsonRequest); + + HttpPost httpPost = new HttpPost(baseUrl + CONVERT_ENDPOINT); + httpPost.setEntity(new StringEntity(jsonRequest, ContentType.APPLICATION_JSON)); 
+ httpPost.setHeader("Accept", "application/json"); + + try (CloseableHttpResponse response = httpClient.execute(httpPost)) { + int statusCode = response.getCode(); + String responseBody; + try { + responseBody = EntityUtils.toString(response.getEntity()); + } catch (org.apache.hc.core5.http.ParseException e) { + throw new IOException("Failed to parse response from docling-serve API", e); + } + + if (statusCode >= 200 && statusCode < 300) { + return extractConvertedContent(responseBody, outputFormat); + } else { + throw new IOException( + "Docling-serve API request failed with status " + statusCode + ": " + responseBody); + } + } + } + + private String extractConvertedContent(String responseBody, String outputFormat) throws IOException { + try { + JsonNode rootNode = objectMapper.readTree(responseBody); + + // The response structure may vary, so we'll try to extract the content + // This is a simplified implementation - adjust based on actual API response + if (rootNode.has("documents") && rootNode.get("documents").isArray() + && rootNode.get("documents").size() > 0) { + JsonNode firstDoc = rootNode.get("documents").get(0); + + // Try different possible response formats + if (firstDoc.has("content")) { + return firstDoc.get("content").asText(); + } else if (firstDoc.has("markdown")) { + return firstDoc.get("markdown").asText(); + } else if (firstDoc.has("text")) { + return firstDoc.get("text").asText(); + } else { + // Return the entire document as JSON string + return objectMapper.writeValueAsString(firstDoc); + } + } else if (rootNode.has("content")) { + return rootNode.get("content").asText(); + } else { + // Return the entire response as a formatted JSON string + return objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(rootNode); + } + } catch (Exception e) { + LOG.warn("Failed to parse JSON response, returning raw response", e); + return responseBody; + } + } + + private String mapOutputFormat(String outputFormat) { + if (outputFormat == null) { 
+ return "md"; + } + + switch (outputFormat.toLowerCase()) { + case "markdown": + case "md": + return "md"; + case "html": + return "html"; + case "json": + return "json"; + case "text": + case "txt": + return "text"; + default: + return "md"; + } + } + + public void close() throws IOException { + if (httpClient != null) { + httpClient.close(); + } + } +} diff --git a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/DoclingServeProducerIT.java b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/DoclingServeProducerIT.java new file mode 100644 index 000000000000..0525655dd945 --- /dev/null +++ b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/DoclingServeProducerIT.java @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.camel.component.docling; + +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.camel.CamelContext; +import org.apache.camel.builder.RouteBuilder; +import org.apache.camel.test.infra.docling.services.DoclingService; +import org.apache.camel.test.infra.docling.services.DoclingServiceFactory; +import org.apache.camel.test.junit5.CamelTestSupport; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; +import org.junit.jupiter.api.io.TempDir; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Integration test for Docling-Serve producer operations using test-infra for container management. + * + * This test demonstrates how to use the camel-test-infra-docling module to automatically spin up a Docling-Serve + * container for testing without manual setup. 
+ */ +public class DoclingServeProducerIT extends CamelTestSupport { + + private static final Logger LOG = LoggerFactory.getLogger(DoclingServeProducerIT.class); + + @RegisterExtension + static DoclingService doclingService = DoclingServiceFactory.createService(); + + @TempDir + Path outputDir; + + @Override + protected CamelContext createCamelContext() throws Exception { + CamelContext context = super.createCamelContext(); + DoclingComponent docling = context.getComponent("docling", DoclingComponent.class); + DoclingConfiguration conf = new DoclingConfiguration(); + conf.setUseDoclingServe(true); + conf.setDoclingServeUrl(doclingService.getDoclingServerUrl()); + docling.setConfiguration(conf); + + LOG.info("Testing Docling-Serve at: {}", doclingService.getDoclingServerUrl()); + + return context; + } + + @Test + public void testMarkdownConversionWithDoclingServe() throws Exception { + Path testFile = createTestFile(); + + String result = template.requestBodyAndHeader("direct:convert-markdown-serve", + testFile.toString(), + DoclingHeaders.INPUT_FILE_PATH, testFile.toString(), String.class); + + assertNotNull(result); + assertTrue(result.length() > 0); + + LOG.info("Successfully converted document to Markdown"); + } + + @Test + public void testHtmlConversionWithDoclingServe() throws Exception { + Path testFile = createTestFile(); + + String result = template.requestBodyAndHeader("direct:convert-html-serve", + testFile.toString(), + DoclingHeaders.OPERATION, DoclingOperations.CONVERT_TO_HTML, String.class); + + assertNotNull(result); + assertTrue(result.length() > 0); + + LOG.info("Successfully converted document to HTML"); + } + + @Test + public void testUrlConversionWithDoclingServe() throws Exception { + // Test converting a document from a URL + String url = "https://arxiv.org/pdf/2501.17887"; + + String result = template.requestBody("direct:convert-url-serve", url, String.class); + + assertNotNull(result); + assertTrue(result.length() > 0); + + 
LOG.info("Successfully converted document from URL"); + } + + @Test + public void testJsonConversionWithDoclingServe() throws Exception { + Path testFile = createTestFile(); + + String result = template.requestBodyAndHeader("direct:convert-json-serve", + testFile.toString(), + DoclingHeaders.INPUT_FILE_PATH, testFile.toString(), String.class); + + assertNotNull(result); + assertTrue(result.length() > 0); + // JSON response should contain some structure + assertTrue(result.contains("{") || result.contains("[")); + + LOG.info("Successfully converted document to JSON"); + } + + @Test + public void testConvertAndWriteToFile() throws Exception { + Path testFile = createTestFile(); + + // Send the file path to the route that converts and writes to file + template.sendBodyAndHeader("direct:convert-and-write", + testFile.toString(), + DoclingHeaders.INPUT_FILE_PATH, testFile.toString()); + + // Verify the output file was created + File outputFile = new File(outputDir.toFile(), "converted-output.md"); + assertTrue(outputFile.exists(), "Output file should exist"); + assertTrue(outputFile.length() > 0, "Output file should not be empty"); + + // Read and verify content + String content = Files.readString(outputFile.toPath()); + assertNotNull(content); + assertTrue(content.length() > 0); + + LOG.info("Successfully converted document and wrote to file: {}", outputFile.getAbsolutePath()); + LOG.info("Output file size: {} bytes", outputFile.length()); + } + + private Path createTestFile() throws Exception { + Path tempFile = Files.createTempFile("docling-serve-test", ".md"); + Files.write(tempFile, + "# Test Document\n\nThis is a test document for Docling-Serve processing.\n\n## Section 1\n\nSome content here.\n\n- List item 1\n- List item 2\n" + .getBytes()); + return tempFile; + } + + @Override + protected RouteBuilder createRouteBuilder() throws Exception { + return new RouteBuilder() { + @Override + public void configure() throws Exception { + 
from("direct:convert-markdown-serve") + .to("docling:convert?operation=CONVERT_TO_MARKDOWN&contentInBody=true"); + + from("direct:convert-html-serve") + .to("docling:convert?operation=CONVERT_TO_HTML&contentInBody=true"); + + from("direct:convert-json-serve") + .to("docling:convert?operation=CONVERT_TO_JSON&contentInBody=true"); + + from("direct:convert-url-serve") + .to("docling:convert?operation=CONVERT_TO_MARKDOWN&contentInBody=true"); + + from("direct:convert-and-write") + .to("docling:convert?operation=CONVERT_TO_MARKDOWN&contentInBody=true") + .to("file:" + outputDir.toString() + "?fileName=converted-output.md"); + } + }; + } + +} diff --git a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/DoclingServeProducerTest.java b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/DoclingServeProducerTest.java new file mode 100644 index 000000000000..ff485fb1678a --- /dev/null +++ b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/DoclingServeProducerTest.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.camel.component.docling; + +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.camel.builder.RouteBuilder; +import org.apache.camel.test.junit5.CamelTestSupport; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfSystemProperty; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class DoclingServeProducerTest extends CamelTestSupport { + + @Test + @EnabledIfSystemProperty(named = "docling.serve.test.enabled", matches = "true") + public void testMarkdownConversionWithDoclingServe() throws Exception { + Path testFile = createTestFile(); + + String result = template.requestBodyAndHeader("direct:convert-markdown-serve", + testFile.toString(), + DoclingHeaders.INPUT_FILE_PATH, testFile.toString(), String.class); + + assertNotNull(result); + assertTrue(result.length() > 0); + } + + @Test + @EnabledIfSystemProperty(named = "docling.serve.test.enabled", matches = "true") + public void testHtmlConversionWithDoclingServe() throws Exception { + Path testFile = createTestFile(); + + String result = template.requestBodyAndHeader("direct:convert-html-serve", + testFile.toString(), + DoclingHeaders.OPERATION, DoclingOperations.CONVERT_TO_HTML, String.class); + + assertNotNull(result); + assertTrue(result.length() > 0); + } + + @Test + @EnabledIfSystemProperty(named = "docling.serve.test.enabled", matches = "true") + public void testUrlConversionWithDoclingServe() throws Exception { + // Test converting a document from a URL + String url = "https://arxiv.org/pdf/2501.17887"; + + String result = template.requestBody("direct:convert-url-serve", url, String.class); + + assertNotNull(result); + assertTrue(result.length() > 0); + } + + @Test + @EnabledIfSystemProperty(named = "docling.serve.test.enabled", matches = "true") + public void testJsonConversionWithDoclingServe() throws Exception { + Path testFile = 
createTestFile(); + + String result = template.requestBodyAndHeader("direct:convert-json-serve", + testFile.toString(), + DoclingHeaders.INPUT_FILE_PATH, testFile.toString(), String.class); + + assertNotNull(result); + assertTrue(result.length() > 0); + // JSON response should contain some structure + assertTrue(result.contains("{") || result.contains("[")); + } + + private Path createTestFile() throws Exception { + Path tempFile = Files.createTempFile("docling-serve-test", ".md"); + Files.write(tempFile, + "# Test Document\n\nThis is a test document for Docling-Serve processing.\n\n## Section 1\n\nSome content here.\n\n- List item 1\n- List item 2\n" + .getBytes()); + return tempFile; + } + + @Override + protected RouteBuilder createRouteBuilder() throws Exception { + return new RouteBuilder() { + @Override + public void configure() throws Exception { + // Get the docling-serve URL from system property, default to localhost:5001 + String doclingServeUrl = System.getProperty("docling.serve.url", "http://localhost:5001"); + + from("direct:convert-markdown-serve") + .to("docling:convert?operation=CONVERT_TO_MARKDOWN&useDoclingServe=true&doclingServeUrl=" + + doclingServeUrl + "&contentInBody=true"); + + from("direct:convert-html-serve") + .to("docling:convert?operation=CONVERT_TO_HTML&useDoclingServe=true&doclingServeUrl=" + + doclingServeUrl + "&contentInBody=true"); + + from("direct:convert-json-serve") + .to("docling:convert?operation=CONVERT_TO_JSON&useDoclingServe=true&doclingServeUrl=" + + doclingServeUrl + "&contentInBody=true"); + + from("direct:convert-url-serve") + .to("docling:convert?operation=CONVERT_TO_MARKDOWN&useDoclingServe=true&doclingServeUrl=" + + doclingServeUrl + "&contentInBody=true"); + } + }; + } + +} diff --git a/dsl/camel-componentdsl/src/generated/java/org/apache/camel/builder/component/dsl/DoclingComponentBuilderFactory.java 
b/dsl/camel-componentdsl/src/generated/java/org/apache/camel/builder/component/dsl/DoclingComponentBuilderFactory.java index 845f067145b1..0812a96b9ae0 100644 --- a/dsl/camel-componentdsl/src/generated/java/org/apache/camel/builder/component/dsl/DoclingComponentBuilderFactory.java +++ b/dsl/camel-componentdsl/src/generated/java/org/apache/camel/builder/component/dsl/DoclingComponentBuilderFactory.java @@ -85,6 +85,23 @@ public interface DoclingComponentBuilderFactory { } + /** + * Docling-serve API URL (e.g., http://localhost:5001). + * + * The option is a: <code>java.lang.String</code> type. + * + * Default: http://localhost:5001 + * Group: producer + * + * @param doclingServeUrl the value to set + * @return the dsl builder + */ + default DoclingComponentBuilder doclingServeUrl(java.lang.String doclingServeUrl) { + doSetProperty("doclingServeUrl", doclingServeUrl); + return this; + } + + /** * Enable OCR processing for scanned documents. * @@ -196,6 +213,23 @@ public interface DoclingComponentBuilderFactory { } + /** + * Use docling-serve API instead of CLI command. + * + * The option is a: <code>boolean</code> type. + * + * Default: false + * Group: producer + * + * @param useDoclingServe the value to set + * @return the dsl builder + */ + default DoclingComponentBuilder useDoclingServe(boolean useDoclingServe) { + doSetProperty("useDoclingServe", useDoclingServe); + return this; + } + + /** * Whether autowiring is enabled. 
This is used for automatic autowiring * options (the option must be marked as autowired) by looking up in the @@ -303,12 +337,14 @@ public interface DoclingComponentBuilderFactory { switch (name) { case "configuration": ((DoclingComponent) component).setConfiguration((org.apache.camel.component.docling.DoclingConfiguration) value); return true; case "contentInBody": getOrCreateConfiguration((DoclingComponent) component).setContentInBody((boolean) value); return true; + case "doclingServeUrl": getOrCreateConfiguration((DoclingComponent) component).setDoclingServeUrl((java.lang.String) value); return true; case "enableOCR": getOrCreateConfiguration((DoclingComponent) component).setEnableOCR((boolean) value); return true; case "includeLayoutInfo": getOrCreateConfiguration((DoclingComponent) component).setIncludeLayoutInfo((boolean) value); return true; case "lazyStartProducer": ((DoclingComponent) component).setLazyStartProducer((boolean) value); return true; case "ocrLanguage": getOrCreateConfiguration((DoclingComponent) component).setOcrLanguage((java.lang.String) value); return true; case "operation": getOrCreateConfiguration((DoclingComponent) component).setOperation((org.apache.camel.component.docling.DoclingOperations) value); return true; case "outputFormat": getOrCreateConfiguration((DoclingComponent) component).setOutputFormat((java.lang.String) value); return true; + case "useDoclingServe": getOrCreateConfiguration((DoclingComponent) component).setUseDoclingServe((boolean) value); return true; case "autowiredEnabled": ((DoclingComponent) component).setAutowiredEnabled((boolean) value); return true; case "doclingCommand": getOrCreateConfiguration((DoclingComponent) component).setDoclingCommand((java.lang.String) value); return true; case "processTimeout": getOrCreateConfiguration((DoclingComponent) component).setProcessTimeout((long) value); return true; diff --git 
a/dsl/camel-endpointdsl/src/generated/java/org/apache/camel/builder/endpoint/dsl/DoclingEndpointBuilderFactory.java b/dsl/camel-endpointdsl/src/generated/java/org/apache/camel/builder/endpoint/dsl/DoclingEndpointBuilderFactory.java index a0c845ef7b57..40e5ab55a262 100644 --- a/dsl/camel-endpointdsl/src/generated/java/org/apache/camel/builder/endpoint/dsl/DoclingEndpointBuilderFactory.java +++ b/dsl/camel-endpointdsl/src/generated/java/org/apache/camel/builder/endpoint/dsl/DoclingEndpointBuilderFactory.java @@ -76,6 +76,21 @@ public interface DoclingEndpointBuilderFactory { doSetProperty("contentInBody", contentInBody); return this; } + /** + * Docling-serve API URL (e.g., http://localhost:5001). + * + * The option is a: <code>java.lang.String</code> type. + * + * Default: http://localhost:5001 + * Group: producer + * + * @param doclingServeUrl the value to set + * @return the dsl builder + */ + default DoclingEndpointBuilder doclingServeUrl(String doclingServeUrl) { + doSetProperty("doclingServeUrl", doclingServeUrl); + return this; + } /** * Enable OCR processing for scanned documents. * @@ -202,6 +217,36 @@ public interface DoclingEndpointBuilderFactory { doSetProperty("outputFormat", outputFormat); return this; } + /** + * Use docling-serve API instead of CLI command. + * + * The option is a: <code>boolean</code> type. + * + * Default: false + * Group: producer + * + * @param useDoclingServe the value to set + * @return the dsl builder + */ + default DoclingEndpointBuilder useDoclingServe(boolean useDoclingServe) { + doSetProperty("useDoclingServe", useDoclingServe); + return this; + } + /** + * Use docling-serve API instead of CLI command. + * + * The option will be converted to a <code>boolean</code> type. 
+ * + * Default: false + * Group: producer + * + * @param useDoclingServe the value to set + * @return the dsl builder + */ + default DoclingEndpointBuilder useDoclingServe(String useDoclingServe) { + doSetProperty("useDoclingServe", useDoclingServe); + return this; + } /** * Maximum file size in bytes for processing. * diff --git a/components/camel-ai/camel-docling/pom.xml b/test-infra/camel-test-infra-docling/pom.xml similarity index 54% copy from components/camel-ai/camel-docling/pom.xml copy to test-infra/camel-test-infra-docling/pom.xml index 90207a0192f6..ce8492e65f50 100644 --- a/components/camel-ai/camel-docling/pom.xml +++ b/test-infra/camel-test-infra-docling/pom.xml @@ -17,48 +17,36 @@ limitations under the License. --> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - - <modelVersion>4.0.0</modelVersion> - +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <parent> - <artifactId>camel-ai-parent</artifactId> + <artifactId>camel-test-infra-parent</artifactId> <groupId>org.apache.camel</groupId> + <relativePath>../camel-test-infra-parent/pom.xml</relativePath> <version>4.15.0-SNAPSHOT</version> </parent> - <artifactId>camel-docling</artifactId> - <packaging>jar</packaging> - <name>Camel :: AI :: Docling</name> - <description>Docling document processing component</description> - - <dependencies> + <modelVersion>4.0.0</modelVersion> - <dependency> - <groupId>org.apache.camel</groupId> - <artifactId>camel-support</artifactId> - </dependency> + <artifactId>camel-test-infra-docling</artifactId> + <name>Camel :: Test Infra :: Docling</name> - <!-- Docling Python integration - will use process execution --> - <dependency> - 
<groupId>com.fasterxml.jackson.core</groupId> - <artifactId>jackson-databind</artifactId> - <version>${jackson2-version}</version> - </dependency> + <properties> + <assembly.skipAssembly>false</assembly.skipAssembly> + </properties> - <!-- for testing --> + <dependencies> <dependency> <groupId>org.apache.camel</groupId> - <artifactId>camel-test-junit5</artifactId> - <scope>test</scope> + <artifactId>camel-test-infra-common</artifactId> + <version>${project.version}</version> + <type>test-jar</type> </dependency> + <dependency> - <groupId>org.assertj</groupId> - <artifactId>assertj-core</artifactId> - <scope>test</scope> + <groupId>org.testcontainers</groupId> + <artifactId>testcontainers</artifactId> + <version>${testcontainers-version}</version> </dependency> - </dependencies> -</project> \ No newline at end of file +</project> diff --git a/test-infra/camel-test-infra-docling/src/main/java/org/apache/camel/test/infra/docling/common/DoclingProperties.java b/test-infra/camel-test-infra-docling/src/main/java/org/apache/camel/test/infra/docling/common/DoclingProperties.java new file mode 100644 index 000000000000..e96cd1aa4774 --- /dev/null +++ b/test-infra/camel-test-infra-docling/src/main/java/org/apache/camel/test/infra/docling/common/DoclingProperties.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.camel.test.infra.docling.common; + +public final class DoclingProperties { + public static final String DOCLING_SERVER_URL = "docling.server.url"; + public static final String DOCLING_CONTAINER = "docling.container"; + + private DoclingProperties() { + + } +} diff --git a/test-infra/camel-test-infra-docling/src/main/java/org/apache/camel/test/infra/docling/services/DoclingInfraService.java b/test-infra/camel-test-infra-docling/src/main/java/org/apache/camel/test/infra/docling/services/DoclingInfraService.java new file mode 100644 index 000000000000..da41eb071872 --- /dev/null +++ b/test-infra/camel-test-infra-docling/src/main/java/org/apache/camel/test/infra/docling/services/DoclingInfraService.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.camel.test.infra.docling.services; + +import org.apache.camel.test.infra.common.services.InfrastructureService; + +/** + * Test infra service for Docling + */ +public interface DoclingInfraService extends InfrastructureService { + + String getDoclingServerUrl(); +} diff --git a/test-infra/camel-test-infra-docling/src/main/java/org/apache/camel/test/infra/docling/services/DoclingLocalContainerInfraService.java b/test-infra/camel-test-infra-docling/src/main/java/org/apache/camel/test/infra/docling/services/DoclingLocalContainerInfraService.java new file mode 100644 index 000000000000..23470b8a8bc3 --- /dev/null +++ b/test-infra/camel-test-infra-docling/src/main/java/org/apache/camel/test/infra/docling/services/DoclingLocalContainerInfraService.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.camel.test.infra.docling.services; + +import java.time.Duration; + +import org.apache.camel.spi.annotations.InfraService; +import org.apache.camel.test.infra.common.LocalPropertyResolver; +import org.apache.camel.test.infra.common.services.ContainerEnvironmentUtil; +import org.apache.camel.test.infra.common.services.ContainerService; +import org.apache.camel.test.infra.docling.common.DoclingProperties; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.wait.strategy.Wait; +import org.testcontainers.utility.DockerImageName; + +@InfraService(service = DoclingInfraService.class, + description = "Document processing and conversion service", + serviceAlias = { "docling" }) +public class DoclingLocalContainerInfraService implements DoclingInfraService, ContainerService<GenericContainer<?>> { + + private static final Logger LOG = LoggerFactory.getLogger(DoclingLocalContainerInfraService.class); + + private static final String DEFAULT_DOCLING_CONTAINER = "quay.io/docling-project/docling-serve:latest"; + private static final int DOCLING_PORT = 5001; + + private final GenericContainer<?> container; + + public DoclingLocalContainerInfraService() { + this(LocalPropertyResolver.getProperty(DoclingLocalContainerInfraService.class, + DoclingProperties.DOCLING_CONTAINER)); + } + + public DoclingLocalContainerInfraService(String imageName) { + container = initContainer(imageName); + String name = ContainerEnvironmentUtil.containerName(this.getClass()); + if (name != null) { + container.withCreateContainerCmdModifier(cmd -> cmd.withName(name)); + } + } + + public DoclingLocalContainerInfraService(GenericContainer<?> container) { + this.container = container; + } + + protected GenericContainer<?> initContainer(String imageName) { + String doclingImage = imageName != null ? 
imageName : DEFAULT_DOCLING_CONTAINER; + + class TestInfraDoclingContainer extends GenericContainer<TestInfraDoclingContainer> { + public TestInfraDoclingContainer(boolean fixedPort) { + super(DockerImageName.parse(doclingImage)); + + withExposedPorts(DOCLING_PORT) + .waitingFor(Wait.forListeningPorts(DOCLING_PORT)) + .withStartupTimeout(Duration.ofMinutes(3L)); + + if (fixedPort) { + addFixedExposedPort(DOCLING_PORT, DOCLING_PORT); + } + } + } + + return new TestInfraDoclingContainer(ContainerEnvironmentUtil.isFixedPort(this.getClass())); + } + + @Override + public void registerProperties() { + System.setProperty(DoclingProperties.DOCLING_SERVER_URL, getDoclingServerUrl()); + } + + @Override + public void initialize() { + LOG.info("Trying to start the Docling container"); + container.start(); + + registerProperties(); + LOG.info("Docling instance running at {}", getDoclingServerUrl()); + } + + @Override + public void shutdown() { + LOG.info("Stopping the Docling container"); + container.stop(); + } + + @Override + public GenericContainer<?> getContainer() { + return container; + } + + @Override + public String getDoclingServerUrl() { + return String.format("http://%s:%d", container.getHost(), container.getMappedPort(DOCLING_PORT)); + } +} diff --git a/test-infra/camel-test-infra-docling/src/main/java/org/apache/camel/test/infra/docling/services/DoclingRemoteInfraService.java b/test-infra/camel-test-infra-docling/src/main/java/org/apache/camel/test/infra/docling/services/DoclingRemoteInfraService.java new file mode 100644 index 000000000000..e79cdb521323 --- /dev/null +++ b/test-infra/camel-test-infra-docling/src/main/java/org/apache/camel/test/infra/docling/services/DoclingRemoteInfraService.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.camel.test.infra.docling.services; + +import org.apache.camel.test.infra.docling.common.DoclingProperties; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Remote Docling infrastructure service for testing with external Docling instances + */ +public class DoclingRemoteInfraService implements DoclingInfraService { + + private static final Logger LOG = LoggerFactory.getLogger(DoclingRemoteInfraService.class); + + private final String doclingServerUrl; + + public DoclingRemoteInfraService() { + this(System.getProperty(DoclingProperties.DOCLING_SERVER_URL)); + } + + public DoclingRemoteInfraService(String doclingServerUrl) { + this.doclingServerUrl = doclingServerUrl; + } + + @Override + public void registerProperties() { + System.setProperty(DoclingProperties.DOCLING_SERVER_URL, getDoclingServerUrl()); + } + + @Override + public void initialize() { + LOG.info("Using remote Docling instance at {}", getDoclingServerUrl()); + registerProperties(); + } + + @Override + public void shutdown() { + LOG.info("Remote Docling service shutdown (no-op)"); + } + + @Override + public String getDoclingServerUrl() { + return doclingServerUrl; + } +} diff --git a/test-infra/camel-test-infra-docling/src/main/resources/org/apache/camel/test/infra/docling/services/container.properties 
b/test-infra/camel-test-infra-docling/src/main/resources/org/apache/camel/test/infra/docling/services/container.properties new file mode 100644 index 000000000000..31df40ee1894 --- /dev/null +++ b/test-infra/camel-test-infra-docling/src/main/resources/org/apache/camel/test/infra/docling/services/container.properties @@ -0,0 +1,18 @@ +## --------------------------------------------------------------------------- +## Licensed to the Apache Software Foundation (ASF) under one or more +## contributor license agreements. See the NOTICE file distributed with +## this work for additional information regarding copyright ownership. +## The ASF licenses this file to You under the Apache License, Version 2.0 +## (the "License"); you may not use this file except in compliance with +## the License. You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## --------------------------------------------------------------------------- +## Docling container configuration for test-infra +docling.container=quay.io/docling-project/docling-serve:v1.6.0 diff --git a/test-infra/camel-test-infra-docling/src/test/java/org/apache/camel/test/infra/docling/DoclingInfraServiceTest.java b/test-infra/camel-test-infra-docling/src/test/java/org/apache/camel/test/infra/docling/DoclingInfraServiceTest.java new file mode 100644 index 000000000000..bbf970808337 --- /dev/null +++ b/test-infra/camel-test-infra-docling/src/test/java/org/apache/camel/test/infra/docling/DoclingInfraServiceTest.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.camel.test.infra.docling; + +import org.apache.camel.test.infra.docling.services.DoclingInfraService; +import org.apache.camel.test.infra.docling.services.DoclingRemoteInfraService; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +public class DoclingInfraServiceTest { + + @Test + public void testRemoteServiceConfiguration() { + DoclingInfraService service = new DoclingRemoteInfraService("http://localhost:5001"); + + assertEquals("http://localhost:5001", service.getDoclingServerUrl()); + } + + @Test + public void testRemoteServiceWithSystemProperties() { + System.setProperty("docling.server.url", "http://test:5001"); + + try { + DoclingInfraService service = new DoclingRemoteInfraService(); + + assertEquals("http://test:5001", service.getDoclingServerUrl()); + } finally { + System.clearProperty("docling.server.url"); + } + } +} diff --git a/test-infra/camel-test-infra-docling/src/test/java/org/apache/camel/test/infra/docling/services/DoclingService.java b/test-infra/camel-test-infra-docling/src/test/java/org/apache/camel/test/infra/docling/services/DoclingService.java new file mode 100644 index 000000000000..134db8ce8fa9 --- /dev/null +++ 
b/test-infra/camel-test-infra-docling/src/test/java/org/apache/camel/test/infra/docling/services/DoclingService.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.camel.test.infra.docling.services; + +import org.apache.camel.test.infra.common.services.ContainerTestService; +import org.apache.camel.test.infra.common.services.TestService; + +/** + * Test infra service for Docling + */ +public interface DoclingService extends TestService, DoclingInfraService, ContainerTestService { +} diff --git a/test-infra/camel-test-infra-docling/src/test/java/org/apache/camel/test/infra/docling/services/DoclingServiceFactory.java b/test-infra/camel-test-infra-docling/src/test/java/org/apache/camel/test/infra/docling/services/DoclingServiceFactory.java new file mode 100644 index 000000000000..6e38bdc21760 --- /dev/null +++ b/test-infra/camel-test-infra-docling/src/test/java/org/apache/camel/test/infra/docling/services/DoclingServiceFactory.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.camel.test.infra.docling.services; + +import org.apache.camel.test.infra.common.services.SimpleTestServiceBuilder; + +public final class DoclingServiceFactory { + private DoclingServiceFactory() { + + } + + public static SimpleTestServiceBuilder<DoclingService> builder() { + return new SimpleTestServiceBuilder<>("docling"); + } + + public static DoclingService createService() { + return builder() + .addLocalMapping(DoclingLocalContainerService::new) + .build(); + } + + public static class DoclingLocalContainerService extends DoclingLocalContainerInfraService + implements DoclingService { + } +}
