This is an automated email from the ASF dual-hosted git repository.

apupier pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/camel.git


The following commit(s) were added to refs/heads/main by this push:
     new a79dc22489dd Add basic tests for docling extract_structured_data
a79dc22489dd is described below

commit a79dc22489dd16471474b0ee0c503c8ec2878fc9
Author: Aurélien Pupier <[email protected]>
AuthorDate: Fri Jan 23 16:33:45 2026 +0100

    Add basic tests for docling extract_structured_data
    
    Signed-off-by: Aurélien Pupier <[email protected]>
---
 .../{ => integration}/BatchProcessingIT.java       |  20 ++--
 .../docling/integration/DoclingITestSupport.java   |  50 +++++++++
 .../integration/DoclingServeProducerIT.java        |  30 +-----
 .../integration/ExtractStructuredDataIT.java       | 118 +++++++++++++++++++++
 .../docling/integration/MetadataExtractionIT.java  |  30 +-----
 .../src/test/resources/picture_classification.pdf  | Bin 0 -> 212855 bytes
 6 files changed, 177 insertions(+), 71 deletions(-)

diff --git 
a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/BatchProcessingIT.java
 
b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/BatchProcessingIT.java
similarity index 96%
rename from 
components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/BatchProcessingIT.java
rename to 
components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/BatchProcessingIT.java
index f8d05dfb662a..7a8bec034b94 100644
--- 
a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/BatchProcessingIT.java
+++ 
b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/BatchProcessingIT.java
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.camel.component.docling;
+package org.apache.camel.component.docling.integration;
 
 import java.nio.file.Files;
 import java.nio.file.Path;
@@ -23,13 +23,12 @@ import java.util.List;
 
 import org.apache.camel.CamelContext;
 import org.apache.camel.builder.RouteBuilder;
-import org.apache.camel.test.infra.docling.services.DoclingService;
-import org.apache.camel.test.infra.docling.services.DoclingServiceFactory;
-import org.apache.camel.test.junit5.CamelTestSupport;
+import org.apache.camel.component.docling.BatchConversionResult;
+import org.apache.camel.component.docling.BatchProcessingResults;
+import org.apache.camel.component.docling.DoclingComponent;
+import org.apache.camel.component.docling.DoclingConfiguration;
+import org.apache.camel.component.docling.DoclingHeaders;
 import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.extension.RegisterExtension;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
@@ -39,12 +38,7 @@ import static org.junit.jupiter.api.Assertions.fail;
 /**
  * Integration test for batch processing operations using test-infra for 
container management.
  */
-public class BatchProcessingIT extends CamelTestSupport {
-
-    private static final Logger LOG = 
LoggerFactory.getLogger(BatchProcessingIT.class);
-
-    @RegisterExtension
-    static DoclingService doclingService = 
DoclingServiceFactory.createService();
+class BatchProcessingIT extends DoclingITestSupport {
 
     @Test
     public void testBatchConvertToMarkdown() throws Exception {
diff --git 
a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/DoclingITestSupport.java
 
b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/DoclingITestSupport.java
new file mode 100644
index 000000000000..458f0bcaab8e
--- /dev/null
+++ 
b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/DoclingITestSupport.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.docling.integration;
+
+import org.apache.camel.CamelContext;
+import org.apache.camel.component.docling.DoclingComponent;
+import org.apache.camel.component.docling.DoclingConfiguration;
+import org.apache.camel.test.infra.docling.services.DoclingService;
+import org.apache.camel.test.infra.docling.services.DoclingServiceFactory;
+import org.apache.camel.test.junit5.CamelTestSupport;
+import org.junit.jupiter.api.extension.RegisterExtension;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public abstract class DoclingITestSupport extends CamelTestSupport {
+
+    protected static final Logger LOG = 
LoggerFactory.getLogger(DoclingITestSupport.class);
+
+    @RegisterExtension
+    static DoclingService doclingService = 
DoclingServiceFactory.createService();
+
+    @Override
+    protected CamelContext createCamelContext() throws Exception {
+        CamelContext context = super.createCamelContext();
+        DoclingComponent docling = context.getComponent("docling", 
DoclingComponent.class);
+        DoclingConfiguration conf = new DoclingConfiguration();
+        conf.setUseDoclingServe(true);
+        conf.setDoclingServeUrl(doclingService.doclingServerUrl());
+        docling.setConfiguration(conf);
+
+        LOG.info("Testing Docling-Serve at: {}", 
doclingService.doclingServerUrl());
+
+        return context;
+    }
+
+}
diff --git 
a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/DoclingServeProducerIT.java
 
b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/DoclingServeProducerIT.java
index f48e9fa1b0c7..859eb0b02f6f 100644
--- 
a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/DoclingServeProducerIT.java
+++ 
b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/DoclingServeProducerIT.java
@@ -20,22 +20,13 @@ import java.io.File;
 import java.nio.file.Files;
 import java.nio.file.Path;
 
-import org.apache.camel.CamelContext;
 import org.apache.camel.builder.RouteBuilder;
 import org.apache.camel.component.docling.ConversionStatus;
-import org.apache.camel.component.docling.DoclingComponent;
-import org.apache.camel.component.docling.DoclingConfiguration;
 import org.apache.camel.component.docling.DoclingHeaders;
 import org.apache.camel.component.docling.DoclingOperations;
-import org.apache.camel.test.infra.docling.services.DoclingService;
-import org.apache.camel.test.infra.docling.services.DoclingServiceFactory;
-import org.apache.camel.test.junit5.CamelTestSupport;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.condition.DisabledIfSystemProperty;
-import org.junit.jupiter.api.extension.RegisterExtension;
 import org.junit.jupiter.api.io.TempDir;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -48,30 +39,11 @@ import static org.junit.jupiter.api.Assertions.fail;
  * container for testing without manual setup.
  */
 @DisabledIfSystemProperty(named = "ci.env.name", matches = ".*", 
disabledReason = "Too much resources on GitHub Actions")
-public class DoclingServeProducerIT extends CamelTestSupport {
-
-    private static final Logger LOG = 
LoggerFactory.getLogger(DoclingServeProducerIT.class);
-
-    @RegisterExtension
-    static DoclingService doclingService = 
DoclingServiceFactory.createService();
+class DoclingServeProducerIT extends DoclingITestSupport {
 
     @TempDir
     Path outputDir;
 
-    @Override
-    protected CamelContext createCamelContext() throws Exception {
-        CamelContext context = super.createCamelContext();
-        DoclingComponent docling = context.getComponent("docling", 
DoclingComponent.class);
-        DoclingConfiguration conf = new DoclingConfiguration();
-        conf.setUseDoclingServe(true);
-        conf.setDoclingServeUrl(doclingService.doclingServerUrl());
-        docling.setConfiguration(conf);
-
-        LOG.info("Testing Docling-Serve at: {}", 
doclingService.doclingServerUrl());
-
-        return context;
-    }
-
     @Test
     public void testMarkdownConversionWithDoclingServe() throws Exception {
         Path testFile = createTestFile();
diff --git 
a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/ExtractStructuredDataIT.java
 
b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/ExtractStructuredDataIT.java
new file mode 100644
index 000000000000..703fdcb27778
--- /dev/null
+++ 
b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/ExtractStructuredDataIT.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.docling.integration;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.util.List;
+
+import ai.docling.core.DoclingDocument;
+import ai.docling.core.DoclingDocument.PictureItem;
+import ai.docling.core.DoclingDocument.TableData;
+import ai.docling.core.DoclingDocument.TableItem;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.component.docling.DoclingHeaders;
+import org.apache.camel.component.docling.DoclingOperations;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.DisabledIfSystemProperty;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+@DisabledIfSystemProperty(named = "ci.env.name", matches = ".*", 
disabledReason = "Too much resources on GitHub Actions")
+class ExtractStructuredDataIT extends DoclingITestSupport {
+
+    @Test
+    void extractTableFromMarkdown() throws Exception {
+        Path testFile = createTestFile();
+
+        String result = 
template.requestBodyAndHeader("direct:extract-structured-data",
+                testFile.toString(),
+                DoclingHeaders.OPERATION, 
DoclingOperations.EXTRACT_STRUCTURED_DATA, String.class);
+        ObjectMapper mapper = new ObjectMapper();
+        DoclingDocument doclingDocument = mapper.readValue(result, 
DoclingDocument.class);
+
+        List<TableItem> tables = doclingDocument.getTables();
+        assertThat(tables).hasSize(1);
+        TableData table = tables.get(0).getData();
+        assertThat(table.getNumCols()).isEqualTo(3);
+        assertThat(table.getNumRows()).isEqualTo(4);
+        assertThat(table.getGrid().get(1).get(2).getText()).isEqualTo("C1");
+    }
+
+    @Test
+    void extractImageFromPDF() throws Exception {
+        Path testFile = createTestPdfFile();
+
+        String result = 
template.requestBodyAndHeader("direct:extract-structured-data",
+                testFile.toString(),
+                DoclingHeaders.OPERATION, 
DoclingOperations.EXTRACT_STRUCTURED_DATA, String.class);
+        ObjectMapper mapper = new ObjectMapper();
+        DoclingDocument doclingDocument = mapper.readValue(result, 
DoclingDocument.class);
+
+        List<PictureItem> pictures = doclingDocument.getPictures();
+        assertThat(pictures).hasSize(2);
+    }
+
+    private Path createTestFile() throws Exception {
+        Path tempFile = 
Files.createTempFile("docling-extract-structureddata-test-", ".md");
+        String content = """
+                # Test Document
+
+                This is a test document for structured data
+
+                ## Section 1
+
+                Some content here.
+
+                - List item 1
+                - List item 2
+
+                ## Section 2
+
+                |  A |  B |  C |
+                |---|---|---|
+                | A1  | B1  | C1  |
+                |   A2|  B2 | C2  |
+                |  A3 |  B3 | C3  |
+                """;
+        Files.write(tempFile, content.getBytes());
+        return tempFile;
+    }
+
+    private Path createTestPdfFile() throws IOException {
+        try (InputStream is = 
getClass().getClassLoader().getResourceAsStream("picture_classification.pdf")) {
+            java.nio.file.Path tempFile = 
Files.createTempFile("docling-test-picture_classification", ".pdf");
+            Files.copy(is, tempFile.toAbsolutePath(), 
StandardCopyOption.REPLACE_EXISTING);
+            return tempFile;
+        }
+    }
+
+    @Override
+    protected RouteBuilder createRouteBuilder() throws Exception {
+        return new RouteBuilder() {
+            @Override
+            public void configure() throws Exception {
+                from("direct:extract-structured-data")
+                        
.to("docling:convert?operation=EXTRACT_STRUCTURED_DATA&contentInBody=true");
+            }
+        };
+    }
+}
diff --git 
a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/MetadataExtractionIT.java
 
b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/MetadataExtractionIT.java
index 0e9003801d99..5b051a62a91e 100644
--- 
a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/MetadataExtractionIT.java
+++ 
b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/integration/MetadataExtractionIT.java
@@ -23,19 +23,10 @@ import java.nio.file.Path;
 import java.nio.file.StandardCopyOption;
 import java.util.Map;
 
-import org.apache.camel.CamelContext;
 import org.apache.camel.builder.RouteBuilder;
-import org.apache.camel.component.docling.DoclingComponent;
-import org.apache.camel.component.docling.DoclingConfiguration;
 import org.apache.camel.component.docling.DocumentMetadata;
-import org.apache.camel.test.infra.docling.services.DoclingService;
-import org.apache.camel.test.infra.docling.services.DoclingServiceFactory;
-import org.apache.camel.test.junit5.CamelTestSupport;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.condition.DisabledIfSystemProperty;
-import org.junit.jupiter.api.extension.RegisterExtension;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -46,26 +37,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
  * Integration test for metadata extraction operations using test-infra for 
container management.
  */
 @DisabledIfSystemProperty(named = "ci.env.name", matches = ".*", 
disabledReason = "Too much resources on GitHub Actions")
-public class MetadataExtractionIT extends CamelTestSupport {
-
-    private static final Logger LOG = 
LoggerFactory.getLogger(MetadataExtractionIT.class);
-
-    @RegisterExtension
-    static DoclingService doclingService = 
DoclingServiceFactory.createService();
-
-    @Override
-    protected CamelContext createCamelContext() throws Exception {
-        CamelContext context = super.createCamelContext();
-        DoclingComponent docling = context.getComponent("docling", 
DoclingComponent.class);
-        DoclingConfiguration conf = new DoclingConfiguration();
-        conf.setUseDoclingServe(true);
-        conf.setDoclingServeUrl(doclingService.doclingServerUrl());
-        docling.setConfiguration(conf);
-
-        LOG.info("Testing Docling-Serve metadata extraction at: {}", 
doclingService.doclingServerUrl());
-
-        return context;
-    }
+public class MetadataExtractionIT extends DoclingITestSupport {
 
     @Test
     public void testBasicMetadataExtraction() throws Exception {
diff --git 
a/components/camel-ai/camel-docling/src/test/resources/picture_classification.pdf
 
b/components/camel-ai/camel-docling/src/test/resources/picture_classification.pdf
new file mode 100644
index 000000000000..230f74fd41a8
Binary files /dev/null and 
b/components/camel-ai/camel-docling/src/test/resources/picture_classification.pdf
 differ

Reply via email to