This is an automated email from the ASF dual-hosted git repository.

acosentino pushed a commit to branch CAMEL-23212
in repository https://gitbox.apache.org/repos/asf/camel.git

commit 62217e291915c96049774104c32a19d6c1e3d759
Author: Andrea Cosentino <[email protected]>
AuthorDate: Wed Mar 18 12:14:52 2026 +0100

    CAMEL-23212: Camel-Docling: Harden CLI argument injection validation in 
camel-docling
    
    Switch from blocklist to allowlist approach for custom CLI argument
    validation in DoclingProducer. Define a set of ~55 recognized docling
    CLI flags organized by category (format, pipeline, OCR, tables, PDF,
    enrichment, output, advanced, debug, performance, info). Any flag not
    in the allowlist is rejected with a clear error message.
    
    Add defense-in-depth rejection of shell metacharacters (;, |, `, $())
    in all custom argument values, even though ProcessBuilder uses
    list-based invocation and does not interpret them.
    
    Enhance path traversal detection by normalizing path-like values via
    Path.normalize() to catch traversal sequences that bypass the literal
    ../ check (e.g., /safe/subdir/../../etc/passwd).
    
    Add 8 new unit tests covering semicolon, pipe, backtick, and $()
    injection attempts, unknown long/short flag rejection, -vv verbosity
    acceptance, and normalized path traversal detection.
    
    Document all allowed custom arguments in docling-component.adoc.
    
    Signed-off-by: Andrea Cosentino <[email protected]>
---
 .../src/main/docs/docling-component.adoc           |  53 ++++++++
 .../camel/component/docling/DoclingProducer.java   | 151 ++++++++++++++++++---
 .../docling/DoclingCustomArgsValidationTest.java   | 122 ++++++++++++++++-
 3 files changed, 307 insertions(+), 19 deletions(-)

diff --git 
a/components/camel-ai/camel-docling/src/main/docs/docling-component.adoc 
b/components/camel-ai/camel-docling/src/main/docs/docling-component.adoc
index 3459720929be..68595ddcdac4 100644
--- a/components/camel-ai/camel-docling/src/main/docs/docling-component.adoc
+++ b/components/camel-ai/camel-docling/src/main/docs/docling-component.adoc
@@ -381,6 +381,59 @@ YAML::
 ----
 ====
 
+=== Custom argument validation
+
+When passing custom CLI arguments via the `CamelDoclingCustomArguments` 
header, the component enforces an **allowlist** of recognized docling CLI flags.
+Only the following flags are permitted:
+
+[width="100%",cols="2,4",options="header"]
+|===
+| Category | Allowed flags
+
+| Input/output format
+| `--from`, `--to`
+
+| Pipeline
+| `--pipeline`, `--vlm-model`, `--asr-model`
+
+| OCR
+| `--ocr`, `--no-ocr`, `--force-ocr`, `--no-force-ocr`, `--ocr-engine`, 
`--ocr-lang`, `--psm`
+
+| Tables
+| `--tables`, `--no-tables`, `--table-mode`
+
+| PDF
+| `--pdf-backend`, `--pdf-password`
+
+| Enrichment
+| `--enrich-code`, `--no-enrich-code`, `--enrich-formula`, 
`--no-enrich-formula`, `--enrich-picture-classes`, 
`--no-enrich-picture-classes`, `--enrich-picture-description`, 
`--no-enrich-picture-description`, `--enrich-chart-extraction`, 
`--no-enrich-chart-extraction`
+
+| Output formatting
+| `--image-export-mode`, `--show-layout`, `--no-show-layout`
+
+| Advanced
+| `--headers`, `--artifacts-path`, `--enable-remote-services`, 
`--no-enable-remote-services`, `--allow-external-plugins`, 
`--no-allow-external-plugins`, `--show-external-plugins`, 
`--no-show-external-plugins`, `--document-timeout`, `--device`, 
`--num-threads`, `--page-batch-size`
+
+| Debug
+| `--verbose`, `-v` / `-vv` / `-vvv`, `--debug-visualize-cells`, 
`--no-debug-visualize-cells`, `--debug-visualize-ocr`, 
`--no-debug-visualize-ocr`, `--debug-visualize-layout`, 
`--no-debug-visualize-layout`, `--debug-visualize-tables`, 
`--no-debug-visualize-tables`
+
+| Performance
+| `--abort-on-error`, `--no-abort-on-error`, `--profiling`, `--no-profiling`, 
`--save-profiling`, `--no-save-profiling`
+
+| Info
+| `--version`, `--help`, `--logo`
+
+|===
+
+The `--output` (`-o`) flag is **not permitted** because the output directory 
is managed by the producer.
+Use the `CamelDoclingOutputFilePath` header or endpoint configuration instead.
+
+Additionally, the following are rejected:
+
+- **Shell metacharacters**: `;`, `|`, `` ` ``, `$()` — blocked as 
defense-in-depth even though ProcessBuilder does not interpret them.
+- **Path traversal**: `../`, `..\`, and paths that resolve to traversal after 
normalization.
+- **Unknown flags**: any flag not in the allowlist above.
+
 === Extracting document metadata
 
 [tabs]
diff --git 
a/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingProducer.java
 
b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingProducer.java
index 17bfc05e851b..437fa8f540cb 100644
--- 
a/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingProducer.java
+++ 
b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingProducer.java
@@ -30,6 +30,7 @@ import java.util.Base64;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.CompletionStage;
 import java.util.concurrent.ConcurrentHashMap;
@@ -86,6 +87,56 @@ public class DoclingProducer extends DefaultProducer {
 
     private static final Logger LOG = 
LoggerFactory.getLogger(DoclingProducer.class);
 
+    /**
+     * Recognized docling CLI flags. Only these flags are permitted in custom 
arguments (allowlist approach). Flags
+     * managed by the producer ({@code --output}, {@code -o}) are excluded and 
checked separately.
+     */
+    private static final Set<String> ALLOWED_DOCLING_FLAGS = Set.of(
+            // Input/output format
+            "--from", "--to",
+            // Pipeline
+            "--pipeline", "--vlm-model", "--asr-model",
+            // OCR
+            "--ocr", "--no-ocr", "--force-ocr", "--no-force-ocr",
+            "--ocr-engine", "--ocr-lang", "--psm",
+            // Tables
+            "--tables", "--no-tables", "--table-mode",
+            // PDF
+            "--pdf-backend", "--pdf-password",
+            // Enrichment
+            "--enrich-code", "--no-enrich-code",
+            "--enrich-formula", "--no-enrich-formula",
+            "--enrich-picture-classes", "--no-enrich-picture-classes",
+            "--enrich-picture-description", "--no-enrich-picture-description",
+            "--enrich-chart-extraction", "--no-enrich-chart-extraction",
+            // Output formatting
+            "--image-export-mode",
+            "--show-layout", "--no-show-layout",
+            // Advanced
+            "--headers", "--artifacts-path",
+            "--enable-remote-services", "--no-enable-remote-services",
+            "--allow-external-plugins", "--no-allow-external-plugins",
+            "--show-external-plugins", "--no-show-external-plugins",
+            "--document-timeout", "--device", "--num-threads", 
"--page-batch-size",
+            // Debug
+            "--verbose",
+            "--debug-visualize-cells", "--no-debug-visualize-cells",
+            "--debug-visualize-ocr", "--no-debug-visualize-ocr",
+            "--debug-visualize-layout", "--no-debug-visualize-layout",
+            "--debug-visualize-tables", "--no-debug-visualize-tables",
+            // Performance / error handling
+            "--abort-on-error", "--no-abort-on-error",
+            "--profiling", "--no-profiling",
+            "--save-profiling", "--no-save-profiling",
+            // Info
+            "--version", "--help", "--logo");
+
+    /**
+     * Flags managed by the producer that must not be overridden through 
custom arguments. The output directory is
+     * controlled by the producer via endpoint configuration or the {@link 
DoclingHeaders#OUTPUT_FILE_PATH} header.
+     */
+    private static final Set<String> PRODUCER_MANAGED_FLAGS = 
Set.of("--output", "-o");
+
     private DoclingConfiguration configuration;
     private DoclingServeApi doclingServeApi;
     private ObjectMapper objectMapper;
@@ -1876,14 +1927,10 @@ public class DoclingProducer extends DefaultProducer {
     }
 
     /**
-     * Validates custom CLI arguments to ensure they do not conflict with 
producer-managed options such as the output
-     * directory.
+     * Validates custom CLI arguments using an allowlist approach. Only 
recognized docling CLI flags are permitted.
+     * Producer-managed flags, shell metacharacters, and path traversal 
sequences are rejected.
      */
     private void validateCustomArguments(List<String> customArgs) {
-        // The output directory is managed by the producer via endpoint 
configuration
-        // or the OUTPUT_FILE_PATH header, so it must not be overridden 
through custom arguments.
-        List<String> blockedFlags = List.of("--output", "-o");
-
         for (int i = 0; i < customArgs.size(); i++) {
             String arg = customArgs.get(i);
 
@@ -1891,20 +1938,88 @@ public class DoclingProducer extends DefaultProducer {
                 throw new IllegalArgumentException("Custom argument at index " 
+ i + " is null");
             }
 
-            String argLower = arg.toLowerCase();
-            for (String blocked : blockedFlags) {
-                if (argLower.equals(blocked) || argLower.startsWith(blocked + 
"=")) {
-                    throw new IllegalArgumentException(
-                            "Custom argument '" + blocked
-                                                       + "' is not allowed 
because the output directory is managed by the producer. "
-                                                       + "Use the " + 
DoclingHeaders.OUTPUT_FILE_PATH
-                                                       + " header or endpoint 
configuration instead.");
-                }
+            rejectShellMetacharacters(arg, i);
+
+            if (arg.startsWith("--")) {
+                validateLongFlag(arg, i);
+            } else if (arg.startsWith("-")) {
+                validateShortFlag(arg, i);
+            } else {
+                validatePathSafety(arg, i);
             }
+        }
+    }
 
-            if (arg.contains("../") || arg.contains("..\\")) {
-                throw new IllegalArgumentException(
-                        "Custom argument at index " + i + " contains a 
relative path traversal sequence");
+    private void validateLongFlag(String arg, int index) {
+        String flag = arg.contains("=") ? arg.substring(0, arg.indexOf('=')) : 
arg;
+        String flagLower = flag.toLowerCase();
+
+        if (PRODUCER_MANAGED_FLAGS.contains(flagLower)) {
+            throw new IllegalArgumentException(
+                    "Custom argument '" + flag
+                                               + "' is not allowed because the 
output directory is managed by the producer. "
+                                               + "Use the " + 
DoclingHeaders.OUTPUT_FILE_PATH
+                                               + " header or endpoint 
configuration instead.");
+        }
+
+        if (!ALLOWED_DOCLING_FLAGS.contains(flagLower)) {
+            throw new IllegalArgumentException(
+                    "Custom argument '" + flag
+                                               + "' is not a recognized 
docling CLI flag. "
+                                               + "Only known docling flags are 
permitted as custom arguments.");
+        }
+
+        if (arg.contains("=")) {
+            String value = arg.substring(arg.indexOf('=') + 1);
+            validatePathSafety(value, index);
+        }
+    }
+
+    private void validateShortFlag(String arg, int index) {
+        String flagLower = arg.toLowerCase();
+
+        // Allow -v, -vv, -vvv (verbosity levels)
+        if (flagLower.matches("-v+")) {
+            return;
+        }
+
+        if (PRODUCER_MANAGED_FLAGS.contains(flagLower)) {
+            throw new IllegalArgumentException(
+                    "Custom argument '" + arg
+                                               + "' is not allowed because the 
output directory is managed by the producer. "
+                                               + "Use the " + 
DoclingHeaders.OUTPUT_FILE_PATH
+                                               + " header or endpoint 
configuration instead.");
+        }
+
+        throw new IllegalArgumentException(
+                "Custom argument '" + arg
+                                           + "' is not a recognized docling 
CLI flag. "
+                                           + "Only known docling flags are 
permitted as custom arguments.");
+    }
+
+    private static void rejectShellMetacharacters(String arg, int index) {
+        if (arg.contains(";") || arg.contains("|") || arg.contains("`") || 
arg.contains("$(")) {
+            throw new IllegalArgumentException(
+                    "Custom argument at index " + index
+                                               + " contains a disallowed 
character or pattern. "
+                                               + "Shell metacharacters (;, |, 
`, $()) are not permitted.");
+        }
+    }
+
+    private static void validatePathSafety(String value, int index) {
+        if (value.contains("../") || value.contains("..\\")) {
+            throw new IllegalArgumentException(
+                    "Custom argument at index " + index + " contains a 
relative path traversal sequence");
+        }
+        // Normalize path-like values to detect traversal via redundant 
separators
+        if (value.contains("/") || value.contains("\\")) {
+            Path normalized = Paths.get(value).normalize();
+            for (Path component : normalized) {
+                if ("..".equals(component.toString())) {
+                    throw new IllegalArgumentException(
+                            "Custom argument at index " + index
+                                                       + " resolves to a path 
containing traversal after normalization");
+                }
             }
         }
     }
diff --git 
a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/DoclingCustomArgsValidationTest.java
 
b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/DoclingCustomArgsValidationTest.java
index 3699b5fc451f..d541d5155b71 100644
--- 
a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/DoclingCustomArgsValidationTest.java
+++ 
b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/DoclingCustomArgsValidationTest.java
@@ -89,7 +89,7 @@ class DoclingCustomArgsValidationTest extends 
CamelTestSupport {
         CamelExecutionException ex = 
assertThrows(CamelExecutionException.class, () -> {
             template.requestBodyAndHeaders("direct:cli-convert",
                     inputFile.toString(),
-                    java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS, 
List.of("--some-flag", "../../etc/passwd")));
+                    java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS, 
List.of("--artifacts-path", "../../etc/passwd")));
         });
 
         assertInstanceOf(IllegalArgumentException.class, ex.getCause());
@@ -145,6 +145,126 @@ class DoclingCustomArgsValidationTest extends 
CamelTestSupport {
                 "No custom arguments should not trigger argument validation");
     }
 
+    // -- Shell metacharacter injection tests --
+
+    @Test
+    void customArgsWithSemicolonAreRejected() throws Exception {
+        Path inputFile = createInputFile();
+
+        CamelExecutionException ex = 
assertThrows(CamelExecutionException.class, () -> {
+            template.requestBodyAndHeaders("direct:cli-convert",
+                    inputFile.toString(),
+                    java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS, 
List.of("--verbose", "; rm -rf /")));
+        });
+
+        assertInstanceOf(IllegalArgumentException.class, ex.getCause());
+        assertTrue(ex.getCause().getMessage().contains("disallowed"));
+    }
+
+    @Test
+    void customArgsWithPipeAreRejected() throws Exception {
+        Path inputFile = createInputFile();
+
+        CamelExecutionException ex = 
assertThrows(CamelExecutionException.class, () -> {
+            template.requestBodyAndHeaders("direct:cli-convert",
+                    inputFile.toString(),
+                    java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS, 
List.of("--verbose", "| cat /etc/passwd")));
+        });
+
+        assertInstanceOf(IllegalArgumentException.class, ex.getCause());
+        assertTrue(ex.getCause().getMessage().contains("disallowed"));
+    }
+
+    @Test
+    void customArgsWithBacktickAreRejected() throws Exception {
+        Path inputFile = createInputFile();
+
+        CamelExecutionException ex = 
assertThrows(CamelExecutionException.class, () -> {
+            template.requestBodyAndHeaders("direct:cli-convert",
+                    inputFile.toString(),
+                    java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS, 
List.of("--verbose", "`whoami`")));
+        });
+
+        assertInstanceOf(IllegalArgumentException.class, ex.getCause());
+        assertTrue(ex.getCause().getMessage().contains("disallowed"));
+    }
+
+    @Test
+    void customArgsWithCommandSubstitutionAreRejected() throws Exception {
+        Path inputFile = createInputFile();
+
+        CamelExecutionException ex = 
assertThrows(CamelExecutionException.class, () -> {
+            template.requestBodyAndHeaders("direct:cli-convert",
+                    inputFile.toString(),
+                    java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS, 
List.of("--verbose", "$(id)")));
+        });
+
+        assertInstanceOf(IllegalArgumentException.class, ex.getCause());
+        assertTrue(ex.getCause().getMessage().contains("disallowed"));
+    }
+
+    // -- Allowlist enforcement tests --
+
+    @Test
+    void customArgsWithUnknownFlagAreRejected() throws Exception {
+        Path inputFile = createInputFile();
+
+        CamelExecutionException ex = 
assertThrows(CamelExecutionException.class, () -> {
+            template.requestBodyAndHeaders("direct:cli-convert",
+                    inputFile.toString(),
+                    java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS, 
List.of("--unknown-flag")));
+        });
+
+        assertInstanceOf(IllegalArgumentException.class, ex.getCause());
+        assertTrue(ex.getCause().getMessage().contains("not a recognized 
docling CLI flag"));
+    }
+
+    @Test
+    void customArgsWithUnknownShortFlagAreRejected() throws Exception {
+        Path inputFile = createInputFile();
+
+        CamelExecutionException ex = 
assertThrows(CamelExecutionException.class, () -> {
+            template.requestBodyAndHeaders("direct:cli-convert",
+                    inputFile.toString(),
+                    java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS, 
List.of("-x")));
+        });
+
+        assertInstanceOf(IllegalArgumentException.class, ex.getCause());
+        assertTrue(ex.getCause().getMessage().contains("not a recognized 
docling CLI flag"));
+    }
+
+    @Test
+    void customArgsWithVerbosityLevelsAreAccepted() throws Exception {
+        Path inputFile = createInputFile();
+
+        // -v and -vv should pass validation (verbosity levels)
+        CamelExecutionException ex = 
assertThrows(CamelExecutionException.class, () -> {
+            template.requestBodyAndHeaders("direct:cli-convert",
+                    inputFile.toString(),
+                    java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS, 
List.of("-vv")));
+        });
+
+        assertFalse(ex.getCause() instanceof IllegalArgumentException,
+                "-vv should pass validation; failure should come from process 
execution");
+    }
+
+    @Test
+    void customArgsWithNormalizedPathTraversalAreRejected() throws Exception {
+        Path inputFile = createInputFile();
+
+        // Path traversal that would be caught only after normalization
+        CamelExecutionException ex = 
assertThrows(CamelExecutionException.class, () -> {
+            template.requestBodyAndHeaders("direct:cli-convert",
+                    inputFile.toString(),
+                    java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS,
+                            List.of("--artifacts-path", 
"/safe/path/subdir/../../etc/passwd")));
+        });
+
+        assertInstanceOf(IllegalArgumentException.class, ex.getCause());
+        assertTrue(ex.getCause().getMessage().contains("path traversal") ||
+                ex.getCause().getMessage().contains("traversal after 
normalization"));
+    }
+
     private Path createInputFile() throws Exception {
         Path file = tempDir.resolve("test-input.txt");
         Files.writeString(file, "test content");

Reply via email to