This is an automated email from the ASF dual-hosted git repository.
acosentino pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/camel.git
The following commit(s) were added to refs/heads/main by this push:
new f86fda0442c6 CAMEL-23212: Camel-Docling: Harden CLI argument injection
validation in camel-docling (#22082)
f86fda0442c6 is described below
commit f86fda0442c65b9d13ce3aa8ac676233b64e3351
Author: Andrea Cosentino <[email protected]>
AuthorDate: Wed Mar 18 16:29:05 2026 +0100
CAMEL-23212: Camel-Docling: Harden CLI argument injection validation in
camel-docling (#22082)
Switch from blocklist to allowlist approach for custom CLI argument
validation in DoclingProducer. Define a set of ~55 recognized docling
CLI flags organized by category (format, pipeline, OCR, tables, PDF,
enrichment, output, advanced, debug, performance, info). Any flag not
in the allowlist is rejected with a clear error message.
Add defense-in-depth rejection of shell metacharacters (;, |, `, $())
in all custom argument values, even though ProcessBuilder uses
list-based invocation and does not interpret them.
Enhance path traversal detection by normalizing path-like values via
Path.normalize() to catch traversal sequences that bypass the literal
../ check (e.g., ./.., which normalizes to ..).
Add 8 new unit tests covering semicolon, pipe, backtick, and $()
injection attempts, unknown long/short flag rejection, -vv verbosity
acceptance, and normalized path traversal detection.
Document all allowed custom arguments in docling-component.adoc.
Signed-off-by: Andrea Cosentino <[email protected]>
---
.../src/main/docs/docling-component.adoc | 53 ++++++++
.../camel/component/docling/DoclingProducer.java | 151 ++++++++++++++++++---
.../docling/DoclingCustomArgsValidationTest.java | 122 ++++++++++++++++-
3 files changed, 307 insertions(+), 19 deletions(-)
diff --git
a/components/camel-ai/camel-docling/src/main/docs/docling-component.adoc
b/components/camel-ai/camel-docling/src/main/docs/docling-component.adoc
index 3459720929be..68595ddcdac4 100644
--- a/components/camel-ai/camel-docling/src/main/docs/docling-component.adoc
+++ b/components/camel-ai/camel-docling/src/main/docs/docling-component.adoc
@@ -381,6 +381,59 @@ YAML::
----
====
+=== Custom argument validation
+
+When passing custom CLI arguments via the `CamelDoclingCustomArguments` header, the component enforces an **allowlist** of recognized docling CLI flags.
+Only the following flags are permitted:
+
+[width="100%",cols="2,4",options="header"]
+|===
+| Category | Allowed flags
+
+| Input/output format
+| `--from`, `--to`
+
+| Pipeline
+| `--pipeline`, `--vlm-model`, `--asr-model`
+
+| OCR
+| `--ocr`, `--no-ocr`, `--force-ocr`, `--no-force-ocr`, `--ocr-engine`, `--ocr-lang`, `--psm`
+
+| Tables
+| `--tables`, `--no-tables`, `--table-mode`
+
+| PDF
+| `--pdf-backend`, `--pdf-password`
+
+| Enrichment
+| `--enrich-code`, `--no-enrich-code`, `--enrich-formula`, `--no-enrich-formula`, `--enrich-picture-classes`, `--no-enrich-picture-classes`, `--enrich-picture-description`, `--no-enrich-picture-description`, `--enrich-chart-extraction`, `--no-enrich-chart-extraction`
+
+| Output formatting
+| `--image-export-mode`, `--show-layout`, `--no-show-layout`
+
+| Advanced
+| `--headers`, `--artifacts-path`, `--enable-remote-services`, `--no-enable-remote-services`, `--allow-external-plugins`, `--no-allow-external-plugins`, `--show-external-plugins`, `--no-show-external-plugins`, `--document-timeout`, `--device`, `--num-threads`, `--page-batch-size`
+
+| Debug
+| `--verbose`, `-v` / `-vv` / `-vvv`, `--debug-visualize-cells`, `--no-debug-visualize-cells`, `--debug-visualize-ocr`, `--no-debug-visualize-ocr`, `--debug-visualize-layout`, `--no-debug-visualize-layout`, `--debug-visualize-tables`, `--no-debug-visualize-tables`
+
+| Performance
+| `--abort-on-error`, `--no-abort-on-error`, `--profiling`, `--no-profiling`, `--save-profiling`, `--no-save-profiling`
+
+| Info
+| `--version`, `--help`, `--logo`
+
+|===
+
+The `--output` (`-o`) flag is **not permitted** because the output directory is managed by the producer.
+Use the `CamelDoclingOutputFilePath` header or endpoint configuration instead.
+
+Additionally, the following are rejected:
+
+- **Shell metacharacters**: `;`, `|`, `` ` ``, `$()` — blocked as defense-in-depth even though ProcessBuilder does not interpret them.
+- **Path traversal**: `../`, `..\`, and paths that resolve to traversal after normalization.
+- **Unknown flags**: any flag not in the allowlist above.
+
=== Extracting document metadata
[tabs]
diff --git
a/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingProducer.java
b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingProducer.java
index 17bfc05e851b..437fa8f540cb 100644
---
a/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingProducer.java
+++
b/components/camel-ai/camel-docling/src/main/java/org/apache/camel/component/docling/DoclingProducer.java
@@ -30,6 +30,7 @@ import java.util.Base64;
import java.util.Collection;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionStage;
import java.util.concurrent.ConcurrentHashMap;
@@ -86,6 +87,56 @@ public class DoclingProducer extends DefaultProducer {
private static final Logger LOG =
LoggerFactory.getLogger(DoclingProducer.class);
+ /**
+ * Recognized docling CLI flags. Only these flags are permitted in custom
arguments (allowlist approach). Flags
+ * managed by the producer ({@code --output}, {@code -o}) are excluded and
checked separately.
+ */
+ private static final Set<String> ALLOWED_DOCLING_FLAGS = Set.of(
+ // Input/output format
+ "--from", "--to",
+ // Pipeline
+ "--pipeline", "--vlm-model", "--asr-model",
+ // OCR
+ "--ocr", "--no-ocr", "--force-ocr", "--no-force-ocr",
+ "--ocr-engine", "--ocr-lang", "--psm",
+ // Tables
+ "--tables", "--no-tables", "--table-mode",
+ // PDF
+ "--pdf-backend", "--pdf-password",
+ // Enrichment
+ "--enrich-code", "--no-enrich-code",
+ "--enrich-formula", "--no-enrich-formula",
+ "--enrich-picture-classes", "--no-enrich-picture-classes",
+ "--enrich-picture-description", "--no-enrich-picture-description",
+ "--enrich-chart-extraction", "--no-enrich-chart-extraction",
+ // Output formatting
+ "--image-export-mode",
+ "--show-layout", "--no-show-layout",
+ // Advanced
+ "--headers", "--artifacts-path",
+ "--enable-remote-services", "--no-enable-remote-services",
+ "--allow-external-plugins", "--no-allow-external-plugins",
+ "--show-external-plugins", "--no-show-external-plugins",
+ "--document-timeout", "--device", "--num-threads",
"--page-batch-size",
+ // Debug
+ "--verbose",
+ "--debug-visualize-cells", "--no-debug-visualize-cells",
+ "--debug-visualize-ocr", "--no-debug-visualize-ocr",
+ "--debug-visualize-layout", "--no-debug-visualize-layout",
+ "--debug-visualize-tables", "--no-debug-visualize-tables",
+ // Performance / error handling
+ "--abort-on-error", "--no-abort-on-error",
+ "--profiling", "--no-profiling",
+ "--save-profiling", "--no-save-profiling",
+ // Info
+ "--version", "--help", "--logo");
+
+ /**
+ * Flags managed by the producer that must not be overridden through
custom arguments. The output directory is
+ * controlled by the producer via endpoint configuration or the {@link
DoclingHeaders#OUTPUT_FILE_PATH} header.
+ */
+ private static final Set<String> PRODUCER_MANAGED_FLAGS =
Set.of("--output", "-o");
+
private DoclingConfiguration configuration;
private DoclingServeApi doclingServeApi;
private ObjectMapper objectMapper;
@@ -1876,14 +1927,10 @@ public class DoclingProducer extends DefaultProducer {
}
/**
- * Validates custom CLI arguments to ensure they do not conflict with
producer-managed options such as the output
- * directory.
+ * Validates custom CLI arguments using an allowlist approach. Only
recognized docling CLI flags are permitted.
+ * Producer-managed flags, shell metacharacters, and path traversal
sequences are rejected.
*/
private void validateCustomArguments(List<String> customArgs) {
- // The output directory is managed by the producer via endpoint
configuration
- // or the OUTPUT_FILE_PATH header, so it must not be overridden
through custom arguments.
- List<String> blockedFlags = List.of("--output", "-o");
-
for (int i = 0; i < customArgs.size(); i++) {
String arg = customArgs.get(i);
@@ -1891,20 +1938,88 @@ public class DoclingProducer extends DefaultProducer {
throw new IllegalArgumentException("Custom argument at index "
+ i + " is null");
}
- String argLower = arg.toLowerCase();
- for (String blocked : blockedFlags) {
- if (argLower.equals(blocked) || argLower.startsWith(blocked +
"=")) {
- throw new IllegalArgumentException(
- "Custom argument '" + blocked
- + "' is not allowed
because the output directory is managed by the producer. "
- + "Use the " +
DoclingHeaders.OUTPUT_FILE_PATH
- + " header or endpoint
configuration instead.");
- }
+ rejectShellMetacharacters(arg, i);
+
+ if (arg.startsWith("--")) {
+ validateLongFlag(arg, i);
+ } else if (arg.startsWith("-")) {
+ validateShortFlag(arg, i);
+ } else {
+ validatePathSafety(arg, i);
}
+ }
+ }
- if (arg.contains("../") || arg.contains("..\\")) {
- throw new IllegalArgumentException(
- "Custom argument at index " + i + " contains a
relative path traversal sequence");
+ private void validateLongFlag(String arg, int index) {
+ String flag = arg.contains("=") ? arg.substring(0, arg.indexOf('=')) :
arg;
+ String flagLower = flag.toLowerCase();
+
+ if (PRODUCER_MANAGED_FLAGS.contains(flagLower)) {
+ throw new IllegalArgumentException(
+ "Custom argument '" + flag
+ + "' is not allowed because the
output directory is managed by the producer. "
+ + "Use the " +
DoclingHeaders.OUTPUT_FILE_PATH
+ + " header or endpoint
configuration instead.");
+ }
+
+ if (!ALLOWED_DOCLING_FLAGS.contains(flagLower)) {
+ throw new IllegalArgumentException(
+ "Custom argument '" + flag
+ + "' is not a recognized
docling CLI flag. "
+ + "Only known docling flags are
permitted as custom arguments.");
+ }
+
+ if (arg.contains("=")) {
+ String value = arg.substring(arg.indexOf('=') + 1);
+ validatePathSafety(value, index);
+ }
+ }
+
+ private void validateShortFlag(String arg, int index) {
+ String flagLower = arg.toLowerCase();
+
+ // Allow -v, -vv, -vvv (verbosity levels)
+ if (flagLower.matches("-v+")) {
+ return;
+ }
+
+ if (PRODUCER_MANAGED_FLAGS.contains(flagLower)) {
+ throw new IllegalArgumentException(
+ "Custom argument '" + arg
+ + "' is not allowed because the
output directory is managed by the producer. "
+ + "Use the " +
DoclingHeaders.OUTPUT_FILE_PATH
+ + " header or endpoint
configuration instead.");
+ }
+
+ throw new IllegalArgumentException(
+ "Custom argument '" + arg
+ + "' is not a recognized docling
CLI flag. "
+ + "Only known docling flags are
permitted as custom arguments.");
+ }
+
+ private static void rejectShellMetacharacters(String arg, int index) {
+ if (arg.contains(";") || arg.contains("|") || arg.contains("`") ||
arg.contains("$(")) {
+ throw new IllegalArgumentException(
+ "Custom argument at index " + index
+ + " contains a disallowed
character or pattern. "
+ + "Shell metacharacters (;, |,
`, $()) are not permitted.");
+ }
+ }
+
+ private static void validatePathSafety(String value, int index) {
+ if (value.contains("../") || value.contains("..\\")) {
+ throw new IllegalArgumentException(
+ "Custom argument at index " + index + " contains a
relative path traversal sequence");
+ }
+ // Normalize path-like values to detect traversal via redundant
separators
+ if (value.contains("/") || value.contains("\\")) {
+ Path normalized = Paths.get(value).normalize();
+ for (Path component : normalized) {
+ if ("..".equals(component.toString())) {
+ throw new IllegalArgumentException(
+ "Custom argument at index " + index
+ + " resolves to a path
containing traversal after normalization");
+ }
}
}
}
diff --git
a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/DoclingCustomArgsValidationTest.java
b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/DoclingCustomArgsValidationTest.java
index 3699b5fc451f..d541d5155b71 100644
---
a/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/DoclingCustomArgsValidationTest.java
+++
b/components/camel-ai/camel-docling/src/test/java/org/apache/camel/component/docling/DoclingCustomArgsValidationTest.java
@@ -89,7 +89,7 @@ class DoclingCustomArgsValidationTest extends
CamelTestSupport {
CamelExecutionException ex =
assertThrows(CamelExecutionException.class, () -> {
template.requestBodyAndHeaders("direct:cli-convert",
inputFile.toString(),
- java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS,
List.of("--some-flag", "../../etc/passwd")));
+ java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS,
List.of("--artifacts-path", "../../etc/passwd")));
});
assertInstanceOf(IllegalArgumentException.class, ex.getCause());
@@ -145,6 +145,126 @@ class DoclingCustomArgsValidationTest extends
CamelTestSupport {
"No custom arguments should not trigger argument validation");
}
+ // -- Shell metacharacter injection tests --
+
+ @Test
+ void customArgsWithSemicolonAreRejected() throws Exception {
+ Path inputFile = createInputFile();
+
+ CamelExecutionException ex =
assertThrows(CamelExecutionException.class, () -> {
+ template.requestBodyAndHeaders("direct:cli-convert",
+ inputFile.toString(),
+ java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS,
List.of("--verbose", "; rm -rf /")));
+ });
+
+ assertInstanceOf(IllegalArgumentException.class, ex.getCause());
+ assertTrue(ex.getCause().getMessage().contains("disallowed"));
+ }
+
+ @Test
+ void customArgsWithPipeAreRejected() throws Exception {
+ Path inputFile = createInputFile();
+
+ CamelExecutionException ex =
assertThrows(CamelExecutionException.class, () -> {
+ template.requestBodyAndHeaders("direct:cli-convert",
+ inputFile.toString(),
+ java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS,
List.of("--verbose", "| cat /etc/passwd")));
+ });
+
+ assertInstanceOf(IllegalArgumentException.class, ex.getCause());
+ assertTrue(ex.getCause().getMessage().contains("disallowed"));
+ }
+
+ @Test
+ void customArgsWithBacktickAreRejected() throws Exception {
+ Path inputFile = createInputFile();
+
+ CamelExecutionException ex =
assertThrows(CamelExecutionException.class, () -> {
+ template.requestBodyAndHeaders("direct:cli-convert",
+ inputFile.toString(),
+ java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS,
List.of("--verbose", "`whoami`")));
+ });
+
+ assertInstanceOf(IllegalArgumentException.class, ex.getCause());
+ assertTrue(ex.getCause().getMessage().contains("disallowed"));
+ }
+
+ @Test
+ void customArgsWithCommandSubstitutionAreRejected() throws Exception {
+ Path inputFile = createInputFile();
+
+ CamelExecutionException ex =
assertThrows(CamelExecutionException.class, () -> {
+ template.requestBodyAndHeaders("direct:cli-convert",
+ inputFile.toString(),
+ java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS,
List.of("--verbose", "$(id)")));
+ });
+
+ assertInstanceOf(IllegalArgumentException.class, ex.getCause());
+ assertTrue(ex.getCause().getMessage().contains("disallowed"));
+ }
+
+ // -- Allowlist enforcement tests --
+
+ @Test
+ void customArgsWithUnknownFlagAreRejected() throws Exception {
+ Path inputFile = createInputFile();
+
+ CamelExecutionException ex =
assertThrows(CamelExecutionException.class, () -> {
+ template.requestBodyAndHeaders("direct:cli-convert",
+ inputFile.toString(),
+ java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS,
List.of("--unknown-flag")));
+ });
+
+ assertInstanceOf(IllegalArgumentException.class, ex.getCause());
+ assertTrue(ex.getCause().getMessage().contains("not a recognized
docling CLI flag"));
+ }
+
+ @Test
+ void customArgsWithUnknownShortFlagAreRejected() throws Exception {
+ Path inputFile = createInputFile();
+
+ CamelExecutionException ex =
assertThrows(CamelExecutionException.class, () -> {
+ template.requestBodyAndHeaders("direct:cli-convert",
+ inputFile.toString(),
+ java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS,
List.of("-x")));
+ });
+
+ assertInstanceOf(IllegalArgumentException.class, ex.getCause());
+ assertTrue(ex.getCause().getMessage().contains("not a recognized
docling CLI flag"));
+ }
+
+ @Test
+ void customArgsWithVerbosityLevelsAreAccepted() throws Exception {
+ Path inputFile = createInputFile();
+
+ // -v and -vv should pass validation (verbosity levels)
+ CamelExecutionException ex =
assertThrows(CamelExecutionException.class, () -> {
+ template.requestBodyAndHeaders("direct:cli-convert",
+ inputFile.toString(),
+ java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS,
List.of("-vv")));
+ });
+
+ assertFalse(ex.getCause() instanceof IllegalArgumentException,
+ "-vv should pass validation; failure should come from process
execution");
+ }
+
+ @Test
+ void customArgsWithNormalizedPathTraversalAreRejected() throws Exception {
+ Path inputFile = createInputFile();
+
+ // Path traversal that would be caught only after normalization
+ CamelExecutionException ex =
assertThrows(CamelExecutionException.class, () -> {
+ template.requestBodyAndHeaders("direct:cli-convert",
+ inputFile.toString(),
+ java.util.Map.of(DoclingHeaders.CUSTOM_ARGUMENTS,
+ List.of("--artifacts-path",
"/safe/path/subdir/../../etc/passwd")));
+ });
+
+ assertInstanceOf(IllegalArgumentException.class, ex.getCause());
+ assertTrue(ex.getCause().getMessage().contains("path traversal") ||
+ ex.getCause().getMessage().contains("traversal after
normalization"));
+ }
+
private Path createInputFile() throws Exception {
Path file = tempDir.resolve("test-input.txt");
Files.writeString(file, "test content");