This is an automated email from the ASF dual-hosted git repository.
gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/master by this push:
new 8be0dadae GH-2967: Support unified config options for convert parquet-cli (#3283)
8be0dadae is described below
commit 8be0dadaea9ded29d61fb10afb6dfe7d516ee316
Author: Arnav Balyan <[email protected]>
AuthorDate: Mon Sep 1 12:54:42 2025 +0530
GH-2967: Support unified config options for convert parquet-cli (#3283)
---
parquet-cli/README.md | 14 +++++++
.../src/main/java/org/apache/parquet/cli/Main.java | 27 +++++++++++--
.../cli/commands/ConvertCSVCommandTest.java | 15 ++++++++
.../parquet/cli/commands/ConvertCommandTest.java | 44 ++++++++++++++++++++++
4 files changed, 97 insertions(+), 3 deletions(-)
diff --git a/parquet-cli/README.md b/parquet-cli/README.md
index 9b75efaa1..c7b3540a4 100644
--- a/parquet-cli/README.md
+++ b/parquet-cli/README.md
@@ -134,3 +134,17 @@ Usage: parquet [options] [command] [command options]
See 'parquet help <command>' for more information on a specific command.
```
+### Configuration Options
+
+- `--conf` or `--property`: Set any configuration property in format `key=value`. Can be specified multiple times.
+
+Examples:
+```bash
+parquet convert input.avro -o output.parquet --conf parquet.avro.write-parquet-uuid=true
+
+parquet convert input.avro -o output.parquet --conf parquet.avro.write-old-list-structure=false
+
+# Multiple options
+parquet convert-csv input.csv -o output.parquet --schema schema.avsc --conf parquet.avro.write-parquet-uuid=true --conf parquet.avro.write-old-list-structure=false
+
+```
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java b/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java
index 62940054e..e93a21e89 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java
@@ -25,6 +25,7 @@ import com.beust.jcommander.ParameterException;
import com.beust.jcommander.Parameters;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSet;
+import java.util.List;
import java.util.Set;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
@@ -67,6 +68,11 @@ public class Main extends Configured implements Tool {
description = "Print extra debugging information")
private boolean debug = false;
+ @Parameter(
+ names = {"--conf", "--property"},
+ description = "Set a configuration property (format: key=value). Can be specified multiple times.")
+ private List<String> confProperties;
+
@VisibleForTesting
@Parameter(names = "--dollar-zero", description = "A way for the runtime path to be passed in", hidden = true)
String programName = DEFAULT_PROGRAM_NAME;
@@ -162,10 +168,25 @@ public class Main extends Configured implements Tool {
return 1;
}
- try {
- if (command instanceof Configurable) {
- ((Configurable) command).setConf(getConf());
+ // Note to developer: This is a generic way to apply configs to given command.
+ // If the command does not support the configs, it would simply be ignored.
+ if (command instanceof Configurable) {
+ Configuration merged = new Configuration(getConf());
+ if (confProperties != null) {
+ for (String prop : confProperties) {
+ String[] parts = prop.split("=", 2);
+ if (parts.length != 2) {
+ throw new IllegalArgumentException(
+ "Configuration property must be in format key=value: " + prop);
+ }
+ merged.set(parts[0].trim(), parts[1].trim());
+ console.debug("Set configuration property: {}={}", parts[0].trim(), parts[1].trim());
+ }
}
+ ((Configurable) command).setConf(merged);
+ }
+
+ try {
return command.run();
} catch (IllegalArgumentException e) {
if (debug) {
diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCSVCommandTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCSVCommandTest.java
index 05053c097..29ed16224 100644
--- a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCSVCommandTest.java
+++ b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCSVCommandTest.java
@@ -61,4 +61,19 @@ public class ConvertCSVCommandTest extends CSVFileTest {
command.setConf(new Configuration());
command.run();
}
+
+ @Test
+ public void testConvertCSVCommandWithGenericConf() throws IOException {
+ File file = csvFile();
+ ConvertCSVCommand command = new ConvertCSVCommand(createLogger());
+ command.targets = Arrays.asList(file.getAbsolutePath());
+ File output = new File(getTempFolder(), getClass().getSimpleName() + "_with_generic_conf.parquet");
+ command.outputPath = output.getAbsolutePath();
+ Configuration conf = new Configuration();
+ conf.set("parquet.avro.write-parquet-uuid", "true");
+ conf.set("parquet.avro.write-old-list-structure", "false");
+ command.setConf(conf);
+ Assert.assertEquals(0, command.run());
+ Assert.assertTrue(output.exists());
+ }
}
diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCommandTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCommandTest.java
index 4870c48b4..c38a5b25b 100644
--- a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCommandTest.java
+++ b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCommandTest.java
@@ -37,4 +37,48 @@ public class ConvertCommandTest extends AvroFileTest {
Assert.assertEquals(0, command.run());
Assert.assertTrue(output.exists());
}
+
+ @Test
+ public void testConvertCommandWithGenericConf() throws IOException {
+ File file = toAvro(parquetFile());
+ ConvertCommand command = new ConvertCommand(createLogger());
+ command.targets = Arrays.asList(file.getAbsolutePath());
+ File output = new File(getTempFolder(), "converted_with_generic_conf.parquet");
+ command.outputPath = output.getAbsolutePath();
+ Configuration conf = new Configuration();
+ conf.set("parquet.avro.write-parquet-uuid", "true");
+ conf.set("parquet.avro.write-old-list-structure", "false");
+ conf.set("test.property", "test.value");
+ command.setConf(conf);
+
+ Assert.assertEquals(0, command.run());
+ Assert.assertTrue(output.exists());
+ }
+
+ @Test
+ public void testConvertCommandConfigurationValidation() throws IOException {
+ File file = toAvro(parquetFile());
+ ConvertCommand command = new ConvertCommand(createLogger());
+ command.targets = Arrays.asList(file.getAbsolutePath());
+ File output = new File(getTempFolder(), "converted_with_config_validation.parquet");
+ command.outputPath = output.getAbsolutePath();
+
+ Configuration conf = new Configuration();
+ conf.set("parquet.avro.write-parquet-uuid", "true");
+ conf.set("parquet.avro.write-old-list-structure", "false");
+ command.setConf(conf);
+
+ Assert.assertEquals(0, command.run());
+ Assert.assertTrue(output.exists());
+
+ File output2 = new File(getTempFolder(), "converted_with_config_validation2.parquet");
+ command.outputPath = output2.getAbsolutePath();
+ Configuration conf2 = new Configuration();
+ conf2.set("parquet.avro.write-parquet-uuid", "false");
+ conf2.set("parquet.avro.write-old-list-structure", "true");
+ command.setConf(conf2);
+
+ Assert.assertEquals(0, command.run());
+ Assert.assertTrue(output2.exists());
+ }
}