This is an automated email from the ASF dual-hosted git repository.

gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git


The following commit(s) were added to refs/heads/master by this push:
     new 8be0dadae GH-2967: Support unified config options for convert parquet-cli (#3283)
8be0dadae is described below

commit 8be0dadaea9ded29d61fb10afb6dfe7d516ee316
Author: Arnav Balyan <[email protected]>
AuthorDate: Mon Sep 1 12:54:42 2025 +0530

    GH-2967: Support unified config options for convert parquet-cli (#3283)
---
 parquet-cli/README.md                              | 14 +++++++
 .../src/main/java/org/apache/parquet/cli/Main.java | 27 +++++++++++--
 .../cli/commands/ConvertCSVCommandTest.java        | 15 ++++++++
 .../parquet/cli/commands/ConvertCommandTest.java   | 44 ++++++++++++++++++++++
 4 files changed, 97 insertions(+), 3 deletions(-)

diff --git a/parquet-cli/README.md b/parquet-cli/README.md
index 9b75efaa1..c7b3540a4 100644
--- a/parquet-cli/README.md
+++ b/parquet-cli/README.md
@@ -134,3 +134,17 @@ Usage: parquet [options] [command] [command options]
   See 'parquet help <command>' for more information on a specific command.
 ```
 
+### Configuration Options
+
+- `--conf` or `--property`: Set any configuration property in format `key=value`. Can be specified multiple times.
+
+Examples:
+```bash
+parquet convert input.avro -o output.parquet --conf parquet.avro.write-parquet-uuid=true
+
+parquet convert input.avro -o output.parquet --conf parquet.avro.write-old-list-structure=false
+
+# Multiple options
+parquet convert-csv input.csv -o output.parquet --schema schema.avsc --conf parquet.avro.write-parquet-uuid=true --conf parquet.avro.write-old-list-structure=false
+
+```
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java b/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java
index 62940054e..e93a21e89 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java
@@ -25,6 +25,7 @@ import com.beust.jcommander.ParameterException;
 import com.beust.jcommander.Parameters;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.ImmutableSet;
+import java.util.List;
 import java.util.Set;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configurable;
@@ -67,6 +68,11 @@ public class Main extends Configured implements Tool {
       description = "Print extra debugging information")
   private boolean debug = false;
 
+  @Parameter(
+      names = {"--conf", "--property"},
+      description = "Set a configuration property (format: key=value). Can be specified multiple times.")
+  private List<String> confProperties;
+
   @VisibleForTesting
   @Parameter(names = "--dollar-zero", description = "A way for the runtime path to be passed in", hidden = true)
   String programName = DEFAULT_PROGRAM_NAME;
@@ -162,10 +168,25 @@ public class Main extends Configured implements Tool {
       return 1;
     }
 
-    try {
-      if (command instanceof Configurable) {
-        ((Configurable) command).setConf(getConf());
+    // Note to developer: This is a generic way to apply configs to given command.
+    // If the command does not support the configs, it would simply be ignored.
+    if (command instanceof Configurable) {
+      Configuration merged = new Configuration(getConf());
+      if (confProperties != null) {
+        for (String prop : confProperties) {
+          String[] parts = prop.split("=", 2);
+          if (parts.length != 2) {
+            throw new IllegalArgumentException(
+                "Configuration property must be in format key=value: " + prop);
+          }
+          merged.set(parts[0].trim(), parts[1].trim());
+          console.debug("Set configuration property: {}={}", parts[0].trim(), parts[1].trim());
+        }
       }
+      ((Configurable) command).setConf(merged);
+    }
+
+    try {
       return command.run();
     } catch (IllegalArgumentException e) {
       if (debug) {
diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCSVCommandTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCSVCommandTest.java
index 05053c097..29ed16224 100644
--- a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCSVCommandTest.java
+++ b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCSVCommandTest.java
@@ -61,4 +61,19 @@ public class ConvertCSVCommandTest extends CSVFileTest {
     command.setConf(new Configuration());
     command.run();
   }
+
+  @Test
+  public void testConvertCSVCommandWithGenericConf() throws IOException {
+    File file = csvFile();
+    ConvertCSVCommand command = new ConvertCSVCommand(createLogger());
+    command.targets = Arrays.asList(file.getAbsolutePath());
+    File output = new File(getTempFolder(), getClass().getSimpleName() + "_with_generic_conf.parquet");
+    command.outputPath = output.getAbsolutePath();
+    Configuration conf = new Configuration();
+    conf.set("parquet.avro.write-parquet-uuid", "true");
+    conf.set("parquet.avro.write-old-list-structure", "false");
+    command.setConf(conf);
+    Assert.assertEquals(0, command.run());
+    Assert.assertTrue(output.exists());
+  }
 }
diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCommandTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCommandTest.java
index 4870c48b4..c38a5b25b 100644
--- a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCommandTest.java
+++ b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ConvertCommandTest.java
@@ -37,4 +37,48 @@ public class ConvertCommandTest extends AvroFileTest {
     Assert.assertEquals(0, command.run());
     Assert.assertTrue(output.exists());
   }
+
+  @Test
+  public void testConvertCommandWithGenericConf() throws IOException {
+    File file = toAvro(parquetFile());
+    ConvertCommand command = new ConvertCommand(createLogger());
+    command.targets = Arrays.asList(file.getAbsolutePath());
+    File output = new File(getTempFolder(), "converted_with_generic_conf.parquet");
+    command.outputPath = output.getAbsolutePath();
+    Configuration conf = new Configuration();
+    conf.set("parquet.avro.write-parquet-uuid", "true");
+    conf.set("parquet.avro.write-old-list-structure", "false");
+    conf.set("test.property", "test.value");
+    command.setConf(conf);
+
+    Assert.assertEquals(0, command.run());
+    Assert.assertTrue(output.exists());
+  }
+
+  @Test
+  public void testConvertCommandConfigurationValidation() throws IOException {
+    File file = toAvro(parquetFile());
+    ConvertCommand command = new ConvertCommand(createLogger());
+    command.targets = Arrays.asList(file.getAbsolutePath());
+    File output = new File(getTempFolder(), "converted_with_config_validation.parquet");
+    command.outputPath = output.getAbsolutePath();
+
+    Configuration conf = new Configuration();
+    conf.set("parquet.avro.write-parquet-uuid", "true");
+    conf.set("parquet.avro.write-old-list-structure", "false");
+    command.setConf(conf);
+
+    Assert.assertEquals(0, command.run());
+    Assert.assertTrue(output.exists());
+
+    File output2 = new File(getTempFolder(), "converted_with_config_validation2.parquet");
+    command.outputPath = output2.getAbsolutePath();
+    Configuration conf2 = new Configuration();
+    conf2.set("parquet.avro.write-parquet-uuid", "false");
+    conf2.set("parquet.avro.write-old-list-structure", "true");
+    command.setConf(conf2);
+
+    Assert.assertEquals(0, command.run());
+    Assert.assertTrue(output2.exists());
+  }
 }

Reply via email to