This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new 65bc51846 PARQUET-2359: Add simple plain Parquet configuration
implementation (#1182)
65bc51846 is described below
commit 65bc51846010360f3dd4304103ec3c637776d7c9
Author: Atour <[email protected]>
AuthorDate: Tue Nov 14 17:28:06 2023 +0100
PARQUET-2359: Add simple plain Parquet configuration implementation (#1182)
---
.../org/apache/parquet/avro/TestReadWrite.java | 55 ++++--
.../parquet/conf/PlainParquetConfiguration.java | 192 +++++++++++++++++++++
pom.xml | 1 +
3 files changed, 231 insertions(+), 17 deletions(-)
diff --git
a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java
b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java
index 9aaa9e3b2..dfa211794 100644
--- a/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java
+++ b/parquet-avro/src/test/java/org/apache/parquet/avro/TestReadWrite.java
@@ -54,6 +54,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.conf.HadoopParquetConfiguration;
import org.apache.parquet.conf.ParquetConfiguration;
+import org.apache.parquet.conf.PlainParquetConfiguration;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.api.WriteSupport;
@@ -79,31 +80,39 @@ public class TestReadWrite {
@Parameterized.Parameters
public static Collection<Object[]> data() {
Object[][] data = new Object[][] {
- { false, false, false }, // use the new converters with hadoop config
- { true, false, false }, // use the old converters with hadoop config
- { false, true, false }, // use a local disk location with hadoop
config
- { false, false, true }, // use the new converters with parquet
config interface
- { true, false, true }, // use the old converters with parquet
config interface
- { false, true, true } }; // use a local disk location with parquet
config interface
+ { true, false, false, false }, // use the old converters with hadoop
config
+ { true, false, true, false }, // use the old converters with
parquet config interface
+ { false, false, false, false }, // use the new converters with hadoop
config
+ { false, true, false, false }, // use a local disk location with
hadoop config
+ { false, false, true, false }, // use the new converters with
parquet config interface
+ { false, true, true, false }, // use a local disk location with
parquet config interface
+ { false, false, true, true }, // use the new converters with plain
parquet config
+ { false, true, true, true } }; // use a local disk location with
plain parquet config
return Arrays.asList(data);
}
private final boolean compat;
private final boolean local;
private final boolean confInterface;
+ private final boolean plainConf;
private final Configuration testConf = new Configuration();
- private final ParquetConfiguration parquetConf = new
HadoopParquetConfiguration(true);
+ private final ParquetConfiguration hadoopConfWithInterface = new
HadoopParquetConfiguration();
+ private final ParquetConfiguration plainParquetConf = new
PlainParquetConfiguration();
- public TestReadWrite(boolean compat, boolean local, boolean confInterface) {
+ public TestReadWrite(boolean compat, boolean local, boolean confInterface,
boolean plainConf) {
this.compat = compat;
this.local = local;
this.confInterface = confInterface;
+ this.plainConf = plainConf;
this.testConf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, compat);
this.testConf.setBoolean("parquet.avro.add-list-element-records", false);
this.testConf.setBoolean("parquet.avro.write-old-list-structure", false);
- this.parquetConf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, compat);
- this.parquetConf.setBoolean("parquet.avro.add-list-element-records",
false);
- this.parquetConf.setBoolean("parquet.avro.write-old-list-structure",
false);
+
this.hadoopConfWithInterface.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY,
compat);
+
this.hadoopConfWithInterface.setBoolean("parquet.avro.add-list-element-records",
false);
+
this.hadoopConfWithInterface.setBoolean("parquet.avro.write-old-list-structure",
false);
+ this.plainParquetConf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY,
compat);
+ this.plainParquetConf.setBoolean("parquet.avro.add-list-element-records",
false);
+ this.plainParquetConf.setBoolean("parquet.avro.write-old-list-structure",
false);
}
@Test
@@ -891,9 +900,15 @@ public class TestReadWrite {
.withSchema(schema);
}
if (confInterface) {
- return writerBuilder
- .withConf(parquetConf)
- .build();
+      if (plainConf) {
+        return writerBuilder
+          .withConf(plainParquetConf)
+          .build();
+      } else {
+        return writerBuilder
+          .withConf(hadoopConfWithInterface)
+          .build();
+      }
} else {
return writerBuilder
.withConf(testConf)
@@ -911,9 +926,15 @@ public class TestReadWrite {
return new AvroParquetReader<>(testConf, new Path(file));
}
if (confInterface) {
- return readerBuilder
- .withConf(parquetConf)
- .build();
+      if (plainConf) {
+        return readerBuilder
+          .withConf(plainParquetConf)
+          .build();
+      } else {
+        return readerBuilder
+          .withConf(hadoopConfWithInterface)
+          .build();
+      }
} else {
return readerBuilder
.withConf(testConf)
diff --git
a/parquet-common/src/main/java/org/apache/parquet/conf/PlainParquetConfiguration.java
b/parquet-common/src/main/java/org/apache/parquet/conf/PlainParquetConfiguration.java
new file mode 100644
index 000000000..4b5485181
--- /dev/null
+++
b/parquet-common/src/main/java/org/apache/parquet/conf/PlainParquetConfiguration.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.parquet.conf;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+/**
+ * Configuration for Parquet without Hadoop dependency.
+ */
+public class PlainParquetConfiguration implements ParquetConfiguration {
+
+ private final Map<String, String> map;
+
+ public PlainParquetConfiguration() {
+ map = new HashMap<>();
+ }
+
+ public PlainParquetConfiguration(Map<String, String> properties) {
+ map = new HashMap<>(properties);
+ }
+
+ @Override
+ public void set(String s, String s1) {
+ map.put(s, s1);
+ }
+
+ @Override
+ public void setLong(String name, long value) {
+ set(name, String.valueOf(value));
+ }
+
+ @Override
+ public void setInt(String name, int value) {
+ set(name, String.valueOf(value));
+ }
+
+ @Override
+ public void setBoolean(String name, boolean value) {
+ set(name, String.valueOf(value));
+ }
+
+ @Override
+ public void setStrings(String name, String... value) {
+ if (value.length > 0) {
+ StringBuilder sb = new StringBuilder(value[0]);
+ for (int i = 1; i < value.length; ++i) {
+ sb.append(',');
+ sb.append(value[i]);
+ }
+ set(name, sb.toString());
+ } else {
+ set(name, "");
+ }
+ }
+
+ @Override
+ public void setClass(String name, Class<?> value, Class<?> xface) {
+ if (xface.isAssignableFrom(value)) {
+ set(name, value.getName());
+ } else {
+ throw new RuntimeException(xface.getCanonicalName() + " is not
assignable from " + value.getCanonicalName());
+ }
+ }
+
+ @Override
+ public String get(String name) {
+ return map.get(name);
+ }
+
+ @Override
+ public String get(String name, String defaultValue) {
+ String value = get(name);
+ if (value != null) {
+ return value;
+ } else {
+ return defaultValue;
+ }
+ }
+
+ @Override
+ public long getLong(String name, long defaultValue) {
+ String value = get(name);
+ if (value != null) {
+ return Long.parseLong(value);
+ } else {
+ return defaultValue;
+ }
+ }
+
+ @Override
+ public int getInt(String name, int defaultValue) {
+ String value = get(name);
+ if (value != null) {
+ return Integer.parseInt(value);
+ } else {
+ return defaultValue;
+ }
+ }
+
+ @Override
+ public boolean getBoolean(String name, boolean defaultValue) {
+ String value = get(name);
+ if (value != null) {
+ return Boolean.parseBoolean(value);
+ } else {
+ return defaultValue;
+ }
+ }
+
+ @Override
+ public String getTrimmed(String name) {
+ String value = get(name);
+ if (value != null) {
+ return value.trim();
+ } else {
+ return null;
+ }
+ }
+
+ @Override
+ public String getTrimmed(String name, String defaultValue) {
+ String value = get(name);
+ if (value != null) {
+ return value.trim();
+ } else {
+ return defaultValue;
+ }
+ }
+
+ @Override
+ public String[] getStrings(String name, String[] defaultValue) {
+ String value = get(name);
+ if (value != null) {
+ return value.split(",");
+ } else {
+ return defaultValue;
+ }
+ }
+
+ @Override
+ public Class<?> getClass(String name, Class<?> defaultValue) {
+ String value = get(name);
+ if (value != null) {
+ try {
+ return Class.forName(value);
+ } catch (ClassNotFoundException e) {
+ throw new RuntimeException(e);
+ }
+ } else {
+ return defaultValue;
+ }
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public <U> Class<? extends U> getClass(String name, Class<? extends U>
defaultValue, Class<U> xface) {
+ Class<?> value = getClass(name, defaultValue);
+ if (value != null && value.isAssignableFrom(xface)) {
+ return (Class<? extends U>) value;
+ }
+ return defaultValue;
+ }
+
+ @Override
+ public Class<?> getClassByName(String name) throws ClassNotFoundException {
+ return Class.forName(name);
+ }
+
+ @Override
+ public Iterator<Map.Entry<String, String>> iterator() {
+ return map.entrySet().iterator();
+ }
+}
diff --git a/pom.xml b/pom.xml
index bc3c65461..5c1f9f98b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -546,6 +546,7 @@
<exclude>org.apache.parquet.hadoop.ColumnChunkPageWriteStore</exclude>
<exclude>org.apache.parquet.hadoop.ParquetRecordWriter</exclude>
<!-- likely japicmp bug, triggers on new interface methods after
updating to 0.18.1 -->
+
<exclude>org.apache.parquet.conf.PlainParquetConfiguration#getClass(java.lang.String,java.lang.Class,java.lang.Class)</exclude>
<exclude>org.apache.parquet.conf.ParquetConfiguration#getClass(java.lang.String,java.lang.Class,java.lang.Class)</exclude>
<exclude>org.apache.parquet.hadoop.util.SerializationUtil#readObjectFromConfAsBase64(java.lang.String,org.apache.parquet.conf.ParquetConfiguration)</exclude>
<exclude>org.apache.parquet.conf.HadoopParquetConfiguration#getClass(java.lang.String,java.lang.Class,java.lang.Class)</exclude>