This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch parquet-1.14.x
in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/parquet-1.14.x by this push:
new 9c8fde0c7 PARQUET-2468: ParquetMetadata must convert to json (#1349)
(#1360)
9c8fde0c7 is described below
commit 9c8fde0c7a8bacaba28c9e0a953aba0a83b25fdd
Author: Gang Wu <[email protected]>
AuthorDate: Fri May 31 09:39:23 2024 +0800
PARQUET-2468: ParquetMetadata must convert to json (#1349) (#1360)
Co-authored-by: Michel Davit <[email protected]>
---
parquet-hadoop/pom.xml | 10 ++++++
.../hadoop/metadata/ColumnChunkMetaData.java | 4 +++
.../hadoop/metadata/ColumnChunkProperties.java | 2 ++
.../parquet/hadoop/metadata/FileMetaData.java | 2 ++
.../parquet/hadoop/metadata/ParquetMetadata.java | 35 ++++++++++++------
.../converter/TestParquetMetadataConverter.java | 41 ++++++++++++++++++++--
parquet-jackson/pom.xml | 18 ++++++++++
pom.xml | 1 +
8 files changed, 100 insertions(+), 13 deletions(-)
diff --git a/parquet-hadoop/pom.xml b/parquet-hadoop/pom.xml
index 7d02ac54a..465d7c95f 100644
--- a/parquet-hadoop/pom.xml
+++ b/parquet-hadoop/pom.xml
@@ -118,11 +118,21 @@
<artifactId>jackson-core</artifactId>
<version>${jackson.version}</version>
</dependency>
+ <dependency>
+ <groupId>${jackson.groupId}</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ <version>${jackson.version}</version>
+ </dependency>
<dependency>
<groupId>${jackson.groupId}</groupId>
<artifactId>jackson-databind</artifactId>
<version>${jackson-databind.version}</version>
</dependency>
+ <dependency>
+ <groupId>${jackson.datatype.groupId}</groupId>
+ <artifactId>jackson-datatype-jdk8</artifactId>
+ <version>${jackson-modules-java8.version}</version>
+ </dependency>
<dependency>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
diff --git
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
index 3dac15ba7..14a949b0e 100644
---
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
+++
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
@@ -22,6 +22,7 @@ import static
org.apache.parquet.column.Encoding.PLAIN_DICTIONARY;
import static org.apache.parquet.column.Encoding.RLE_DICTIONARY;
import static org.apache.parquet.format.Util.readColumnMetaData;
+import com.fasterxml.jackson.annotation.JsonIgnore;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Set;
@@ -338,6 +339,7 @@ public abstract class ColumnChunkMetaData {
* @deprecated will be removed in 2.0.0. Use {@link #getPrimitiveType()}
instead.
*/
@Deprecated
+ @JsonIgnore
public PrimitiveTypeName getType() {
decryptIfNeeded();
return properties.getType();
@@ -380,6 +382,7 @@ public abstract class ColumnChunkMetaData {
/**
* @return the stats for this column
*/
+ @JsonIgnore
public abstract Statistics getStatistics();
/**
@@ -387,6 +390,7 @@ public abstract class ColumnChunkMetaData {
*
* @return the size stats for this column
*/
+ @JsonIgnore
public SizeStatistics getSizeStatistics() {
throw new UnsupportedOperationException("SizeStatistics is not
implemented");
}
diff --git
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java
index 3b0a33b14..026e37a1c 100644
---
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java
+++
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java
@@ -18,6 +18,7 @@
*/
package org.apache.parquet.hadoop.metadata;
+import com.fasterxml.jackson.annotation.JsonIgnore;
import java.util.Arrays;
import java.util.Set;
import org.apache.parquet.column.Encoding;
@@ -76,6 +77,7 @@ public class ColumnChunkProperties {
* @deprecated will be removed in 2.0.0. Use {@link #getPrimitiveType()}
instead.
*/
@Deprecated
+ @JsonIgnore
public PrimitiveTypeName getType() {
return type.getPrimitiveTypeName();
}
diff --git
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/FileMetaData.java
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/FileMetaData.java
index c608cd405..4143dd805 100644
---
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/FileMetaData.java
+++
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/FileMetaData.java
@@ -20,6 +20,7 @@ package org.apache.parquet.hadoop.metadata;
import static java.util.Collections.unmodifiableMap;
+import com.fasterxml.jackson.annotation.JsonIgnore;
import java.io.Serializable;
import java.util.Map;
import java.util.Objects;
@@ -109,6 +110,7 @@ public final class FileMetaData implements Serializable {
return createdBy;
}
+ @JsonIgnore
public InternalFileDecryptor getFileDecryptor() {
return fileDecryptor;
}
diff --git
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java
index e30e872a6..640ecfba1 100755
---
a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java
+++
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java
@@ -19,6 +19,9 @@
package org.apache.parquet.hadoop.metadata;
import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectWriter;
+import com.fasterxml.jackson.databind.SerializationFeature;
+import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
@@ -32,6 +35,14 @@ public class ParquetMetadata {
private static final ObjectMapper objectMapper = new ObjectMapper();
+ static {
+ // Disable FAIL_ON_EMPTY_BEANS on the ObjectMapper. With this feature enabled,
parquet-cascading tests fail,
+ // because LogicalTypeAnnotation implementations are classes without any
property.
+ objectMapper.disable(SerializationFeature.FAIL_ON_EMPTY_BEANS);
+ // Add support for Java 8 Optional
+ objectMapper.registerModule(new Jdk8Module());
+ }
+
/**
* @param parquetMetaData an instance of parquet metadata to convert
* @return the json representation
@@ -50,19 +61,23 @@ public class ParquetMetadata {
private static String toJSON(ParquetMetadata parquetMetaData, boolean
isPrettyPrint) {
try (StringWriter stringWriter = new StringWriter()) {
+ Object objectToPrint;
+ if (parquetMetaData.getFileMetaData() == null
+ || parquetMetaData.getFileMetaData().getEncryptionType()
+ == FileMetaData.EncryptionType.UNENCRYPTED) {
+ objectToPrint = parquetMetaData;
+ } else {
+ objectToPrint = parquetMetaData.getFileMetaData();
+ }
+
+ ObjectWriter writer;
if (isPrettyPrint) {
- Object objectToPrint;
- if (parquetMetaData.getFileMetaData() == null
- || parquetMetaData.getFileMetaData().getEncryptionType()
- == FileMetaData.EncryptionType.UNENCRYPTED) {
- objectToPrint = parquetMetaData;
- } else {
- objectToPrint = parquetMetaData.getFileMetaData();
- }
- objectMapper.writerWithDefaultPrettyPrinter().writeValue(stringWriter,
objectToPrint);
+ writer = objectMapper.writerWithDefaultPrettyPrinter();
} else {
- objectMapper.writeValue(stringWriter, parquetMetaData);
+ writer = objectMapper.writer();
}
+
+ writer.writeValue(stringWriter, objectToPrint);
return stringWriter.toString();
} catch (IOException e) {
throw new RuntimeException(e);
diff --git
a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index 4dcede624..2cffb5186 100644
---
a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++
b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -87,6 +87,10 @@ import org.apache.parquet.column.statistics.IntStatistics;
import org.apache.parquet.column.statistics.LongStatistics;
import org.apache.parquet.column.statistics.SizeStatistics;
import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.crypto.DecryptionPropertiesFactory;
+import org.apache.parquet.crypto.EncryptionPropertiesFactory;
+import org.apache.parquet.crypto.FileDecryptionProperties;
+import org.apache.parquet.crypto.InternalFileDecryptor;
import org.apache.parquet.example.Paper;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroup;
@@ -635,11 +639,16 @@ public class TestParquetMetadataConverter {
}
@Test
- public void testNullFieldMetadataDebugLogging() {
+ public void testFieldMetadataDebugLogging() {
MessageType schema = parseMessageType("message test { optional binary
some_null_field; }");
org.apache.parquet.hadoop.metadata.FileMetaData fileMetaData =
- new org.apache.parquet.hadoop.metadata.FileMetaData(schema, new
HashMap<String, String>(), null);
- List<BlockMetaData> blockMetaDataList = new ArrayList<BlockMetaData>();
+ new org.apache.parquet.hadoop.metadata.FileMetaData(
+ schema,
+ new HashMap<>(),
+ null,
+
org.apache.parquet.hadoop.metadata.FileMetaData.EncryptionType.UNENCRYPTED,
+ null);
+ List<BlockMetaData> blockMetaDataList = new ArrayList<>();
BlockMetaData blockMetaData = new BlockMetaData();
blockMetaData.addColumn(createColumnChunkMetaData());
blockMetaDataList.add(blockMetaData);
@@ -647,6 +656,32 @@ public class TestParquetMetadataConverter {
ParquetMetadata.toJSON(metadata);
}
+ @Test
+ public void testEncryptedFieldMetadataDebugLogging() {
+ Configuration conf = new Configuration();
+ conf.set(
+ EncryptionPropertiesFactory.CRYPTO_FACTORY_CLASS_PROPERTY_NAME,
+ "org.apache.parquet.crypto.SampleDecryptionPropertiesFactory");
+ DecryptionPropertiesFactory decryptionPropertiesFactory =
DecryptionPropertiesFactory.loadFactory(conf);
+ FileDecryptionProperties decryptionProperties =
+ decryptionPropertiesFactory.getFileDecryptionProperties(conf, null);
+
+ MessageType schema = parseMessageType("message test { optional binary
some_null_field; }");
+
+ org.apache.parquet.hadoop.metadata.FileMetaData fileMetaData =
+ new org.apache.parquet.hadoop.metadata.FileMetaData(
+ schema,
+ new HashMap<>(),
+ null,
+
org.apache.parquet.hadoop.metadata.FileMetaData.EncryptionType.ENCRYPTED_FOOTER,
+ new InternalFileDecryptor(decryptionProperties));
+
+ List<BlockMetaData> blockMetaDataList = new ArrayList<>();
+ ParquetMetadata metadata = new ParquetMetadata(fileMetaData,
blockMetaDataList);
+ ParquetMetadata.toJSON(metadata);
+ System.out.println(ParquetMetadata.toPrettyJSON(metadata));
+ }
+
@Test
public void testMetadataToJson() {
ParquetMetadata metadata = new ParquetMetadata(null, null);
diff --git a/parquet-jackson/pom.xml b/parquet-jackson/pom.xml
index 6bd860fd8..22453aae1 100644
--- a/parquet-jackson/pom.xml
+++ b/parquet-jackson/pom.xml
@@ -38,11 +38,22 @@
<artifactId>jackson-core</artifactId>
<version>${jackson.version}</version>
</dependency>
+ <dependency>
+ <groupId>${jackson.groupId}</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ <version>${jackson.version}</version>
+ </dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>${jackson-databind.version}</version>
</dependency>
+ <!-- Add support for Java 8 Optional -->
+ <dependency>
+ <groupId>com.fasterxml.jackson.datatype</groupId>
+ <artifactId>jackson-datatype-jdk8</artifactId>
+ <version>${jackson-modules-java8.version}</version>
+ </dependency>
</dependencies>
<properties>
@@ -70,6 +81,7 @@
<artifactSet>
<includes>
<include>${jackson.groupId}:*</include>
+ <include>${jackson.datatype.groupId}:*</include>
</includes>
</artifactSet>
<filters>
@@ -79,6 +91,12 @@
<include>**</include>
</includes>
</filter>
+ <filter>
+ <artifact>${jackson.datatype.groupId}:*</artifact>
+ <includes>
+ <include>**</include>
+ </includes>
+ </filter>
</filters>
<relocations>
<relocation>
diff --git a/pom.xml b/pom.xml
index 59ad34d94..73d89ebb5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -68,6 +68,7 @@
<jackson.package>com.fasterxml.jackson</jackson.package>
<jackson.version>2.17.0</jackson.version>
<jackson-databind.version>2.17.0</jackson-databind.version>
+ <jackson-modules-java8.version>2.17.0</jackson-modules-java8.version>
<japicmp.version>0.21.0</japicmp.version>
<javax.annotation.version>1.3.2</javax.annotation.version>
<spotless.version>2.30.0</spotless.version>