This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 9782d636b Modest refactor of ParquetFileWriter (#1146)
9782d636b is described below

commit 9782d636bb9ca18ee822fec357e3aac3f3fad412
Author: Fokko Driesprong <[email protected]>
AuthorDate: Fri Sep 22 12:36:11 2023 +0200

    Modest refactor of ParquetFileWriter (#1146)
    
    IDEA was lighting up as a Christmas tree :)
---
 .../apache/parquet/hadoop/ParquetFileWriter.java   | 29 +++++++++-------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
index 7ec38ee24..80b9907a2 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
@@ -28,7 +28,6 @@ import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashSet;
@@ -122,7 +121,7 @@ public class ParquetFileWriter {
   private final int columnIndexTruncateLength;
 
   // file data
-  private List<BlockMetaData> blocks = new ArrayList<BlockMetaData>();
+  private final List<BlockMetaData> blocks = new ArrayList<BlockMetaData>();
 
   // The column/offset indexes per blocks per column chunks
   private final List<List<ColumnIndex>> columnIndexes = new ArrayList<>();
@@ -148,11 +147,11 @@ public class ParquetFileWriter {
   private long currentRecordCount; // set in startBlock
 
   // column chunk data accumulated as pages are written
-  private EncodingStats.Builder encodingStatsBuilder;
+  private final EncodingStats.Builder encodingStatsBuilder;
   private Set<Encoding> currentEncodings;
   private long uncompressedLength;
   private long compressedLength;
-  private Statistics currentStatistics; // accumulated in writePage(s)
+  private Statistics<?> currentStatistics; // accumulated in writePage(s)
   private ColumnIndexBuilder columnIndexBuilder;
   private OffsetIndexBuilder offsetIndexBuilder;
 
@@ -168,7 +167,7 @@ public class ParquetFileWriter {
   private ParquetMetadata footer = null;
 
   private final CRC32 crc;
-  private boolean pageWriteChecksumEnabled;
+  private final boolean pageWriteChecksumEnabled;
 
   /**
    * Captures the order in which methods should be called
@@ -372,7 +371,7 @@ public class ParquetFileWriter {
           StringBuilder columnList = new StringBuilder();
           columnList.append("[");
           for (String[] columnPath : schema.getPaths()) {
-            columnList.append(ColumnPath.get(columnPath).toDotString() + "], [");
+            columnList.append(ColumnPath.get(columnPath).toDotString()).append("], [");
           }
           throw new ParquetCryptoRuntimeException("Encrypted column [" + entry.getKey().toDotString() +
             "] not in file schema column list: " + columnList.substring(0, columnList.length() - 3));
@@ -590,7 +589,7 @@ public class ParquetFileWriter {
   public void writeDataPage(
       int valueCount, int uncompressedPageSize,
       BytesInput bytes,
-      Statistics statistics,
+      Statistics<?> statistics,
       Encoding rlEncoding,
       Encoding dlEncoding,
       Encoding valuesEncoding) throws IOException {
@@ -615,7 +614,7 @@ public class ParquetFileWriter {
   public void writeDataPage(
     int valueCount, int uncompressedPageSize,
     BytesInput bytes,
-    Statistics statistics,
+    Statistics<?> statistics,
     long rowCount,
     Encoding rlEncoding,
     Encoding dlEncoding,
@@ -640,7 +639,7 @@ public class ParquetFileWriter {
   public void writeDataPage(
       int valueCount, int uncompressedPageSize,
       BytesInput bytes,
-      Statistics statistics,
+      Statistics<?> statistics,
       long rowCount,
       Encoding rlEncoding,
       Encoding dlEncoding,
@@ -655,7 +654,7 @@ public class ParquetFileWriter {
   private void innerWriteDataPage(
       int valueCount, int uncompressedPageSize,
       BytesInput bytes,
-      Statistics statistics,
+      Statistics<?> statistics,
       Encoding rlEncoding,
       Encoding dlEncoding,
       Encoding valuesEncoding,
@@ -680,7 +679,7 @@ public class ParquetFileWriter {
   public void writeDataPage(
     int valueCount, int uncompressedPageSize,
     BytesInput bytes,
-    Statistics statistics,
+    Statistics<?> statistics,
     Encoding rlEncoding,
     Encoding dlEncoding,
     Encoding valuesEncoding,
@@ -1608,12 +1607,8 @@ public class ParquetFileWriter {
       schema = mergeInto(toMerge.getSchema(), schema, strict);
     }
     for (Entry<String, String> entry : toMerge.getKeyValueMetaData().entrySet()) {
-      Set<String> values = newKeyValues.get(entry.getKey());
-      if (values == null) {
-        values = new LinkedHashSet<String>();
-        newKeyValues.put(entry.getKey(), values);
-      }
-      values.add(entry.getValue());
+        Set<String> values = newKeyValues.computeIfAbsent(entry.getKey(), k -> new LinkedHashSet<String>());
+        values.add(entry.getValue());
     }
     createdBy.add(toMerge.getCreatedBy());
     return new GlobalMetaData(

Reply via email to