This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 3781dfe78 PARQUET-2428: Make RawPagesReader accept specified columns. (#1269)
3781dfe78 is described below

commit 3781dfe78979997308809e9e9ca19bed31c4eb9c
Author: Yujiang Zhong <[email protected]>
AuthorDate: Sun Feb 18 13:36:55 2024 +0800

    PARQUET-2428: Make RawPagesReader accept specified columns. (#1269)
---
 .../org/apache/parquet/cli/commands/ShowPagesCommand.java   |  2 +-
 .../org/apache/parquet/cli/rawpages/RawPagesReader.java     | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java
index 6f6e67d82..faee61815 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java
@@ -83,7 +83,7 @@ public class ShowPagesCommand extends BaseCommand {
     // command.
     if (raw) {
       try (RawPagesReader reader =
-          new RawPagesReader(HadoopInputFile.fromPath(qualifiedPath(targets.get(0)), getConf()))) {
+          new RawPagesReader(HadoopInputFile.fromPath(qualifiedPath(targets.get(0)), getConf()), columns)) {
         reader.listPages(console);
       }
       return 0;
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/rawpages/RawPagesReader.java b/parquet-cli/src/main/java/org/apache/parquet/cli/rawpages/RawPagesReader.java
index d53fea539..febd94eba 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/rawpages/RawPagesReader.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/rawpages/RawPagesReader.java
@@ -21,6 +21,9 @@ package org.apache.parquet.cli.rawpages;
 import static org.apache.parquet.hadoop.ParquetFileWriter.MAGIC;
 
 import java.io.IOException;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
 import org.apache.parquet.cli.util.RawUtils;
 import org.apache.parquet.format.CliUtils;
 import org.apache.parquet.format.ColumnChunk;
@@ -37,8 +40,13 @@ public class RawPagesReader implements AutoCloseable {
 
   private final SeekableInputStream input;
   private final FileMetaData footer;
+  private final Set<String> columns;
 
   public RawPagesReader(InputFile file) throws IOException {
+    this(file, null);
+  }
+
+  public RawPagesReader(InputFile file, List<String> cols) throws IOException {
     long fileLen = file.getLength();
 
     if (fileLen < MAGIC.length + 4 + MAGIC.length) {
@@ -47,6 +55,7 @@ public class RawPagesReader implements AutoCloseable {
 
     input = file.newStream();
     footer = RawUtils.readFooter(input, fileLen);
+    columns = cols == null || cols.isEmpty() ? null : new HashSet<>(cols);
   }
 
   public void listPages(Logger console) throws IOException {
@@ -55,6 +64,10 @@ public class RawPagesReader implements AutoCloseable {
       for (ColumnChunk columnChunk : rowGroup.getColumns()) {
         ColumnMetaData metaData = columnChunk.getMeta_data();
         String path = String.join(".", metaData.getPath_in_schema());
+        if (columns != null && !columns.contains(path)) {
+          continue;
+        }
+
         long totalSize = metaData.getTotal_compressed_size();
         long dictOffset = metaData.getDictionary_page_offset();
         long seekTo = metaData.getData_page_offset();

Reply via email to