This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 5e115b1fb PARQUET-2283: Remove Hadoop HiddenFileFilter (#1072)
5e115b1fb is described below

commit 5e115b1fb4581cbd0a90aa4a8970eb1694082075
Author: Fokko Driesprong <[email protected]>
AuthorDate: Tue Apr 18 09:41:27 2023 +0200

    PARQUET-2283: Remove Hadoop HiddenFileFilter (#1072)
    
    For Iceberg/Flink we would like to run without the hadoop dependencies.
    The use of the HiddenFileFilter is blocking this. This replaces the filter
    with a nice stream.
---
 .../apache/parquet/hadoop/ParquetFileReader.java   | 25 +++++++++++++++-------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
index 7fa71cb61..b50149cdb 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
@@ -53,6 +53,8 @@ import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 import java.util.zip.CRC32;
 
 import org.apache.hadoop.conf.Configuration;
@@ -99,7 +101,6 @@ import org.apache.parquet.hadoop.metadata.ColumnPath;
 import org.apache.parquet.hadoop.metadata.FileMetaData;
 import org.apache.parquet.hadoop.metadata.ParquetMetadata;
 import org.apache.parquet.hadoop.util.HadoopInputFile;
-import org.apache.parquet.hadoop.util.HiddenFileFilter;
 import org.apache.parquet.hadoop.util.counters.BenchmarkCounter;
 import org.apache.parquet.internal.column.columnindex.ColumnIndex;
 import org.apache.parquet.internal.column.columnindex.OffsetIndex;
@@ -374,17 +375,25 @@ public class ParquetFileReader implements Closeable {
     return readAllFootersInParallelUsingSummaryFiles(configuration, files, skipRowGroups);
   }
 
+  static boolean filterHiddenFiles(FileStatus file) {
+    final char c = file.getPath().getName().charAt(0);
+    return c != '.' && c != '_';
+  }
+
   private static List<FileStatus> listFiles(Configuration conf, FileStatus fileStatus) throws IOException {
     if (fileStatus.isDir()) {
       FileSystem fs = fileStatus.getPath().getFileSystem(conf);
-      FileStatus[] list = fs.listStatus(fileStatus.getPath(), HiddenFileFilter.INSTANCE);
-      List<FileStatus> result = new ArrayList<FileStatus>();
-      for (FileStatus sub : list) {
-        result.addAll(listFiles(conf, sub));
-      }
-      return result;
+      return Arrays.stream(fs.listStatus(fileStatus.getPath()))
+        .filter(ParquetFileReader::filterHiddenFiles)
+        .flatMap(sub -> {
+          try {
+            return listFiles(conf, sub).stream();
+          } catch (IOException e) {
+            throw new RuntimeException(e);
+          }
+        }).collect(Collectors.toList());
     } else {
-      return Arrays.asList(fileStatus);
+      return Collections.singletonList(fileStatus);
     }
   }
 

Reply via email to