This is an automated email from the ASF dual-hosted git repository.

junhao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new f2cae5d5c3 [core] Reduce useless getFileStatus for Parquet Reader (#5217)
f2cae5d5c3 is described below

commit f2cae5d5c312f78f73029dd7359b05a6324de48e
Author: Jingsong Lee <jingsongl...@gmail.com>
AuthorDate: Wed Mar 5 18:16:40 2025 +0800

    [core] Reduce useless getFileStatus for Parquet Reader (#5217)
---
 .../apache/paimon/format/SimpleStatsExtractor.java |  4 ++--
 .../org/apache/paimon/fs/local/LocalFileIO.java    | 14 ++++++++++++
 .../paimon/format/SimpleColStatsExtractorTest.java |  2 +-
 .../apache/paimon/io/KeyValueDataFileWriter.java   |  6 ++++--
 .../org/apache/paimon/io/RowDataFileWriter.java    |  6 ++++--
 .../paimon/io/StatsCollectingSingleFileWriter.java |  4 ++--
 .../org/apache/paimon/migrate/FileMetaUtils.java   |  2 +-
 .../test/java/org/apache/paimon/SnapshotTest.java  | 25 ++++++++++++++++++++++
 .../paimon/stats/TestSimpleStatsExtractor.java     |  8 +++----
 .../format/avro/AvroSimpleStatsExtractor.java      | 10 ++++-----
 .../format/orc/filter/OrcSimpleStatsExtractor.java |  8 +++----
 .../paimon/format/parquet/ParquetInputFile.java    | 22 ++++++++++---------
 .../format/parquet/ParquetReaderFactory.java       |  6 ++++--
 .../parquet/ParquetSimpleStatsExtractor.java       | 10 ++++-----
 .../apache/paimon/format/parquet/ParquetUtil.java  | 19 +++++++++-------
 .../format/parquet/ParquetFormatReadWriteTest.java |  5 +++--
 16 files changed, 100 insertions(+), 51 deletions(-)

diff --git a/paimon-common/src/main/java/org/apache/paimon/format/SimpleStatsExtractor.java b/paimon-common/src/main/java/org/apache/paimon/format/SimpleStatsExtractor.java
index a2d599aaab..9ffbf495e5 100644
--- a/paimon-common/src/main/java/org/apache/paimon/format/SimpleStatsExtractor.java
+++ b/paimon-common/src/main/java/org/apache/paimon/format/SimpleStatsExtractor.java
@@ -27,9 +27,9 @@ import java.io.IOException;
 /** Extracts statistics directly from file. */
 public interface SimpleStatsExtractor {
 
-    SimpleColStats[] extract(FileIO fileIO, Path path) throws IOException;
+    SimpleColStats[] extract(FileIO fileIO, Path path, long length) throws IOException;
 
-    Pair<SimpleColStats[], FileInfo> extractWithFileInfo(FileIO fileIO, Path path)
+    Pair<SimpleColStats[], FileInfo> extractWithFileInfo(FileIO fileIO, Path path, long length)
             throws IOException;
 
     /** File info fetched from physical file. */
diff --git 
a/paimon-common/src/main/java/org/apache/paimon/fs/local/LocalFileIO.java 
b/paimon-common/src/main/java/org/apache/paimon/fs/local/LocalFileIO.java
index ac84c242c5..0b1c4ad260 100644
--- a/paimon-common/src/main/java/org/apache/paimon/fs/local/LocalFileIO.java
+++ b/paimon-common/src/main/java/org/apache/paimon/fs/local/LocalFileIO.java
@@ -26,6 +26,9 @@ import org.apache.paimon.fs.PositionOutputStream;
 import org.apache.paimon.fs.SeekableInputStream;
 import org.apache.paimon.fs.VectoredReadable;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
@@ -49,6 +52,8 @@ import static 
org.apache.paimon.utils.Preconditions.checkState;
 /** {@link FileIO} for local file. */
 public class LocalFileIO implements FileIO {
 
+    private static final Logger LOG = 
LoggerFactory.getLogger(LocalFileIO.class);
+
     private static final long serialVersionUID = 1L;
 
     // the lock to ensure atomic renaming
@@ -68,11 +73,13 @@ public class LocalFileIO implements FileIO {
 
     @Override
     public SeekableInputStream newInputStream(Path path) throws IOException {
+        LOG.debug("Invoking newInputStream for {}", path);
         return new LocalSeekableInputStream(toFile(path));
     }
 
     @Override
     public PositionOutputStream newOutputStream(Path path, boolean overwrite) 
throws IOException {
+        LOG.debug("Invoking newOutputStream for {}", path);
         if (exists(path) && !overwrite) {
             throw new FileAlreadyExistsException("File already exists: " + 
path);
         }
@@ -87,6 +94,7 @@ public class LocalFileIO implements FileIO {
 
     @Override
     public FileStatus getFileStatus(Path path) throws IOException {
+        LOG.debug("Invoking getFileStatus for {}", path);
         final File file = toFile(path);
         if (file.exists()) {
             return new LocalFileStatus(file, SCHEME);
@@ -103,6 +111,7 @@ public class LocalFileIO implements FileIO {
 
     @Override
     public FileStatus[] listStatus(Path path) throws IOException {
+        LOG.debug("Invoking listStatus for {}", path);
         final File file = toFile(path);
         FileStatus[] results = new FileStatus[0];
 
@@ -133,11 +142,13 @@ public class LocalFileIO implements FileIO {
 
     @Override
     public boolean exists(Path path) throws IOException {
+        LOG.debug("Invoking exists for {}", path);
         return toFile(path).exists();
     }
 
     @Override
     public boolean delete(Path path, boolean recursive) throws IOException {
+        LOG.debug("Invoking delete for {}", path);
         File file = toFile(path);
         if (file.isFile()) {
             return file.delete();
@@ -175,6 +186,7 @@ public class LocalFileIO implements FileIO {
 
     @Override
     public boolean mkdirs(Path path) throws IOException {
+        LOG.debug("Invoking mkdirs for {}", path);
         return mkdirsInternal(toFile(path));
     }
 
@@ -196,6 +208,7 @@ public class LocalFileIO implements FileIO {
 
     @Override
     public boolean rename(Path src, Path dst) throws IOException {
+        LOG.debug("Invoking rename for {} to {}", src, dst);
         File srcFile = toFile(src);
         File dstFile = toFile(dst);
         File dstParent = dstFile.getParentFile();
@@ -219,6 +232,7 @@ public class LocalFileIO implements FileIO {
 
     @Override
     public void copyFile(Path sourcePath, Path targetPath, boolean overwrite) 
throws IOException {
+        LOG.debug("Invoking copyFile for {} to {}", sourcePath, targetPath);
         if (!overwrite && exists(targetPath)) {
             return;
         }
diff --git 
a/paimon-common/src/test/java/org/apache/paimon/format/SimpleColStatsExtractorTest.java
 
b/paimon-common/src/test/java/org/apache/paimon/format/SimpleColStatsExtractorTest.java
index ddb3da7a63..c12bc3215d 100644
--- 
a/paimon-common/src/test/java/org/apache/paimon/format/SimpleColStatsExtractorTest.java
+++ 
b/paimon-common/src/test/java/org/apache/paimon/format/SimpleColStatsExtractorTest.java
@@ -99,7 +99,7 @@ public abstract class SimpleColStatsExtractorTest {
 
         SimpleStatsExtractor extractor = format.createStatsExtractor(rowType, 
stats).get();
         assertThat(extractor).isNotNull();
-        SimpleColStats[] actual = extractor.extract(fileIO, path);
+        SimpleColStats[] actual = extractor.extract(fileIO, path, 
fileIO.getFileSize(path));
         for (int i = 0; i < expected.length; i++) {
             expected[i] = regenerate(expected[i], rowType.getTypeAt(i));
         }
diff --git 
a/paimon-core/src/main/java/org/apache/paimon/io/KeyValueDataFileWriter.java 
b/paimon-core/src/main/java/org/apache/paimon/io/KeyValueDataFileWriter.java
index 3c7f6b45bb..e655f0ed3a 100644
--- a/paimon-core/src/main/java/org/apache/paimon/io/KeyValueDataFileWriter.java
+++ b/paimon-core/src/main/java/org/apache/paimon/io/KeyValueDataFileWriter.java
@@ -169,7 +169,9 @@ public abstract class KeyValueDataFileWriter
             return null;
         }
 
-        Pair<SimpleColStats[], SimpleColStats[]> keyValueStats = 
fetchKeyValueStats(fieldStats());
+        long fileSize = fileIO.getFileSize(path);
+        Pair<SimpleColStats[], SimpleColStats[]> keyValueStats =
+                fetchKeyValueStats(fieldStats(fileSize));
 
         SimpleStats keyStats = 
keyStatsConverter.toBinaryAllMode(keyValueStats.getKey());
         Pair<List<String>, SimpleStats> valueStatsPair =
@@ -183,7 +185,7 @@ public abstract class KeyValueDataFileWriter
         String externalPath = isExternalPath ? path.toString() : null;
         return new DataFileMeta(
                 path.getName(),
-                fileIO.getFileSize(path),
+                fileSize,
                 recordCount(),
                 minKey,
                 keySerializer.toBinaryRow(maxKey).copy(),
diff --git 
a/paimon-core/src/main/java/org/apache/paimon/io/RowDataFileWriter.java 
b/paimon-core/src/main/java/org/apache/paimon/io/RowDataFileWriter.java
index 25906e2dfa..a21041a3ab 100644
--- a/paimon-core/src/main/java/org/apache/paimon/io/RowDataFileWriter.java
+++ b/paimon-core/src/main/java/org/apache/paimon/io/RowDataFileWriter.java
@@ -109,7 +109,9 @@ public class RowDataFileWriter extends 
StatsCollectingSingleFileWriter<InternalR
 
     @Override
     public DataFileMeta result() throws IOException {
-        Pair<List<String>, SimpleStats> statsPair = 
statsArraySerializer.toBinary(fieldStats());
+        long fileSize = fileIO.getFileSize(path);
+        Pair<List<String>, SimpleStats> statsPair =
+                statsArraySerializer.toBinary(fieldStats(fileSize));
         DataFileIndexWriter.FileIndexResult indexResult =
                 dataFileIndexWriter == null
                         ? DataFileIndexWriter.EMPTY_RESULT
@@ -117,7 +119,7 @@ public class RowDataFileWriter extends 
StatsCollectingSingleFileWriter<InternalR
         String externalPath = isExternalPath ? path.toString() : null;
         return DataFileMeta.forAppend(
                 path.getName(),
-                fileIO.getFileSize(path),
+                fileSize,
                 recordCount(),
                 statsPair.getRight(),
                 seqNumCounter.getValue() - super.recordCount(),
diff --git 
a/paimon-core/src/main/java/org/apache/paimon/io/StatsCollectingSingleFileWriter.java
 
b/paimon-core/src/main/java/org/apache/paimon/io/StatsCollectingSingleFileWriter.java
index 67a3fa6d1a..07fc26d26f 100644
--- 
a/paimon-core/src/main/java/org/apache/paimon/io/StatsCollectingSingleFileWriter.java
+++ 
b/paimon-core/src/main/java/org/apache/paimon/io/StatsCollectingSingleFileWriter.java
@@ -96,13 +96,13 @@ public abstract class StatsCollectingSingleFileWriter<T, R> 
extends SingleFileWr
         super.writeBundle(bundle);
     }
 
-    public SimpleColStats[] fieldStats() throws IOException {
+    public SimpleColStats[] fieldStats(long fileSize) throws IOException {
         Preconditions.checkState(closed, "Cannot access metric unless the 
writer is closed.");
         if (simpleStatsExtractor != null) {
             if (isStatsDisabled) {
                 return noneStats;
             } else {
-                return simpleStatsExtractor.extract(fileIO, path);
+                return simpleStatsExtractor.extract(fileIO, path, fileSize);
             }
         } else {
             return simpleStatsCollector.extract();
diff --git 
a/paimon-core/src/main/java/org/apache/paimon/migrate/FileMetaUtils.java 
b/paimon-core/src/main/java/org/apache/paimon/migrate/FileMetaUtils.java
index 405870d5fa..a960ce7575 100644
--- a/paimon-core/src/main/java/org/apache/paimon/migrate/FileMetaUtils.java
+++ b/paimon-core/src/main/java/org/apache/paimon/migrate/FileMetaUtils.java
@@ -218,7 +218,7 @@ public class FileMetaUtils {
         SimpleStatsConverter statsArraySerializer = new 
SimpleStatsConverter(rowTypeWithSchemaId);
 
         Pair<SimpleColStats[], SimpleStatsExtractor.FileInfo> fileInfo =
-                simpleStatsExtractor.extractWithFileInfo(fileIO, path);
+                simpleStatsExtractor.extractWithFileInfo(fileIO, path, 
fileSize);
         SimpleStats stats = 
statsArraySerializer.toBinaryAllMode(fileInfo.getLeft());
 
         return DataFileMeta.forAppend(
diff --git a/paimon-core/src/test/java/org/apache/paimon/SnapshotTest.java 
b/paimon-core/src/test/java/org/apache/paimon/SnapshotTest.java
index 41f5e2dd95..1cceeffbfa 100644
--- a/paimon-core/src/test/java/org/apache/paimon/SnapshotTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/SnapshotTest.java
@@ -26,6 +26,7 @@ import org.apache.paimon.utils.SnapshotManager;
 import org.junit.jupiter.api.Test;
 
 import static 
org.apache.paimon.utils.FileSystemBranchManager.DEFAULT_MAIN_BRANCH;
+import static org.assertj.core.api.Assertions.assertThat;
 
 /** Test for snapshots. */
 public class SnapshotTest {
@@ -50,6 +51,30 @@ public class SnapshotTest {
                         + "}");
     }
 
+    @Test
+    public void testSnapshotWithSizes() {
+        String json =
+                "{\n"
+                        + "  \"version\" : 3,\n"
+                        + "  \"id\" : 5,\n"
+                        + "  \"schemaId\" : 0,\n"
+                        + "  \"baseManifestList\" : null,\n"
+                        + "  \"baseManifestListSize\" : 6,\n"
+                        + "  \"deltaManifestList\" : null,\n"
+                        + "  \"deltaManifestListSize\" : 8,\n"
+                        + "  \"changelogManifestListSize\" : 10,\n"
+                        + "  \"commitUser\" : null,\n"
+                        + "  \"commitIdentifier\" : 0,\n"
+                        + "  \"commitKind\" : \"APPEND\",\n"
+                        + "  \"timeMillis\" : 1234,\n"
+                        + "  \"totalRecordCount\" : null,\n"
+                        + "  \"deltaRecordCount\" : null,\n"
+                        + "  \"unknownKey\" : 22222\n"
+                        + "}";
+        Snapshot snapshot = Snapshot.fromJson(json);
+        assertThat(Snapshot.fromJson(snapshot.toJson())).isEqualTo(snapshot);
+    }
+
     public static SnapshotManager newSnapshotManager(FileIO fileIO, Path 
tablePath) {
         return newSnapshotManager(fileIO, tablePath, DEFAULT_MAIN_BRANCH);
     }
diff --git 
a/paimon-core/src/test/java/org/apache/paimon/stats/TestSimpleStatsExtractor.java
 
b/paimon-core/src/test/java/org/apache/paimon/stats/TestSimpleStatsExtractor.java
index a410fd6e47..96efc4d11a 100644
--- 
a/paimon-core/src/test/java/org/apache/paimon/stats/TestSimpleStatsExtractor.java
+++ 
b/paimon-core/src/test/java/org/apache/paimon/stats/TestSimpleStatsExtractor.java
@@ -59,13 +59,13 @@ public class TestSimpleStatsExtractor implements 
SimpleStatsExtractor {
     }
 
     @Override
-    public SimpleColStats[] extract(FileIO fileIO, Path path) throws 
IOException {
-        return extractWithFileInfo(fileIO, path).getLeft();
+    public SimpleColStats[] extract(FileIO fileIO, Path path, long length) 
throws IOException {
+        return extractWithFileInfo(fileIO, path, length).getLeft();
     }
 
     @Override
-    public Pair<SimpleColStats[], FileInfo> extractWithFileInfo(FileIO fileIO, 
Path path)
-            throws IOException {
+    public Pair<SimpleColStats[], FileInfo> extractWithFileInfo(
+            FileIO fileIO, Path path, long length) throws IOException {
         IdentityObjectSerializer serializer = new 
IdentityObjectSerializer(rowType);
         FormatReaderFactory readerFactory = 
format.createReaderFactory(rowType);
         List<InternalRow> records = readListFromFile(fileIO, path, serializer, 
readerFactory);
diff --git 
a/paimon-format/src/main/java/org/apache/paimon/format/avro/AvroSimpleStatsExtractor.java
 
b/paimon-format/src/main/java/org/apache/paimon/format/avro/AvroSimpleStatsExtractor.java
index 6539de25c3..16e014dd5d 100644
--- 
a/paimon-format/src/main/java/org/apache/paimon/format/avro/AvroSimpleStatsExtractor.java
+++ 
b/paimon-format/src/main/java/org/apache/paimon/format/avro/AvroSimpleStatsExtractor.java
@@ -39,25 +39,23 @@ import java.util.stream.IntStream;
 public class AvroSimpleStatsExtractor implements SimpleStatsExtractor {
 
     private final RowType rowType;
-    private final SimpleColStatsCollector.Factory[] statsCollectors;
 
     public AvroSimpleStatsExtractor(
             RowType rowType, SimpleColStatsCollector.Factory[] 
statsCollectors) {
         this.rowType = rowType;
-        this.statsCollectors = statsCollectors;
         Preconditions.checkArgument(
                 rowType.getFieldCount() == statsCollectors.length,
                 "The stats collector is not aligned to write schema.");
     }
 
     @Override
-    public SimpleColStats[] extract(FileIO fileIO, Path path) throws 
IOException {
-        return extractWithFileInfo(fileIO, path).getLeft();
+    public SimpleColStats[] extract(FileIO fileIO, Path path, long length) 
throws IOException {
+        return extractWithFileInfo(fileIO, path, length).getLeft();
     }
 
     @Override
-    public Pair<SimpleColStats[], FileInfo> extractWithFileInfo(FileIO fileIO, 
Path path)
-            throws IOException {
+    public Pair<SimpleColStats[], FileInfo> extractWithFileInfo(
+            FileIO fileIO, Path path, long length) throws IOException {
 
         SeekableInputStream fileInputStream = fileIO.newInputStream(path);
         long rowCount = getRowCount(fileInputStream);
diff --git 
a/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcSimpleStatsExtractor.java
 
b/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcSimpleStatsExtractor.java
index c0b9b6f59b..88fe069d46 100644
--- 
a/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcSimpleStatsExtractor.java
+++ 
b/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcSimpleStatsExtractor.java
@@ -71,13 +71,13 @@ public class OrcSimpleStatsExtractor implements 
SimpleStatsExtractor {
     }
 
     @Override
-    public SimpleColStats[] extract(FileIO fileIO, Path path) throws 
IOException {
-        return extractWithFileInfo(fileIO, path).getLeft();
+    public SimpleColStats[] extract(FileIO fileIO, Path path, long length) 
throws IOException {
+        return extractWithFileInfo(fileIO, path, length).getLeft();
     }
 
     @Override
-    public Pair<SimpleColStats[], FileInfo> extractWithFileInfo(FileIO fileIO, 
Path path)
-            throws IOException {
+    public Pair<SimpleColStats[], FileInfo> extractWithFileInfo(
+            FileIO fileIO, Path path, long length) throws IOException {
         try (Reader reader =
                 OrcReaderFactory.createReader(new Configuration(false), 
fileIO, path, null)) {
             long rowCount = reader.getNumberOfRows();
diff --git 
a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetInputFile.java
 
b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetInputFile.java
index 7c52c24971..0e68416ba2 100644
--- 
a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetInputFile.java
+++ 
b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetInputFile.java
@@ -19,7 +19,6 @@
 package org.apache.paimon.format.parquet;
 
 import org.apache.paimon.fs.FileIO;
-import org.apache.paimon.fs.FileStatus;
 import org.apache.paimon.fs.Path;
 
 import org.apache.parquet.io.InputFile;
@@ -30,33 +29,36 @@ import java.io.IOException;
 public class ParquetInputFile implements InputFile {
 
     private final FileIO fileIO;
-    private final FileStatus stat;
+    private final Path path;
+    private final long length;
 
-    public static ParquetInputFile fromPath(FileIO fileIO, Path path) throws 
IOException {
-        return new ParquetInputFile(fileIO, fileIO.getFileStatus(path));
+    public static ParquetInputFile fromPath(FileIO fileIO, Path path, long 
length)
+            throws IOException {
+        return new ParquetInputFile(fileIO, path, length);
     }
 
-    private ParquetInputFile(FileIO fileIO, FileStatus stat) {
+    private ParquetInputFile(FileIO fileIO, Path path, long length) {
         this.fileIO = fileIO;
-        this.stat = stat;
+        this.path = path;
+        this.length = length;
     }
 
     public Path getPath() {
-        return stat.getPath();
+        return path;
     }
 
     @Override
     public long getLength() {
-        return stat.getLen();
+        return length;
     }
 
     @Override
     public ParquetInputStream newStream() throws IOException {
-        return new ParquetInputStream(fileIO.newInputStream(stat.getPath()));
+        return new ParquetInputStream(fileIO.newInputStream(path));
     }
 
     @Override
     public String toString() {
-        return stat.getPath().toString();
+        return path.toString();
     }
 }
diff --git 
a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java
 
b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java
index 82ffa79336..15db2d113a 100644
--- 
a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java
+++ 
b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java
@@ -113,7 +113,8 @@ public class ParquetReaderFactory implements 
FormatReaderFactory {
 
         ParquetFileReader reader =
                 new ParquetFileReader(
-                        ParquetInputFile.fromPath(context.fileIO(), 
context.filePath()),
+                        ParquetInputFile.fromPath(
+                                context.fileIO(), context.filePath(), 
context.fileSize()),
                         builder.build(),
                         context.selection());
         MessageType fileSchema = reader.getFileMetaData().getSchema();
@@ -145,7 +146,8 @@ public class ParquetReaderFactory implements 
FormatReaderFactory {
 
         ParquetFileReader reader =
                 new ParquetFileReader(
-                        ParquetInputFile.fromPath(context.fileIO(), 
context.filePath()),
+                        ParquetInputFile.fromPath(
+                                context.fileIO(), context.filePath(), 
context.fileSize()),
                         builder.build(),
                         context.selection());
         MessageType fileSchema = reader.getFileMetaData().getSchema();
diff --git 
a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSimpleStatsExtractor.java
 
b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSimpleStatsExtractor.java
index c0d6cef1b8..4dbd232572 100644
--- 
a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSimpleStatsExtractor.java
+++ 
b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSimpleStatsExtractor.java
@@ -67,15 +67,15 @@ public class ParquetSimpleStatsExtractor implements 
SimpleStatsExtractor {
     }
 
     @Override
-    public SimpleColStats[] extract(FileIO fileIO, Path path) throws 
IOException {
-        return extractWithFileInfo(fileIO, path).getLeft();
+    public SimpleColStats[] extract(FileIO fileIO, Path path, long length) 
throws IOException {
+        return extractWithFileInfo(fileIO, path, length).getLeft();
     }
 
     @Override
-    public Pair<SimpleColStats[], FileInfo> extractWithFileInfo(FileIO fileIO, 
Path path)
-            throws IOException {
+    public Pair<SimpleColStats[], FileInfo> extractWithFileInfo(
+            FileIO fileIO, Path path, long length) throws IOException {
         Pair<Map<String, Statistics<?>>, FileInfo> statsPair =
-                ParquetUtil.extractColumnStats(fileIO, path);
+                ParquetUtil.extractColumnStats(fileIO, path, length);
         SimpleColStatsCollector[] collectors = 
SimpleColStatsCollector.create(statsCollectors);
         return Pair.of(
                 IntStream.range(0, rowType.getFieldCount())
diff --git 
a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetUtil.java 
b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetUtil.java
index 038c91445b..0ec4fa162a 100644
--- 
a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetUtil.java
+++ 
b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetUtil.java
@@ -40,15 +40,16 @@ import java.util.Map;
 public class ParquetUtil {
 
     /**
-     * Extract stats from specified Parquet files path.
+     * Extract stats from specified Parquet file path.
      *
-     * @param path the path of parquet files to be read
+     * @param path the path of parquet file to be read
+     * @param length the length of parquet file to be read
      * @return result sets as map, key is column name, value is statistics 
(for example, null count,
      *     minimum value, maximum value)
      */
     public static Pair<Map<String, Statistics<?>>, 
SimpleStatsExtractor.FileInfo>
-            extractColumnStats(FileIO fileIO, Path path) throws IOException {
-        try (ParquetFileReader reader = getParquetReader(fileIO, path)) {
+            extractColumnStats(FileIO fileIO, Path path, long length) throws 
IOException {
+        try (ParquetFileReader reader = getParquetReader(fileIO, path, 
length)) {
             ParquetMetadata parquetMetadata = reader.getFooter();
             List<BlockMetaData> blockMetaDataList = 
parquetMetadata.getBlocks();
             Map<String, Statistics<?>> resultStats = new HashMap<>();
@@ -72,14 +73,16 @@ public class ParquetUtil {
     }
 
     /**
-     * Generate {@link ParquetFileReader} instance to read the Parquet files 
at the given path.
+     * Generate {@link ParquetFileReader} instance to read the Parquet file at 
the given path.
      *
-     * @param path the path of parquet files to be read
+     * @param path the path of parquet file to be read
+     * @param length the length of parquet file to be read
      * @return parquet reader, used for reading footer, status, etc.
      */
-    public static ParquetFileReader getParquetReader(FileIO fileIO, Path path) 
throws IOException {
+    public static ParquetFileReader getParquetReader(FileIO fileIO, Path path, 
long length)
+            throws IOException {
         return new ParquetFileReader(
-                ParquetInputFile.fromPath(fileIO, path),
+                ParquetInputFile.fromPath(fileIO, path, length),
                 ParquetReadOptions.builder().build(),
                 null);
     }
diff --git 
a/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetFormatReadWriteTest.java
 
b/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetFormatReadWriteTest.java
index 221d524fff..024ea93b6e 100644
--- 
a/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetFormatReadWriteTest.java
+++ 
b/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetFormatReadWriteTest.java
@@ -64,7 +64,7 @@ public class ParquetFormatReadWriteTest extends 
FormatReadWriteTest {
         RowType rowType = DataTypes.ROW(DataTypes.INT().notNull(), 
DataTypes.BIGINT());
 
         if (ThreadLocalRandom.current().nextBoolean()) {
-            rowType = (RowType) rowType.notNull();
+            rowType = rowType.notNull();
         }
 
         PositionOutputStream out = fileIO.newOutputStream(file, false);
@@ -75,7 +75,8 @@ public class ParquetFormatReadWriteTest extends 
FormatReadWriteTest {
         writer.close();
         out.close();
 
-        try (ParquetFileReader reader = ParquetUtil.getParquetReader(fileIO, 
file)) {
+        try (ParquetFileReader reader =
+                ParquetUtil.getParquetReader(fileIO, file, 
fileIO.getFileSize(file))) {
             ParquetMetadata parquetMetadata = reader.getFooter();
             List<BlockMetaData> blockMetaDataList = 
parquetMetadata.getBlocks();
             for (BlockMetaData blockMetaData : blockMetaDataList) {

Reply via email to