This is an automated email from the ASF dual-hosted git repository. junhao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push: new f2cae5d5c3 [core] Reduce useless getFileStatus for Parquet Reader (#5217) f2cae5d5c3 is described below commit f2cae5d5c312f78f73029dd7359b05a6324de48e Author: Jingsong Lee <jingsongl...@gmail.com> AuthorDate: Wed Mar 5 18:16:40 2025 +0800 [core] Reduce useless getFileStatus for Parquet Reader (#5217) --- .../apache/paimon/format/SimpleStatsExtractor.java | 4 ++-- .../org/apache/paimon/fs/local/LocalFileIO.java | 14 ++++++++++++ .../paimon/format/SimpleColStatsExtractorTest.java | 2 +- .../apache/paimon/io/KeyValueDataFileWriter.java | 6 ++++-- .../org/apache/paimon/io/RowDataFileWriter.java | 6 ++++-- .../paimon/io/StatsCollectingSingleFileWriter.java | 4 ++-- .../org/apache/paimon/migrate/FileMetaUtils.java | 2 +- .../test/java/org/apache/paimon/SnapshotTest.java | 25 ++++++++++++++++++++++ .../paimon/stats/TestSimpleStatsExtractor.java | 8 +++---- .../format/avro/AvroSimpleStatsExtractor.java | 10 ++++----- .../format/orc/filter/OrcSimpleStatsExtractor.java | 8 +++---- .../paimon/format/parquet/ParquetInputFile.java | 22 ++++++++++--------- .../format/parquet/ParquetReaderFactory.java | 6 ++++-- .../parquet/ParquetSimpleStatsExtractor.java | 10 ++++----- .../apache/paimon/format/parquet/ParquetUtil.java | 19 +++++++++------- .../format/parquet/ParquetFormatReadWriteTest.java | 5 +++-- 16 files changed, 100 insertions(+), 51 deletions(-) diff --git a/paimon-common/src/main/java/org/apache/paimon/format/SimpleStatsExtractor.java b/paimon-common/src/main/java/org/apache/paimon/format/SimpleStatsExtractor.java index a2d599aaab..9ffbf495e5 100644 --- a/paimon-common/src/main/java/org/apache/paimon/format/SimpleStatsExtractor.java +++ b/paimon-common/src/main/java/org/apache/paimon/format/SimpleStatsExtractor.java @@ -27,9 +27,9 @@ import java.io.IOException; /** Extracts statistics directly from file. */ public interface SimpleStatsExtractor { - SimpleColStats[] extract(FileIO fileIO, Path path) throws IOException; + SimpleColStats[] extract(FileIO fileIO, Path path, long length) throws IOException; - Pair<SimpleColStats[], FileInfo> extractWithFileInfo(FileIO fileIO, Path path) + Pair<SimpleColStats[], FileInfo> extractWithFileInfo(FileIO fileIO, Path path, long length) throws IOException; /** File info fetched from physical file. */ diff --git a/paimon-common/src/main/java/org/apache/paimon/fs/local/LocalFileIO.java b/paimon-common/src/main/java/org/apache/paimon/fs/local/LocalFileIO.java index ac84c242c5..0b1c4ad260 100644 --- a/paimon-common/src/main/java/org/apache/paimon/fs/local/LocalFileIO.java +++ b/paimon-common/src/main/java/org/apache/paimon/fs/local/LocalFileIO.java @@ -26,6 +26,9 @@ import org.apache.paimon.fs.PositionOutputStream; import org.apache.paimon.fs.SeekableInputStream; import org.apache.paimon.fs.VectoredReadable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; @@ -49,6 +52,8 @@ import static org.apache.paimon.utils.Preconditions.checkState; /** {@link FileIO} for local file. */ public class LocalFileIO implements FileIO { + private static final Logger LOG = LoggerFactory.getLogger(LocalFileIO.class); + private static final long serialVersionUID = 1L; // the lock to ensure atomic renaming @@ -68,11 +73,13 @@ public class LocalFileIO implements FileIO { @Override public SeekableInputStream newInputStream(Path path) throws IOException { + LOG.debug("Invoking newInputStream for {}", path); return new LocalSeekableInputStream(toFile(path)); } @Override public PositionOutputStream newOutputStream(Path path, boolean overwrite) throws IOException { + LOG.debug("Invoking newOutputStream for {}", path); if (exists(path) && !overwrite) { throw new FileAlreadyExistsException("File already exists: " + path); } @@ -87,6 +94,7 @@ public class LocalFileIO implements FileIO { @Override public FileStatus getFileStatus(Path path) throws IOException { + LOG.debug("Invoking getFileStatus for {}", path); final File file = toFile(path); if (file.exists()) { return new LocalFileStatus(file, SCHEME); @@ -103,6 +111,7 @@ public class LocalFileIO implements FileIO { @Override public FileStatus[] listStatus(Path path) throws IOException { + LOG.debug("Invoking listStatus for {}", path); final File file = toFile(path); FileStatus[] results = new FileStatus[0]; @@ -133,11 +142,13 @@ public class LocalFileIO implements FileIO { @Override public boolean exists(Path path) throws IOException { + LOG.debug("Invoking exists for {}", path); return toFile(path).exists(); } @Override public boolean delete(Path path, boolean recursive) throws IOException { + LOG.debug("Invoking delete for {}", path); File file = toFile(path); if (file.isFile()) { return file.delete(); @@ -175,6 +186,7 @@ public class LocalFileIO implements FileIO { @Override public boolean mkdirs(Path path) throws IOException { + LOG.debug("Invoking mkdirs for {}", path); return mkdirsInternal(toFile(path)); } @@ -196,6 +208,7 @@ public class LocalFileIO implements FileIO { @Override public boolean rename(Path src, Path dst) throws IOException { + LOG.debug("Invoking rename for {} to {}", src, dst); File srcFile = toFile(src); File dstFile = toFile(dst); File dstParent = dstFile.getParentFile(); @@ -219,6 +232,7 @@ public class LocalFileIO implements FileIO { @Override public void copyFile(Path sourcePath, Path targetPath, boolean overwrite) throws IOException { + LOG.debug("Invoking copyFile for {} to {}", sourcePath, targetPath); if (!overwrite && exists(targetPath)) { return; } diff --git a/paimon-common/src/test/java/org/apache/paimon/format/SimpleColStatsExtractorTest.java b/paimon-common/src/test/java/org/apache/paimon/format/SimpleColStatsExtractorTest.java index ddb3da7a63..c12bc3215d 100644 --- a/paimon-common/src/test/java/org/apache/paimon/format/SimpleColStatsExtractorTest.java +++ b/paimon-common/src/test/java/org/apache/paimon/format/SimpleColStatsExtractorTest.java @@ -99,7 +99,7 @@ public abstract class SimpleColStatsExtractorTest { SimpleStatsExtractor extractor = format.createStatsExtractor(rowType, stats).get(); assertThat(extractor).isNotNull(); - SimpleColStats[] actual = extractor.extract(fileIO, path); + SimpleColStats[] actual = extractor.extract(fileIO, path, fileIO.getFileSize(path)); for (int i = 0; i < expected.length; i++) { expected[i] = regenerate(expected[i], rowType.getTypeAt(i)); } diff --git a/paimon-core/src/main/java/org/apache/paimon/io/KeyValueDataFileWriter.java b/paimon-core/src/main/java/org/apache/paimon/io/KeyValueDataFileWriter.java index 3c7f6b45bb..e655f0ed3a 100644 --- a/paimon-core/src/main/java/org/apache/paimon/io/KeyValueDataFileWriter.java +++ b/paimon-core/src/main/java/org/apache/paimon/io/KeyValueDataFileWriter.java @@ -169,7 +169,9 @@ public abstract class KeyValueDataFileWriter return null; } - Pair<SimpleColStats[], SimpleColStats[]> keyValueStats = fetchKeyValueStats(fieldStats()); + long fileSize = fileIO.getFileSize(path); + Pair<SimpleColStats[], SimpleColStats[]> keyValueStats = + fetchKeyValueStats(fieldStats(fileSize)); SimpleStats keyStats = keyStatsConverter.toBinaryAllMode(keyValueStats.getKey()); Pair<List<String>, SimpleStats> valueStatsPair = @@ -183,7 +185,7 @@ public abstract class KeyValueDataFileWriter String externalPath = isExternalPath ? path.toString() : null; return new DataFileMeta( path.getName(), - fileIO.getFileSize(path), + fileSize, recordCount(), minKey, keySerializer.toBinaryRow(maxKey).copy(), diff --git a/paimon-core/src/main/java/org/apache/paimon/io/RowDataFileWriter.java b/paimon-core/src/main/java/org/apache/paimon/io/RowDataFileWriter.java index 25906e2dfa..a21041a3ab 100644 --- a/paimon-core/src/main/java/org/apache/paimon/io/RowDataFileWriter.java +++ b/paimon-core/src/main/java/org/apache/paimon/io/RowDataFileWriter.java @@ -109,7 +109,9 @@ public class RowDataFileWriter extends StatsCollectingSingleFileWriter<InternalR @Override public DataFileMeta result() throws IOException { - Pair<List<String>, SimpleStats> statsPair = statsArraySerializer.toBinary(fieldStats()); + long fileSize = fileIO.getFileSize(path); + Pair<List<String>, SimpleStats> statsPair = + statsArraySerializer.toBinary(fieldStats(fileSize)); DataFileIndexWriter.FileIndexResult indexResult = dataFileIndexWriter == null ? DataFileIndexWriter.EMPTY_RESULT @@ -117,7 +119,7 @@ public class RowDataFileWriter extends StatsCollectingSingleFileWriter<InternalR String externalPath = isExternalPath ? path.toString() : null; return DataFileMeta.forAppend( path.getName(), - fileIO.getFileSize(path), + fileSize, recordCount(), statsPair.getRight(), seqNumCounter.getValue() - super.recordCount(), diff --git a/paimon-core/src/main/java/org/apache/paimon/io/StatsCollectingSingleFileWriter.java b/paimon-core/src/main/java/org/apache/paimon/io/StatsCollectingSingleFileWriter.java index 67a3fa6d1a..07fc26d26f 100644 --- a/paimon-core/src/main/java/org/apache/paimon/io/StatsCollectingSingleFileWriter.java +++ b/paimon-core/src/main/java/org/apache/paimon/io/StatsCollectingSingleFileWriter.java @@ -96,13 +96,13 @@ public abstract class StatsCollectingSingleFileWriter<T, R> extends SingleFileWr super.writeBundle(bundle); } - public SimpleColStats[] fieldStats() throws IOException { + public SimpleColStats[] fieldStats(long fileSize) throws IOException { Preconditions.checkState(closed, "Cannot access metric unless the writer is closed."); if (simpleStatsExtractor != null) { if (isStatsDisabled) { return noneStats; } else { - return simpleStatsExtractor.extract(fileIO, path); + return simpleStatsExtractor.extract(fileIO, path, fileSize); } } else { return simpleStatsCollector.extract(); diff --git a/paimon-core/src/main/java/org/apache/paimon/migrate/FileMetaUtils.java b/paimon-core/src/main/java/org/apache/paimon/migrate/FileMetaUtils.java index 405870d5fa..a960ce7575 100644 --- a/paimon-core/src/main/java/org/apache/paimon/migrate/FileMetaUtils.java +++ b/paimon-core/src/main/java/org/apache/paimon/migrate/FileMetaUtils.java @@ -218,7 +218,7 @@ public class FileMetaUtils { SimpleStatsConverter statsArraySerializer = new SimpleStatsConverter(rowTypeWithSchemaId); Pair<SimpleColStats[], SimpleStatsExtractor.FileInfo> fileInfo = - simpleStatsExtractor.extractWithFileInfo(fileIO, path); + simpleStatsExtractor.extractWithFileInfo(fileIO, path, fileSize); SimpleStats stats = statsArraySerializer.toBinaryAllMode(fileInfo.getLeft()); return DataFileMeta.forAppend( diff --git a/paimon-core/src/test/java/org/apache/paimon/SnapshotTest.java b/paimon-core/src/test/java/org/apache/paimon/SnapshotTest.java index 41f5e2dd95..1cceeffbfa 100644 --- a/paimon-core/src/test/java/org/apache/paimon/SnapshotTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/SnapshotTest.java @@ -26,6 +26,7 @@ import org.apache.paimon.utils.SnapshotManager; import org.junit.jupiter.api.Test; import static org.apache.paimon.utils.FileSystemBranchManager.DEFAULT_MAIN_BRANCH; +import static org.assertj.core.api.Assertions.assertThat; /** Test for snapshots. */ public class SnapshotTest { @@ -50,6 +51,30 @@ public class SnapshotTest { + "}"); } + @Test + public void testSnapshotWithSizes() { + String json = + "{\n" + + " \"version\" : 3,\n" + + " \"id\" : 5,\n" + + " \"schemaId\" : 0,\n" + + " \"baseManifestList\" : null,\n" + + " \"baseManifestListSize\" : 6,\n" + + " \"deltaManifestList\" : null,\n" + + " \"deltaManifestListSize\" : 8,\n" + + " \"changelogManifestListSize\" : 10,\n" + + " \"commitUser\" : null,\n" + + " \"commitIdentifier\" : 0,\n" + + " \"commitKind\" : \"APPEND\",\n" + + " \"timeMillis\" : 1234,\n" + + " \"totalRecordCount\" : null,\n" + + " \"deltaRecordCount\" : null,\n" + + " \"unknownKey\" : 22222\n" + + "}"; + Snapshot snapshot = Snapshot.fromJson(json); + assertThat(Snapshot.fromJson(snapshot.toJson())).isEqualTo(snapshot); + } + public static SnapshotManager newSnapshotManager(FileIO fileIO, Path tablePath) { return newSnapshotManager(fileIO, tablePath, DEFAULT_MAIN_BRANCH); } diff --git a/paimon-core/src/test/java/org/apache/paimon/stats/TestSimpleStatsExtractor.java b/paimon-core/src/test/java/org/apache/paimon/stats/TestSimpleStatsExtractor.java index a410fd6e47..96efc4d11a 100644 --- a/paimon-core/src/test/java/org/apache/paimon/stats/TestSimpleStatsExtractor.java +++ b/paimon-core/src/test/java/org/apache/paimon/stats/TestSimpleStatsExtractor.java @@ -59,13 +59,13 @@ public class TestSimpleStatsExtractor implements SimpleStatsExtractor { } @Override - public SimpleColStats[] extract(FileIO fileIO, Path path) throws IOException { - return extractWithFileInfo(fileIO, path).getLeft(); + public SimpleColStats[] extract(FileIO fileIO, Path path, long length) throws IOException { + return extractWithFileInfo(fileIO, path, length).getLeft(); } @Override - public Pair<SimpleColStats[], FileInfo> extractWithFileInfo(FileIO fileIO, Path path) - throws IOException { + public Pair<SimpleColStats[], FileInfo> extractWithFileInfo( + FileIO fileIO, Path path, long length) throws IOException { IdentityObjectSerializer serializer = new IdentityObjectSerializer(rowType); FormatReaderFactory readerFactory = format.createReaderFactory(rowType); List<InternalRow> records = readListFromFile(fileIO, path, serializer, readerFactory); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/avro/AvroSimpleStatsExtractor.java b/paimon-format/src/main/java/org/apache/paimon/format/avro/AvroSimpleStatsExtractor.java index 6539de25c3..16e014dd5d 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/avro/AvroSimpleStatsExtractor.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/avro/AvroSimpleStatsExtractor.java @@ -39,25 +39,23 @@ import java.util.stream.IntStream; public class AvroSimpleStatsExtractor implements SimpleStatsExtractor { private final RowType rowType; - private final SimpleColStatsCollector.Factory[] statsCollectors; public AvroSimpleStatsExtractor( RowType rowType, SimpleColStatsCollector.Factory[] statsCollectors) { this.rowType = rowType; - this.statsCollectors = statsCollectors; Preconditions.checkArgument( rowType.getFieldCount() == statsCollectors.length, "The stats collector is not aligned to write schema."); } @Override - public SimpleColStats[] extract(FileIO fileIO, Path path) throws IOException { - return extractWithFileInfo(fileIO, path).getLeft(); + public SimpleColStats[] extract(FileIO fileIO, Path path, long length) throws IOException { + return extractWithFileInfo(fileIO, path, length).getLeft(); } @Override - public Pair<SimpleColStats[], FileInfo> extractWithFileInfo(FileIO fileIO, Path path) - throws IOException { + public Pair<SimpleColStats[], FileInfo> extractWithFileInfo( + FileIO fileIO, Path path, long length) throws IOException { SeekableInputStream fileInputStream = fileIO.newInputStream(path); long rowCount = getRowCount(fileInputStream); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcSimpleStatsExtractor.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcSimpleStatsExtractor.java index c0b9b6f59b..88fe069d46 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcSimpleStatsExtractor.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/filter/OrcSimpleStatsExtractor.java @@ -71,13 +71,13 @@ public class OrcSimpleStatsExtractor implements SimpleStatsExtractor { } @Override - public SimpleColStats[] extract(FileIO fileIO, Path path) throws IOException { - return extractWithFileInfo(fileIO, path).getLeft(); + public SimpleColStats[] extract(FileIO fileIO, Path path, long length) throws IOException { + return extractWithFileInfo(fileIO, path, length).getLeft(); } @Override - public Pair<SimpleColStats[], FileInfo> extractWithFileInfo(FileIO fileIO, Path path) - throws IOException { + public Pair<SimpleColStats[], FileInfo> extractWithFileInfo( + FileIO fileIO, Path path, long length) throws IOException { try (Reader reader = OrcReaderFactory.createReader(new Configuration(false), fileIO, path, null)) { long rowCount = reader.getNumberOfRows(); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetInputFile.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetInputFile.java index 7c52c24971..0e68416ba2 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetInputFile.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetInputFile.java @@ -19,7 +19,6 @@ package org.apache.paimon.format.parquet; import org.apache.paimon.fs.FileIO; -import org.apache.paimon.fs.FileStatus; import org.apache.paimon.fs.Path; import org.apache.parquet.io.InputFile; @@ -30,33 +29,36 @@ import java.io.IOException; public class ParquetInputFile implements InputFile { private final FileIO fileIO; - private final FileStatus stat; + private final Path path; + private final long length; - public static ParquetInputFile fromPath(FileIO fileIO, Path path) throws IOException { - return new ParquetInputFile(fileIO, fileIO.getFileStatus(path)); + public static ParquetInputFile fromPath(FileIO fileIO, Path path, long length) + throws IOException { + return new ParquetInputFile(fileIO, path, length); } - private ParquetInputFile(FileIO fileIO, FileStatus stat) { + private ParquetInputFile(FileIO fileIO, Path path, long length) { this.fileIO = fileIO; - this.stat = stat; + this.path = path; + this.length = length; } public Path getPath() { - return stat.getPath(); + return path; } @Override public long getLength() { - return stat.getLen(); + return length; } @Override public ParquetInputStream newStream() throws IOException { - return new ParquetInputStream(fileIO.newInputStream(stat.getPath())); + return new ParquetInputStream(fileIO.newInputStream(path)); } @Override public String toString() { - return stat.getPath().toString(); + return path.toString(); } } diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java index 82ffa79336..15db2d113a 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetReaderFactory.java @@ -113,7 +113,8 @@ public class ParquetReaderFactory implements FormatReaderFactory { ParquetFileReader reader = new ParquetFileReader( - ParquetInputFile.fromPath(context.fileIO(), context.filePath()), + ParquetInputFile.fromPath( + context.fileIO(), context.filePath(), context.fileSize()), builder.build(), context.selection()); MessageType fileSchema = reader.getFileMetaData().getSchema(); @@ -145,7 +146,8 @@ public class ParquetReaderFactory implements FormatReaderFactory { ParquetFileReader reader = new ParquetFileReader( - ParquetInputFile.fromPath(context.fileIO(), context.filePath()), + ParquetInputFile.fromPath( + context.fileIO(), context.filePath(), context.fileSize()), builder.build(), context.selection()); MessageType fileSchema = reader.getFileMetaData().getSchema(); diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSimpleStatsExtractor.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSimpleStatsExtractor.java index c0d6cef1b8..4dbd232572 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSimpleStatsExtractor.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetSimpleStatsExtractor.java @@ -67,15 +67,15 @@ public class ParquetSimpleStatsExtractor implements SimpleStatsExtractor { } @Override - public SimpleColStats[] extract(FileIO fileIO, Path path) throws IOException { - return extractWithFileInfo(fileIO, path).getLeft(); + public SimpleColStats[] extract(FileIO fileIO, Path path, long length) throws IOException { + return extractWithFileInfo(fileIO, path, length).getLeft(); } @Override - public Pair<SimpleColStats[], FileInfo> extractWithFileInfo(FileIO fileIO, Path path) - throws IOException { + public Pair<SimpleColStats[], FileInfo> extractWithFileInfo( + FileIO fileIO, Path path, long length) throws IOException { Pair<Map<String, Statistics<?>>, FileInfo> statsPair = - ParquetUtil.extractColumnStats(fileIO, path); + ParquetUtil.extractColumnStats(fileIO, path, length); SimpleColStatsCollector[] collectors = SimpleColStatsCollector.create(statsCollectors); return Pair.of( IntStream.range(0, rowType.getFieldCount()) diff --git a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetUtil.java b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetUtil.java index 038c91445b..0ec4fa162a 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetUtil.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/parquet/ParquetUtil.java @@ -40,15 +40,16 @@ import java.util.Map; public class ParquetUtil { /** - * Extract stats from specified Parquet files path. + * Extract stats from specified Parquet file path. * - * @param path the path of parquet files to be read + * @param path the path of parquet file to be read + * @param length the length of parquet file to be read * @return result sets as map, key is column name, value is statistics (for example, null count, * minimum value, maximum value) */ public static Pair<Map<String, Statistics<?>>, SimpleStatsExtractor.FileInfo> - extractColumnStats(FileIO fileIO, Path path) throws IOException { - try (ParquetFileReader reader = getParquetReader(fileIO, path)) { + extractColumnStats(FileIO fileIO, Path path, long length) throws IOException { + try (ParquetFileReader reader = getParquetReader(fileIO, path, length)) { ParquetMetadata parquetMetadata = reader.getFooter(); List<BlockMetaData> blockMetaDataList = parquetMetadata.getBlocks(); Map<String, Statistics<?>> resultStats = new HashMap<>(); @@ -72,14 +73,16 @@ public class ParquetUtil { } /** - * Generate {@link ParquetFileReader} instance to read the Parquet files at the given path. + * Generate {@link ParquetFileReader} instance to read the Parquet file at the given path. * - * @param path the path of parquet files to be read + * @param path the path of parquet file to be read + * @param length the length of parquet file to be read * @return parquet reader, used for reading footer, status, etc. */ - public static ParquetFileReader getParquetReader(FileIO fileIO, Path path) throws IOException { + public static ParquetFileReader getParquetReader(FileIO fileIO, Path path, long length) + throws IOException { return new ParquetFileReader( - ParquetInputFile.fromPath(fileIO, path), + ParquetInputFile.fromPath(fileIO, path, length), ParquetReadOptions.builder().build(), null); } diff --git a/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetFormatReadWriteTest.java b/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetFormatReadWriteTest.java index 221d524fff..024ea93b6e 100644 --- a/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetFormatReadWriteTest.java +++ b/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetFormatReadWriteTest.java @@ -64,7 +64,7 @@ public class ParquetFormatReadWriteTest extends FormatReadWriteTest { RowType rowType = DataTypes.ROW(DataTypes.INT().notNull(), DataTypes.BIGINT()); if (ThreadLocalRandom.current().nextBoolean()) { - rowType = (RowType) rowType.notNull(); + rowType = rowType.notNull(); } PositionOutputStream out = fileIO.newOutputStream(file, false); @@ -75,7 +75,8 @@ public class ParquetFormatReadWriteTest extends FormatReadWriteTest { writer.close(); out.close(); - try (ParquetFileReader reader = ParquetUtil.getParquetReader(fileIO, file)) { + try (ParquetFileReader reader = + ParquetUtil.getParquetReader(fileIO, file, fileIO.getFileSize(file))) { ParquetMetadata parquetMetadata = reader.getFooter(); List<BlockMetaData> blockMetaDataList = parquetMetadata.getBlocks(); for (BlockMetaData blockMetaData : blockMetaDataList) {