This is an automated email from the ASF dual-hosted git repository. lzljs3620320 pushed a commit to branch release-1.0 in repository https://gitbox.apache.org/repos/asf/paimon.git
commit 40c8611aea8ffe1c5ea0b741e83c3e4a686b8d13 Author: tsreaper <[email protected]> AuthorDate: Thu Jan 16 13:05:19 2025 +0800 [core] Add min_partition_stats and max_partition_stats columns to manifests system table (#4922) --- docs/content/concepts/system-tables.md | 42 +++++++++++----------- .../apache/paimon/table/system/ManifestsTable.java | 32 ++++++++++++++--- .../paimon/table/system/ManifestsTableTest.java | 16 ++++++++- 3 files changed, 63 insertions(+), 27 deletions(-) diff --git a/docs/content/concepts/system-tables.md b/docs/content/concepts/system-tables.md index 92119874e2..7ef54b140e 100644 --- a/docs/content/concepts/system-tables.md +++ b/docs/content/concepts/system-tables.md @@ -279,45 +279,45 @@ You can query all manifest files contained in the latest snapshot or the specifi SELECT * FROM my_table$manifests; /* -+--------------------------------+-------------+------------------+-------------------+---------------+ -| file_name | file_size | num_added_files | num_deleted_files | schema_id | -+--------------------------------+-------------+------------------+-------------------+---------------+ -| manifest-f4dcab43-ef6b-4713... | 12365| 40 | 0 | 0 | -| manifest-f4dcab43-ef6b-4713... | 1648 | 1 | 0 | 0 | -+--------------------------------+-------------+------------------+-------------------+---------------+ ++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+ +| file_name | file_size | num_added_files | num_deleted_files | schema_id | min_partition_stats | max_partition_stats | ++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+ +| manifest-f4dcab43-ef6b-4713... | 12365| 40 | 0 | 0 | {20230315, 00} | {20230315, 20} | +| manifest-f4dcab43-ef6b-4713... | 1648 | 1 | 0 | 0 | {20230115, 00} | {20230316, 23} | ++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+ 2 rows in set */ -- You can also query the manifest with specified snapshot SELECT * FROM my_table$manifests /*+ OPTIONS('scan.snapshot-id'='1') */; /* -+--------------------------------+-------------+------------------+-------------------+---------------+ -| file_name | file_size | num_added_files | num_deleted_files | schema_id | -+--------------------------------+-------------+------------------+-------------------+---------------+ -| manifest-f4dcab43-ef6b-4713... | 12365| 40 | 0 | 0 | -+--------------------------------+-------------+------------------+-------------------+---------------+ ++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+ +| file_name | file_size | num_added_files | num_deleted_files | schema_id | min_partition_stats | max_partition_stats | ++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+ +| manifest-f4dcab43-ef6b-4713... | 12365| 40 | 0 | 0 | {20230315, 00} | {20230315, 20} | ++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+ 1 rows in set */ - You can also query the manifest with specified tagName SELECT * FROM my_table$manifests /*+ OPTIONS('scan.tag-name'='tag1') */; /* -+--------------------------------+-------------+------------------+-------------------+---------------+ -| file_name | file_size | num_added_files | num_deleted_files | schema_id | -+--------------------------------+-------------+------------------+-------------------+---------------+ -| manifest-f4dcab43-ef6b-4713... | 12365| 40 | 0 | 0 | -+--------------------------------+-------------+------------------+-------------------+---------------+ ++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+ +| file_name | file_size | num_added_files | num_deleted_files | schema_id | min_partition_stats | max_partition_stats | ++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+ +| manifest-f4dcab43-ef6b-4713... | 12365| 40 | 0 | 0 | {20230315, 00} | {20230315, 20} | ++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+ 1 rows in set */ - You can also query the manifest with specified timestamp in unix milliseconds SELECT * FROM my_table$manifests /*+ OPTIONS('scan.timestamp-millis'='1678883047356') */; /* -+--------------------------------+-------------+------------------+-------------------+---------------+ -| file_name | file_size | num_added_files | num_deleted_files | schema_id | -+--------------------------------+-------------+------------------+-------------------+---------------+ -| manifest-f4dcab43-ef6b-4713... | 12365| 40 | 0 | 0 | -+--------------------------------+-------------+------------------+-------------------+---------------+ ++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+ +| file_name | file_size | num_added_files | num_deleted_files | schema_id | min_partition_stats | max_partition_stats | ++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+ +| manifest-f4dcab43-ef6b-4713... | 12365| 40 | 0 | 0 | {20230315, 00} | {20230315, 20} | ++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+ 1 rows in set */ ``` diff --git a/paimon-core/src/main/java/org/apache/paimon/table/system/ManifestsTable.java b/paimon-core/src/main/java/org/apache/paimon/table/system/ManifestsTable.java index d88636d02a..4b3ddf30ce 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/system/ManifestsTable.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/system/ManifestsTable.java @@ -20,6 +20,8 @@ package org.apache.paimon.table.system; import org.apache.paimon.CoreOptions; import org.apache.paimon.Snapshot; +import org.apache.paimon.casting.CastExecutor; +import org.apache.paimon.casting.CastExecutors; import org.apache.paimon.data.BinaryString; import org.apache.paimon.data.GenericRow; import org.apache.paimon.data.InternalRow; @@ -64,7 +66,7 @@ public class ManifestsTable implements ReadonlyTable { private static final Logger LOG = LoggerFactory.getLogger(ManifestsTable.class); - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 2L; public static final String MANIFESTS = "manifests"; @@ -75,7 +77,15 @@ public class ManifestsTable implements ReadonlyTable { new DataField(1, "file_size", new BigIntType(false)), new DataField(2, "num_added_files", new BigIntType(false)), new DataField(3, "num_deleted_files", new BigIntType(false)), - new DataField(4, "schema_id", new BigIntType(false)))); + new DataField(4, "schema_id", new BigIntType(false)), + new DataField( + 5, + "min_partition_stats", + SerializationUtils.newStringType(true)), + new DataField( + 6, + "max_partition_stats", + SerializationUtils.newStringType(true)))); private final FileStoreTable dataTable; @@ -176,8 +186,16 @@ public class ManifestsTable implements ReadonlyTable { } List<ManifestFileMeta> manifestFileMetas = allManifests(dataTable); + @SuppressWarnings("unchecked") + CastExecutor<InternalRow, BinaryString> partitionCastExecutor = + (CastExecutor<InternalRow, BinaryString>) + CastExecutors.resolveToString( + dataTable.schema().logicalPartitionType()); + Iterator<InternalRow> rows = - Iterators.transform(manifestFileMetas.iterator(), this::toRow); + Iterators.transform( + manifestFileMetas.iterator(), + meta -> toRow(meta, partitionCastExecutor)); if (readType != null) { rows = Iterators.transform( @@ -189,13 +207,17 @@ public class ManifestsTable implements ReadonlyTable { return new IteratorRecordReader<>(rows); } - private InternalRow toRow(ManifestFileMeta manifestFileMeta) { + private InternalRow toRow( + ManifestFileMeta manifestFileMeta, + CastExecutor<InternalRow, BinaryString> partitionCastExecutor) { return GenericRow.of( BinaryString.fromString(manifestFileMeta.fileName()), manifestFileMeta.fileSize(), manifestFileMeta.numAddedFiles(), manifestFileMeta.numDeletedFiles(), - manifestFileMeta.schemaId()); + manifestFileMeta.schemaId(), + partitionCastExecutor.cast(manifestFileMeta.partitionStats().minValues()), + partitionCastExecutor.cast(manifestFileMeta.partitionStats().maxValues())); } } diff --git a/paimon-core/src/test/java/org/apache/paimon/table/system/ManifestsTableTest.java b/paimon-core/src/test/java/org/apache/paimon/table/system/ManifestsTableTest.java index a39e6f6fa8..f375dfd2c8 100644 --- a/paimon-core/src/test/java/org/apache/paimon/table/system/ManifestsTableTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/table/system/ManifestsTableTest.java @@ -177,7 +177,21 @@ public class ManifestsTableTest extends TableTestBase { manifestFileMeta.fileSize(), manifestFileMeta.numAddedFiles(), manifestFileMeta.numDeletedFiles(), - manifestFileMeta.schemaId())); + manifestFileMeta.schemaId(), + BinaryString.fromString( + String.format( + "{%d}", + manifestFileMeta + .partitionStats() + .minValues() + .getInt(0))), + BinaryString.fromString( + String.format( + "{%d}", + manifestFileMeta + .partitionStats() + .maxValues() + .getInt(0))))); } return expectedRow; }
