This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 98c9f9acec [core] Add min_partition_stats and max_partition_stats
columns to manifests system table (#4922)
98c9f9acec is described below
commit 98c9f9acec0d0e28c3115e7009997bd3d07b7c19
Author: tsreaper <[email protected]>
AuthorDate: Thu Jan 16 13:05:19 2025 +0800
[core] Add min_partition_stats and max_partition_stats columns to manifests
system table (#4922)
---
docs/content/concepts/system-tables.md | 42 +++++++++++-----------
.../apache/paimon/table/system/ManifestsTable.java | 32 ++++++++++++++---
.../paimon/table/system/ManifestsTableTest.java | 16 ++++++++-
3 files changed, 63 insertions(+), 27 deletions(-)
diff --git a/docs/content/concepts/system-tables.md
b/docs/content/concepts/system-tables.md
index c6fc768071..aea72c000a 100644
--- a/docs/content/concepts/system-tables.md
+++ b/docs/content/concepts/system-tables.md
@@ -279,45 +279,45 @@ You can query all manifest files contained in the latest
snapshot or the specifi
SELECT * FROM my_table$manifests;
/*
-+--------------------------------+-------------+------------------+-------------------+---------------+
-| file_name | file_size | num_added_files |
num_deleted_files | schema_id |
-+--------------------------------+-------------+------------------+-------------------+---------------+
-| manifest-f4dcab43-ef6b-4713... | 12365| 40 |
0 | 0 |
-| manifest-f4dcab43-ef6b-4713... | 1648 | 1 |
0 | 0 |
-+--------------------------------+-------------+------------------+-------------------+---------------+
++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+
+| file_name | file_size | num_added_files |
num_deleted_files | schema_id | min_partition_stats | max_partition_stats |
++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+
+| manifest-f4dcab43-ef6b-4713... | 12365| 40 |
0 | 0 | {20230315, 00} | {20230315, 20} |
+| manifest-f4dcab43-ef6b-4713... | 1648 | 1 |
0 | 0 | {20230115, 00} | {20230316, 23} |
++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+
2 rows in set
*/
-- You can also query the manifest with specified snapshot
SELECT * FROM my_table$manifests /*+ OPTIONS('scan.snapshot-id'='1') */;
/*
-+--------------------------------+-------------+------------------+-------------------+---------------+
-| file_name | file_size | num_added_files |
num_deleted_files | schema_id |
-+--------------------------------+-------------+------------------+-------------------+---------------+
-| manifest-f4dcab43-ef6b-4713... | 12365| 40 |
0 | 0 |
-+--------------------------------+-------------+------------------+-------------------+---------------+
++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+
+| file_name | file_size | num_added_files |
num_deleted_files | schema_id | min_partition_stats | max_partition_stats |
++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+
+| manifest-f4dcab43-ef6b-4713... | 12365| 40 |
0 | 0 | {20230315, 00} | {20230315, 20} |
++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+
1 row in set
*/
-- You can also query the manifest with specified tagName
SELECT * FROM my_table$manifests /*+ OPTIONS('scan.tag-name'='tag1') */;
/*
-+--------------------------------+-------------+------------------+-------------------+---------------+
-| file_name | file_size | num_added_files |
num_deleted_files | schema_id |
-+--------------------------------+-------------+------------------+-------------------+---------------+
-| manifest-f4dcab43-ef6b-4713... | 12365| 40 |
0 | 0 |
-+--------------------------------+-------------+------------------+-------------------+---------------+
++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+
+| file_name | file_size | num_added_files |
num_deleted_files | schema_id | min_partition_stats | max_partition_stats |
++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+
+| manifest-f4dcab43-ef6b-4713... | 12365| 40 |
0 | 0 | {20230315, 00} | {20230315, 20} |
++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+
1 row in set
*/
-- You can also query the manifest with specified timestamp in unix milliseconds
SELECT * FROM my_table$manifests /*+
OPTIONS('scan.timestamp-millis'='1678883047356') */;
/*
-+--------------------------------+-------------+------------------+-------------------+---------------+
-| file_name | file_size | num_added_files |
num_deleted_files | schema_id |
-+--------------------------------+-------------+------------------+-------------------+---------------+
-| manifest-f4dcab43-ef6b-4713... | 12365| 40 |
0 | 0 |
-+--------------------------------+-------------+------------------+-------------------+---------------+
++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+
+| file_name | file_size | num_added_files |
num_deleted_files | schema_id | min_partition_stats | max_partition_stats |
++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+
+| manifest-f4dcab43-ef6b-4713... | 12365| 40 |
0 | 0 | {20230315, 00} | {20230315, 20} |
++--------------------------------+-------------+------------------+-------------------+---------------+---------------------+---------------------+
1 row in set
*/
```
diff --git
a/paimon-core/src/main/java/org/apache/paimon/table/system/ManifestsTable.java
b/paimon-core/src/main/java/org/apache/paimon/table/system/ManifestsTable.java
index d88636d02a..4b3ddf30ce 100644
---
a/paimon-core/src/main/java/org/apache/paimon/table/system/ManifestsTable.java
+++
b/paimon-core/src/main/java/org/apache/paimon/table/system/ManifestsTable.java
@@ -20,6 +20,8 @@ package org.apache.paimon.table.system;
import org.apache.paimon.CoreOptions;
import org.apache.paimon.Snapshot;
+import org.apache.paimon.casting.CastExecutor;
+import org.apache.paimon.casting.CastExecutors;
import org.apache.paimon.data.BinaryString;
import org.apache.paimon.data.GenericRow;
import org.apache.paimon.data.InternalRow;
@@ -64,7 +66,7 @@ public class ManifestsTable implements ReadonlyTable {
private static final Logger LOG =
LoggerFactory.getLogger(ManifestsTable.class);
- private static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 2L;
public static final String MANIFESTS = "manifests";
@@ -75,7 +77,15 @@ public class ManifestsTable implements ReadonlyTable {
new DataField(1, "file_size", new
BigIntType(false)),
new DataField(2, "num_added_files", new
BigIntType(false)),
new DataField(3, "num_deleted_files", new
BigIntType(false)),
- new DataField(4, "schema_id", new
BigIntType(false))));
+ new DataField(4, "schema_id", new
BigIntType(false)),
+ new DataField(
+ 5,
+ "min_partition_stats",
+ SerializationUtils.newStringType(true)),
+ new DataField(
+ 6,
+ "max_partition_stats",
+ SerializationUtils.newStringType(true))));
private final FileStoreTable dataTable;
@@ -176,8 +186,16 @@ public class ManifestsTable implements ReadonlyTable {
}
List<ManifestFileMeta> manifestFileMetas = allManifests(dataTable);
+ @SuppressWarnings("unchecked")
+ CastExecutor<InternalRow, BinaryString> partitionCastExecutor =
+ (CastExecutor<InternalRow, BinaryString>)
+ CastExecutors.resolveToString(
+ dataTable.schema().logicalPartitionType());
+
Iterator<InternalRow> rows =
- Iterators.transform(manifestFileMetas.iterator(),
this::toRow);
+ Iterators.transform(
+ manifestFileMetas.iterator(),
+ meta -> toRow(meta, partitionCastExecutor));
if (readType != null) {
rows =
Iterators.transform(
@@ -189,13 +207,17 @@ public class ManifestsTable implements ReadonlyTable {
return new IteratorRecordReader<>(rows);
}
- private InternalRow toRow(ManifestFileMeta manifestFileMeta) {
+ private InternalRow toRow(
+ ManifestFileMeta manifestFileMeta,
+ CastExecutor<InternalRow, BinaryString> partitionCastExecutor)
{
return GenericRow.of(
BinaryString.fromString(manifestFileMeta.fileName()),
manifestFileMeta.fileSize(),
manifestFileMeta.numAddedFiles(),
manifestFileMeta.numDeletedFiles(),
- manifestFileMeta.schemaId());
+ manifestFileMeta.schemaId(),
+
partitionCastExecutor.cast(manifestFileMeta.partitionStats().minValues()),
+
partitionCastExecutor.cast(manifestFileMeta.partitionStats().maxValues()));
}
}
diff --git
a/paimon-core/src/test/java/org/apache/paimon/table/system/ManifestsTableTest.java
b/paimon-core/src/test/java/org/apache/paimon/table/system/ManifestsTableTest.java
index a39e6f6fa8..f375dfd2c8 100644
---
a/paimon-core/src/test/java/org/apache/paimon/table/system/ManifestsTableTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/table/system/ManifestsTableTest.java
@@ -177,7 +177,21 @@ public class ManifestsTableTest extends TableTestBase {
manifestFileMeta.fileSize(),
manifestFileMeta.numAddedFiles(),
manifestFileMeta.numDeletedFiles(),
- manifestFileMeta.schemaId()));
+ manifestFileMeta.schemaId(),
+ BinaryString.fromString(
+ String.format(
+ "{%d}",
+ manifestFileMeta
+ .partitionStats()
+ .minValues()
+ .getInt(0))),
+ BinaryString.fromString(
+ String.format(
+ "{%d}",
+ manifestFileMeta
+ .partitionStats()
+ .maxValues()
+ .getInt(0)))));
}
return expectedRow;
}