This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git
The following commit(s) were added to refs/heads/master by this push:
new f9ed9bf36 [core] support sequence number for filesTable (#1768)
f9ed9bf36 is described below
commit f9ed9bf3650425ad871512671d3e0e8840f3918d
Author: GuojunLi <[email protected]>
AuthorDate: Wed Aug 9 16:08:23 2023 +0800
[core] support sequence number for filesTable (#1768)
---
docs/content/how-to/system-tables.md | 34 ++---
.../org/apache/paimon/table/system/FilesTable.java | 6 +-
.../apache/paimon/table/system/FilesTableTest.java | 170 +++++++++++++++++++++
3 files changed, 192 insertions(+), 18 deletions(-)
diff --git a/docs/content/how-to/system-tables.md
b/docs/content/how-to/system-tables.md
index f01bc4092..5db5ff9f8 100644
--- a/docs/content/how-to/system-tables.md
+++ b/docs/content/how-to/system-tables.md
@@ -138,16 +138,16 @@ You can query the files of the table with specific
snapshot.
SELECT * FROM MyTable$files;
/*
-+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+-----------------------+
-| partition | bucket | file_path | file_format |
schema_id | level | record_count | file_size_in_bytes | min_key | max_key |
null_value_counts | min_value_stats | max_value_stats |
creation_time |
-+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+-----------------------+
-| [3] | 0 | data-8f64af95-29cc-4342-adc... | orc |
0 | 0 | 1 | 593 | [c] | [c] | {cnt=0,
val=0, word=0} | {cnt=3, val=33, word=c} | {cnt=3, val=33, word=c}
|2023-02-24T16:06:21.166|
-| [2] | 0 | data-8b369068-0d37-4011-aa5... | orc |
0 | 0 | 1 | 593 | [b] | [b] | {cnt=0,
val=0, word=0} | {cnt=2, val=22, word=b} | {cnt=2, val=22, word=b}
|2023-02-24T16:06:21.166|
-| [2] | 0 | data-83aa7973-060b-40b6-8c8... | orc |
0 | 0 | 1 | 605 | [d] | [d] | {cnt=0,
val=0, word=0} | {cnt=2, val=32, word=d} | {cnt=2, val=32, word=d}
|2023-02-24T16:06:21.166|
-| [5] | 0 | data-3d304f4a-bcea-44dc-a13... | orc |
0 | 0 | 1 | 593 | [c] | [c] | {cnt=0,
val=0, word=0} | {cnt=5, val=51, word=c} | {cnt=5, val=51, word=c}
|2023-02-24T16:06:21.166|
-| [1] | 0 | data-10abb5bc-0170-43ae-b6a... | orc |
0 | 0 | 1 | 595 | [a] | [a] | {cnt=0,
val=0, word=0} | {cnt=1, val=11, word=a} | {cnt=1, val=11, word=a}
|2023-02-24T16:06:21.166|
-| [4] | 0 | data-2c9b7095-65b7-4013-a7a... | orc |
0 | 0 | 1 | 593 | [a] | [a] | {cnt=0,
val=0, word=0} | {cnt=4, val=12, word=a} | {cnt=4, val=12, word=a}
|2023-02-24T16:06:21.166|
-+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+-----------------------+
++-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
+| partition | bucket | file_path | file_format |
schema_id | level | record_count | file_size_in_bytes | min_key | max_key |
null_value_counts | min_value_stats | max_value_stats |
min_sequence_number | max_sequence_number | creation_time |
++-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
+| [3] | 0 | data-8f64af95-29cc-4342-adc... | orc |
0 | 0 | 1 | 593 | [c] | [c] | {cnt=0,
val=0, word=0} | {cnt=3, val=33, word=c} | {cnt=3, val=33, word=c} |
1691551246234 | 1691551246637 |2023-02-24T16:06:21.166|
+| [2] | 0 | data-8b369068-0d37-4011-aa5... | orc |
0 | 0 | 1 | 593 | [b] | [b] | {cnt=0,
val=0, word=0} | {cnt=2, val=22, word=b} | {cnt=2, val=22, word=b} |
1691551246233 | 1691551246732 |2023-02-24T16:06:21.166|
+| [2] | 0 | data-83aa7973-060b-40b6-8c8... | orc |
0 | 0 | 1 | 605 | [d] | [d] | {cnt=0,
val=0, word=0} | {cnt=2, val=32, word=d} | {cnt=2, val=32, word=d} |
1691551246267 | 1691551246798 |2023-02-24T16:06:21.166|
+| [5] | 0 | data-3d304f4a-bcea-44dc-a13... | orc |
0 | 0 | 1 | 593 | [c] | [c] | {cnt=0,
val=0, word=0} | {cnt=5, val=51, word=c} | {cnt=5, val=51, word=c} |
1691551246152 | 1691551246788 |2023-02-24T16:06:21.166|
+| [1] | 0 | data-10abb5bc-0170-43ae-b6a... | orc |
0 | 0 | 1 | 595 | [a] | [a] | {cnt=0,
val=0, word=0} | {cnt=1, val=11, word=a} | {cnt=1, val=11, word=a} |
1691551246273 | 1691551246722 |2023-02-24T16:06:21.166|
+| [4] | 0 | data-2c9b7095-65b7-4013-a7a... | orc |
0 | 0 | 1 | 593 | [a] | [a] | {cnt=0,
val=0, word=0} | {cnt=4, val=12, word=a} | {cnt=4, val=12, word=a} |
1691551246109 | 1691551246321 |2023-02-24T16:06:21.166|
++-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
6 rows in set
*/
@@ -155,13 +155,13 @@ SELECT * FROM MyTable$files;
SELECT * FROM MyTable$files /*+ OPTIONS('scan.snapshot-id'='1') */;
/*
-+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+-----------------------+
-| partition | bucket | file_path | file_format |
schema_id | level | record_count | file_size_in_bytes | min_key | max_key |
null_value_counts | min_value_stats | max_value_stats |
creation_time |
-+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+-----------------------+
-| [3] | 0 | data-8f64af95-29cc-4342-adc... | orc |
0 | 0 | 1 | 593 | [c] | [c] | {cnt=0,
val=0, word=0} | {cnt=3, val=33, word=c} | {cnt=3, val=33, word=c}
|2023-02-24T16:06:21.166|
-| [2] | 0 | data-8b369068-0d37-4011-aa5... | orc |
0 | 0 | 1 | 593 | [b] | [b] | {cnt=0,
val=0, word=0} | {cnt=2, val=22, word=b} | {cnt=2, val=22, word=b}
|2023-02-24T16:06:21.166|
-| [1] | 0 | data-10abb5bc-0170-43ae-b6a... | orc |
0 | 0 | 1 | 595 | [a] | [a] | {cnt=0,
val=0, word=0} | {cnt=1, val=11, word=a} | {cnt=1, val=11, word=a}
|2023-02-24T16:06:21.166|
-+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+-----------------------+
++-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
+| partition | bucket | file_path | file_format |
schema_id | level | record_count | file_size_in_bytes | min_key | max_key |
null_value_counts | min_value_stats | max_value_stats |
min_sequence_number | max_sequence_number | creation_time |
++-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
+| [3] | 0 | data-8f64af95-29cc-4342-adc... | orc |
0 | 0 | 1 | 593 | [c] | [c] | {cnt=0,
val=0, word=0} | {cnt=3, val=33, word=c} | {cnt=3, val=33, word=c} |
1691551246234 | 1691551246637 |2023-02-24T16:06:21.166|
+| [2] | 0 | data-8b369068-0d37-4011-aa5... | orc |
0 | 0 | 1 | 593 | [b] | [b] | {cnt=0,
val=0, word=0} | {cnt=2, val=22, word=b} | {cnt=2, val=22, word=b} |
1691551246233 | 1691551246732 |2023-02-24T16:06:21.166|
+| [1] | 0 | data-10abb5bc-0170-43ae-b6a... | orc |
0 | 0 | 1 | 595 | [a] | [a] | {cnt=0,
val=0, word=0} | {cnt=1, val=11, word=a} | {cnt=1, val=11, word=a} |
1691551246273 | 1691551246722 |2023-02-24T16:06:21.166|
++-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
3 rows in set
*/
```
diff --git
a/paimon-core/src/main/java/org/apache/paimon/table/system/FilesTable.java
b/paimon-core/src/main/java/org/apache/paimon/table/system/FilesTable.java
index c7a7e1ec7..f0508bda8 100644
--- a/paimon-core/src/main/java/org/apache/paimon/table/system/FilesTable.java
+++ b/paimon-core/src/main/java/org/apache/paimon/table/system/FilesTable.java
@@ -98,7 +98,9 @@ public class FilesTable implements ReadonlyTable {
11, "min_value_stats",
SerializationUtils.newStringType(false)),
new DataField(
12, "max_value_stats",
SerializationUtils.newStringType(false)),
- new DataField(13, "creation_time",
DataTypes.TIMESTAMP_MILLIS())));
+ new DataField(13, "min_sequence_number", new
BigIntType(true)),
+ new DataField(14, "max_sequence_number", new
BigIntType(true)),
+ new DataField(15, "creation_time",
DataTypes.TIMESTAMP_MILLIS())));
private final FileStoreTable storeTable;
@@ -337,6 +339,8 @@ public class FilesTable implements ReadonlyTable {
() ->
BinaryString.fromString(statsGetter.nullValueCounts().toString()),
() ->
BinaryString.fromString(statsGetter.lowerValueBounds().toString()),
() ->
BinaryString.fromString(statsGetter.upperValueBounds().toString()),
+ dataFileMeta::minSequenceNumber,
+ dataFileMeta::maxSequenceNumber,
dataFileMeta::creationTime
};
diff --git
a/paimon-core/src/test/java/org/apache/paimon/table/system/FilesTableTest.java
b/paimon-core/src/test/java/org/apache/paimon/table/system/FilesTableTest.java
new file mode 100644
index 000000000..da8730861
--- /dev/null
+++
b/paimon-core/src/test/java/org/apache/paimon/table/system/FilesTableTest.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.table.system;
+
+import org.apache.paimon.CoreOptions;
+import org.apache.paimon.catalog.Catalog;
+import org.apache.paimon.catalog.Identifier;
+import org.apache.paimon.data.BinaryString;
+import org.apache.paimon.data.GenericRow;
+import org.apache.paimon.data.InternalRow;
+import org.apache.paimon.fs.FileIO;
+import org.apache.paimon.fs.Path;
+import org.apache.paimon.fs.local.LocalFileIO;
+import org.apache.paimon.io.DataFileMeta;
+import org.apache.paimon.manifest.FileKind;
+import org.apache.paimon.manifest.ManifestEntry;
+import org.apache.paimon.operation.FileStoreScan;
+import org.apache.paimon.schema.Schema;
+import org.apache.paimon.schema.SchemaManager;
+import org.apache.paimon.schema.SchemaUtils;
+import org.apache.paimon.schema.TableSchema;
+import org.apache.paimon.table.FileStoreTable;
+import org.apache.paimon.table.FileStoreTableFactory;
+import org.apache.paimon.table.TableTestBase;
+import org.apache.paimon.types.DataTypes;
+import org.apache.paimon.utils.SnapshotManager;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.FileNotFoundException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+/** Tests for {@link FilesTable}. */
+public class FilesTableTest extends TableTestBase {
+ private static final String tableName = "MyTable";
+
+ private FileStoreTable table;
+ private FileStoreScan scan;
+ private FilesTable filesTable;
+ private SnapshotManager snapshotManager;
+
+ @BeforeEach
+ public void before() throws Exception {
+ FileIO fileIO = LocalFileIO.create();
+ Path tablePath = new Path(String.format("%s/%s.db/%s", warehouse,
database, tableName));
+ Schema schema =
+ Schema.newBuilder()
+ .column("pk", DataTypes.INT())
+ .column("pt", DataTypes.INT())
+ .column("col1", DataTypes.INT())
+ .partitionKeys("pt")
+ .primaryKey("pk", "pt")
+ .option(CoreOptions.CHANGELOG_PRODUCER.key(), "input")
+ .option(CoreOptions.BUCKET.key(), "2")
+ .option(CoreOptions.SEQUENCE_FIELD.key(), "col1")
+ .build();
+ TableSchema tableSchema =
+ SchemaUtils.forceCommit(new SchemaManager(fileIO, tablePath),
schema);
+ table = FileStoreTableFactory.create(LocalFileIO.create(), tablePath,
tableSchema);
+ scan = table.store().newScan();
+
+ Identifier filesTableId =
+ identifier(tableName + Catalog.SYSTEM_TABLE_SPLITTER +
FilesTable.FILES);
+ filesTable = (FilesTable) catalog.getTable(filesTableId);
+ snapshotManager = new SnapshotManager(fileIO, tablePath);
+
+ // snapshot 1: append
+ write(table, GenericRow.of(1, 1, 1), GenericRow.of(1, 2, 5));
+
+ // snapshot 2: append
+ write(table, GenericRow.of(2, 1, 3), GenericRow.of(2, 2, 4));
+ }
+
+ @Test
+ public void testReadFilesFromLatest() throws Exception {
+ List<InternalRow> expectedRow = getExceptedResult(2L);
+ List<InternalRow> result = read(filesTable);
+ assertThat(result).containsExactlyInAnyOrderElementsOf(expectedRow);
+ }
+
+ @Test
+ public void testReadFilesFromSpecifiedSnapshot() throws Exception {
+ List<InternalRow> expectedRow = getExceptedResult(1L);
+ filesTable =
+ (FilesTable)
+ filesTable.copy(
+
Collections.singletonMap(CoreOptions.SCAN_SNAPSHOT_ID.key(), "1"));
+ List<InternalRow> result = read(filesTable);
+ assertThat(result).containsExactlyInAnyOrderElementsOf(expectedRow);
+ }
+
+ @Test
+ public void testReadFilesFromNotExistSnapshot() {
+ filesTable =
+ (FilesTable)
+ filesTable.copy(
+
Collections.singletonMap(CoreOptions.SCAN_SNAPSHOT_ID.key(), "3"));
+ assertThatThrownBy(() -> read(filesTable))
+ .hasRootCauseInstanceOf(FileNotFoundException.class);
+ }
+
+ private List<InternalRow> getExceptedResult(long snapshotId) {
+ if (!snapshotManager.snapshotExists(snapshotId)) {
+ return Collections.emptyList();
+ }
+
+ FileStoreScan.Plan plan = scan.withSnapshot(snapshotId).plan();
+
+ List<ManifestEntry> files = plan.files(FileKind.ADD);
+
+ List<InternalRow> expectedRow = new ArrayList<>();
+ for (ManifestEntry fileEntry : files) {
+ String partition = String.valueOf(fileEntry.partition().getInt(0));
+ DataFileMeta file = fileEntry.file();
+ String minKey = String.valueOf(file.minKey().getInt(0));
+ String maxKey = String.valueOf(file.maxKey().getInt(0));
+ String minSequenceNumber =
String.valueOf(file.minSequenceNumber());
+ String maxSequenceNumber =
String.valueOf(file.maxSequenceNumber());
+ expectedRow.add(
+ GenericRow.of(
+ BinaryString.fromString(Arrays.toString(new
String[] {partition})),
+ fileEntry.bucket(),
+ BinaryString.fromString(file.fileName()),
+ BinaryString.fromString("orc"),
+ file.schemaId(),
+ file.level(),
+ file.rowCount(),
+ file.fileSize(),
+ BinaryString.fromString(Arrays.toString(new
String[] {minKey})),
+ BinaryString.fromString(Arrays.toString(new
String[] {maxKey})),
+ BinaryString.fromString(
+ String.format("{col1=%s, pk=%s, pt=%s}",
0, 0, 0)),
+ BinaryString.fromString(
+ String.format(
+ "{col1=%s, pk=%s, pt=%s}",
+ minSequenceNumber, minKey,
partition)),
+ BinaryString.fromString(
+ String.format(
+ "{col1=%s, pk=%s, pt=%s}",
+ maxSequenceNumber, maxKey,
partition)),
+ file.minSequenceNumber(),
+ file.maxSequenceNumber(),
+ file.creationTime()));
+ }
+ return expectedRow;
+ }
+}