This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new f9ed9bf36 [core] support sequence number for filesTable (#1768)
f9ed9bf36 is described below

commit f9ed9bf3650425ad871512671d3e0e8840f3918d
Author: GuojunLi <[email protected]>
AuthorDate: Wed Aug 9 16:08:23 2023 +0800

    [core] support sequence number for filesTable (#1768)
---
 docs/content/how-to/system-tables.md               |  34 ++---
 .../org/apache/paimon/table/system/FilesTable.java |   6 +-
 .../apache/paimon/table/system/FilesTableTest.java | 170 +++++++++++++++++++++
 3 files changed, 192 insertions(+), 18 deletions(-)

diff --git a/docs/content/how-to/system-tables.md 
b/docs/content/how-to/system-tables.md
index f01bc4092..5db5ff9f8 100644
--- a/docs/content/how-to/system-tables.md
+++ b/docs/content/how-to/system-tables.md
@@ -138,16 +138,16 @@ You can query the files of the table with specific 
snapshot.
 SELECT * FROM MyTable$files;
 
 /*
-+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+-----------------------+
-| partition | bucket |                      file_path | file_format | 
schema_id | level | record_count | file_size_in_bytes | min_key | max_key |     
 null_value_counts |         min_value_stats |         max_value_stats |        
 creation_time |
-+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+-----------------------+
-|       [3] |      0 | data-8f64af95-29cc-4342-adc... |         orc |         
0 |     0 |            1 |                593 |     [c] |     [c] | {cnt=0, 
val=0, word=0} | {cnt=3, val=33, word=c} | {cnt=3, val=33, word=c} 
|2023-02-24T16:06:21.166|
-|       [2] |      0 | data-8b369068-0d37-4011-aa5... |         orc |         
0 |     0 |            1 |                593 |     [b] |     [b] | {cnt=0, 
val=0, word=0} | {cnt=2, val=22, word=b} | {cnt=2, val=22, word=b} 
|2023-02-24T16:06:21.166|
-|       [2] |      0 | data-83aa7973-060b-40b6-8c8... |         orc |         
0 |     0 |            1 |                605 |     [d] |     [d] | {cnt=0, 
val=0, word=0} | {cnt=2, val=32, word=d} | {cnt=2, val=32, word=d} 
|2023-02-24T16:06:21.166|
-|       [5] |      0 | data-3d304f4a-bcea-44dc-a13... |         orc |         
0 |     0 |            1 |                593 |     [c] |     [c] | {cnt=0, 
val=0, word=0} | {cnt=5, val=51, word=c} | {cnt=5, val=51, word=c} 
|2023-02-24T16:06:21.166|
-|       [1] |      0 | data-10abb5bc-0170-43ae-b6a... |         orc |         
0 |     0 |            1 |                595 |     [a] |     [a] | {cnt=0, 
val=0, word=0} | {cnt=1, val=11, word=a} | {cnt=1, val=11, word=a} 
|2023-02-24T16:06:21.166|
-|       [4] |      0 | data-2c9b7095-65b7-4013-a7a... |         orc |         
0 |     0 |            1 |                593 |     [a] |     [a] | {cnt=0, 
val=0, word=0} | {cnt=4, val=12, word=a} | {cnt=4, val=12, word=a} 
|2023-02-24T16:06:21.166|
-+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+-----------------------+
++-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
+| partition | bucket |                      file_path | file_format | 
schema_id | level | record_count | file_size_in_bytes | min_key | max_key |     
 null_value_counts |         min_value_stats |         max_value_stats | 
min_sequence_number | max_sequence_number |         creation_time |
++-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
+|       [3] |      0 | data-8f64af95-29cc-4342-adc... |         orc |         
0 |     0 |            1 |                593 |     [c] |     [c] | {cnt=0, 
val=0, word=0} | {cnt=3, val=33, word=c} | {cnt=3, val=33, word=c} |       
1691551246234 |       1691551246637 |2023-02-24T16:06:21.166|
+|       [2] |      0 | data-8b369068-0d37-4011-aa5... |         orc |         
0 |     0 |            1 |                593 |     [b] |     [b] | {cnt=0, 
val=0, word=0} | {cnt=2, val=22, word=b} | {cnt=2, val=22, word=b} |       
1691551246233 |       1691551246732 |2023-02-24T16:06:21.166|
+|       [2] |      0 | data-83aa7973-060b-40b6-8c8... |         orc |         
0 |     0 |            1 |                605 |     [d] |     [d] | {cnt=0, 
val=0, word=0} | {cnt=2, val=32, word=d} | {cnt=2, val=32, word=d} |       
1691551246267 |       1691551246798 |2023-02-24T16:06:21.166|
+|       [5] |      0 | data-3d304f4a-bcea-44dc-a13... |         orc |         
0 |     0 |            1 |                593 |     [c] |     [c] | {cnt=0, 
val=0, word=0} | {cnt=5, val=51, word=c} | {cnt=5, val=51, word=c} |       
1691551246152 |       1691551246788 |2023-02-24T16:06:21.166|
+|       [1] |      0 | data-10abb5bc-0170-43ae-b6a... |         orc |         
0 |     0 |            1 |                595 |     [a] |     [a] | {cnt=0, 
val=0, word=0} | {cnt=1, val=11, word=a} | {cnt=1, val=11, word=a} |       
1691551246273 |       1691551246722 |2023-02-24T16:06:21.166|
+|       [4] |      0 | data-2c9b7095-65b7-4013-a7a... |         orc |         
0 |     0 |            1 |                593 |     [a] |     [a] | {cnt=0, 
val=0, word=0} | {cnt=4, val=12, word=a} | {cnt=4, val=12, word=a} |       
1691551246109 |       1691551246321 |2023-02-24T16:06:21.166|
++-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
 6 rows in set
 */
 
@@ -155,13 +155,13 @@ SELECT * FROM MyTable$files;
 SELECT * FROM MyTable$files /*+ OPTIONS('scan.snapshot-id'='1') */;
 
 /*
-+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+-----------------------+
-| partition | bucket |                      file_path | file_format | 
schema_id | level | record_count | file_size_in_bytes | min_key | max_key |     
 null_value_counts |         min_value_stats |         max_value_stats |        
 creation_time |
-+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+-----------------------+
-|       [3] |      0 | data-8f64af95-29cc-4342-adc... |         orc |         
0 |     0 |            1 |                593 |     [c] |     [c] | {cnt=0, 
val=0, word=0} | {cnt=3, val=33, word=c} | {cnt=3, val=33, word=c} 
|2023-02-24T16:06:21.166|
-|       [2] |      0 | data-8b369068-0d37-4011-aa5... |         orc |         
0 |     0 |            1 |                593 |     [b] |     [b] | {cnt=0, 
val=0, word=0} | {cnt=2, val=22, word=b} | {cnt=2, val=22, word=b} 
|2023-02-24T16:06:21.166|
-|       [1] |      0 | data-10abb5bc-0170-43ae-b6a... |         orc |         
0 |     0 |            1 |                595 |     [a] |     [a] | {cnt=0, 
val=0, word=0} | {cnt=1, val=11, word=a} | {cnt=1, val=11, word=a} 
|2023-02-24T16:06:21.166|
-+-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+-----------------------+
++-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
+| partition | bucket |                      file_path | file_format | 
schema_id | level | record_count | file_size_in_bytes | min_key | max_key |     
 null_value_counts |         min_value_stats |         max_value_stats | 
min_sequence_number | max_sequence_number |         creation_time |
++-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
+|       [3] |      0 | data-8f64af95-29cc-4342-adc... |         orc |         
0 |     0 |            1 |                593 |     [c] |     [c] | {cnt=0, 
val=0, word=0} | {cnt=3, val=33, word=c} | {cnt=3, val=33, word=c} |       
1691551246234 |       1691551246637 |2023-02-24T16:06:21.166|
+|       [2] |      0 | data-8b369068-0d37-4011-aa5... |         orc |         
0 |     0 |            1 |                593 |     [b] |     [b] | {cnt=0, 
val=0, word=0} | {cnt=2, val=22, word=b} | {cnt=2, val=22, word=b} |       
1691551246233 |       1691551246732 |2023-02-24T16:06:21.166|
+|       [1] |      0 | data-10abb5bc-0170-43ae-b6a... |         orc |         
0 |     0 |            1 |                595 |     [a] |     [a] | {cnt=0, 
val=0, word=0} | {cnt=1, val=11, word=a} | {cnt=1, val=11, word=a} |       
1691551246267 |       1691551246798 |2023-02-24T16:06:21.166|
++-----------+--------+--------------------------------+-------------+-----------+-------+--------------+--------------------+---------+---------+------------------------+-------------------------+-------------------------+---------------------+---------------------+-----------------------+
 3 rows in set
 */
 ```
diff --git 
a/paimon-core/src/main/java/org/apache/paimon/table/system/FilesTable.java 
b/paimon-core/src/main/java/org/apache/paimon/table/system/FilesTable.java
index c7a7e1ec7..f0508bda8 100644
--- a/paimon-core/src/main/java/org/apache/paimon/table/system/FilesTable.java
+++ b/paimon-core/src/main/java/org/apache/paimon/table/system/FilesTable.java
@@ -98,7 +98,9 @@ public class FilesTable implements ReadonlyTable {
                                     11, "min_value_stats", 
SerializationUtils.newStringType(false)),
                             new DataField(
                                     12, "max_value_stats", 
SerializationUtils.newStringType(false)),
-                            new DataField(13, "creation_time", 
DataTypes.TIMESTAMP_MILLIS())));
+                            new DataField(13, "min_sequence_number", new 
BigIntType(true)),
+                            new DataField(14, "max_sequence_number", new 
BigIntType(true)),
+                            new DataField(15, "creation_time", 
DataTypes.TIMESTAMP_MILLIS())));
 
     private final FileStoreTable storeTable;
 
@@ -337,6 +339,8 @@ public class FilesTable implements ReadonlyTable {
                         () -> 
BinaryString.fromString(statsGetter.nullValueCounts().toString()),
                         () -> 
BinaryString.fromString(statsGetter.lowerValueBounds().toString()),
                         () -> 
BinaryString.fromString(statsGetter.upperValueBounds().toString()),
+                        dataFileMeta::minSequenceNumber,
+                        dataFileMeta::maxSequenceNumber,
                         dataFileMeta::creationTime
                     };
 
diff --git 
a/paimon-core/src/test/java/org/apache/paimon/table/system/FilesTableTest.java 
b/paimon-core/src/test/java/org/apache/paimon/table/system/FilesTableTest.java
new file mode 100644
index 000000000..da8730861
--- /dev/null
+++ 
b/paimon-core/src/test/java/org/apache/paimon/table/system/FilesTableTest.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.table.system;
+
+import org.apache.paimon.CoreOptions;
+import org.apache.paimon.catalog.Catalog;
+import org.apache.paimon.catalog.Identifier;
+import org.apache.paimon.data.BinaryString;
+import org.apache.paimon.data.GenericRow;
+import org.apache.paimon.data.InternalRow;
+import org.apache.paimon.fs.FileIO;
+import org.apache.paimon.fs.Path;
+import org.apache.paimon.fs.local.LocalFileIO;
+import org.apache.paimon.io.DataFileMeta;
+import org.apache.paimon.manifest.FileKind;
+import org.apache.paimon.manifest.ManifestEntry;
+import org.apache.paimon.operation.FileStoreScan;
+import org.apache.paimon.schema.Schema;
+import org.apache.paimon.schema.SchemaManager;
+import org.apache.paimon.schema.SchemaUtils;
+import org.apache.paimon.schema.TableSchema;
+import org.apache.paimon.table.FileStoreTable;
+import org.apache.paimon.table.FileStoreTableFactory;
+import org.apache.paimon.table.TableTestBase;
+import org.apache.paimon.types.DataTypes;
+import org.apache.paimon.utils.SnapshotManager;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.FileNotFoundException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+/** Tests for {@link FilesTable}. */
+public class FilesTableTest extends TableTestBase {
+    private static final String tableName = "MyTable";
+
+    private FileStoreTable table;
+    private FileStoreScan scan;
+    private FilesTable filesTable;
+    private SnapshotManager snapshotManager;
+
+    @BeforeEach
+    public void before() throws Exception {
+        FileIO fileIO = LocalFileIO.create();
+        Path tablePath = new Path(String.format("%s/%s.db/%s", warehouse, 
database, tableName));
+        Schema schema =
+                Schema.newBuilder()
+                        .column("pk", DataTypes.INT())
+                        .column("pt", DataTypes.INT())
+                        .column("col1", DataTypes.INT())
+                        .partitionKeys("pt")
+                        .primaryKey("pk", "pt")
+                        .option(CoreOptions.CHANGELOG_PRODUCER.key(), "input")
+                        .option(CoreOptions.BUCKET.key(), "2")
+                        .option(CoreOptions.SEQUENCE_FIELD.key(), "col1")
+                        .build();
+        TableSchema tableSchema =
+                SchemaUtils.forceCommit(new SchemaManager(fileIO, tablePath), 
schema);
+        table = FileStoreTableFactory.create(LocalFileIO.create(), tablePath, 
tableSchema);
+        scan = table.store().newScan();
+
+        Identifier filesTableId =
+                identifier(tableName + Catalog.SYSTEM_TABLE_SPLITTER + 
FilesTable.FILES);
+        filesTable = (FilesTable) catalog.getTable(filesTableId);
+        snapshotManager = new SnapshotManager(fileIO, tablePath);
+
+        // snapshot 1: append
+        write(table, GenericRow.of(1, 1, 1), GenericRow.of(1, 2, 5));
+
+        // snapshot 2: append
+        write(table, GenericRow.of(2, 1, 3), GenericRow.of(2, 2, 4));
+    }
+
+    @Test
+    public void testReadFilesFromLatest() throws Exception {
+        List<InternalRow> expectedRow = getExceptedResult(2L);
+        List<InternalRow> result = read(filesTable);
+        assertThat(result).containsExactlyInAnyOrderElementsOf(expectedRow);
+    }
+
+    @Test
+    public void testReadFilesFromSpecifiedSnapshot() throws Exception {
+        List<InternalRow> expectedRow = getExceptedResult(1L);
+        filesTable =
+                (FilesTable)
+                        filesTable.copy(
+                                
Collections.singletonMap(CoreOptions.SCAN_SNAPSHOT_ID.key(), "1"));
+        List<InternalRow> result = read(filesTable);
+        assertThat(result).containsExactlyInAnyOrderElementsOf(expectedRow);
+    }
+
+    @Test
+    public void testReadFilesFromNotExistSnapshot() {
+        filesTable =
+                (FilesTable)
+                        filesTable.copy(
+                                
Collections.singletonMap(CoreOptions.SCAN_SNAPSHOT_ID.key(), "3"));
+        assertThatThrownBy(() -> read(filesTable))
+                .hasRootCauseInstanceOf(FileNotFoundException.class);
+    }
+
+    private List<InternalRow> getExceptedResult(long snapshotId) {
+        if (!snapshotManager.snapshotExists(snapshotId)) {
+            return Collections.emptyList();
+        }
+
+        FileStoreScan.Plan plan = scan.withSnapshot(snapshotId).plan();
+
+        List<ManifestEntry> files = plan.files(FileKind.ADD);
+
+        List<InternalRow> expectedRow = new ArrayList<>();
+        for (ManifestEntry fileEntry : files) {
+            String partition = String.valueOf(fileEntry.partition().getInt(0));
+            DataFileMeta file = fileEntry.file();
+            String minKey = String.valueOf(file.minKey().getInt(0));
+            String maxKey = String.valueOf(file.maxKey().getInt(0));
+            String minSequenceNumber = 
String.valueOf(file.minSequenceNumber());
+            String maxSequenceNumber = 
String.valueOf(file.maxSequenceNumber());
+            expectedRow.add(
+                    GenericRow.of(
+                            BinaryString.fromString(Arrays.toString(new 
String[] {partition})),
+                            fileEntry.bucket(),
+                            BinaryString.fromString(file.fileName()),
+                            BinaryString.fromString("orc"),
+                            file.schemaId(),
+                            file.level(),
+                            file.rowCount(),
+                            file.fileSize(),
+                            BinaryString.fromString(Arrays.toString(new 
String[] {minKey})),
+                            BinaryString.fromString(Arrays.toString(new 
String[] {maxKey})),
+                            BinaryString.fromString(
+                                    String.format("{col1=%s, pk=%s, pt=%s}", 
0, 0, 0)),
+                            BinaryString.fromString(
+                                    String.format(
+                                            "{col1=%s, pk=%s, pt=%s}",
+                                            minSequenceNumber, minKey, 
partition)),
+                            BinaryString.fromString(
+                                    String.format(
+                                            "{col1=%s, pk=%s, pt=%s}",
+                                            maxSequenceNumber, maxKey, 
partition)),
+                            file.minSequenceNumber(),
+                            file.maxSequenceNumber(),
+                            file.creationTime()));
+        }
+        return expectedRow;
+    }
+}

Reply via email to