This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git
The following commit(s) were added to refs/heads/master by this push:
new cc3f8b245 [core] Introduce FileMonitorTable to stream read file
changes (#2101)
cc3f8b245 is described below
commit cc3f8b24542bd02bb497ad57c7e64a99fe06906f
Author: Jingsong Lee <[email protected]>
AuthorDate: Tue Oct 10 10:02:47 2023 +0800
[core] Introduce FileMonitorTable to stream read file changes (#2101)
---
.../main/java/org/apache/paimon/CoreOptions.java | 41 +--
.../AppendOnlyTableCompactionCoordinator.java | 4 +-
.../table/source/AbstractInnerTableScan.java | 22 +-
.../table/source/InnerStreamTableScanImpl.java | 19 +-
...UpScanner.java => AllDeltaFollowUpScanner.java} | 19 +-
.../table/source/snapshot/FollowUpScanner.java | 2 +-
.../table/source/snapshot/SnapshotReader.java | 4 +-
.../table/source/snapshot/SnapshotReaderImpl.java | 10 +-
.../apache/paimon/table/system/AuditLogTable.java | 4 +-
.../paimon/table/system/FileMonitorTable.java | 282 +++++++++++++++++++++
.../table/ChangelogWithKeyFileStoreTableTest.java | 80 ++++++
.../flink/source/CompactorSourceBuilder.java | 4 +-
.../flink/utils/MultiTablesCompactorUtil.java | 4 +-
13 files changed, 416 insertions(+), 79 deletions(-)
diff --git a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
index 7e1f8498b..607772001 100644
--- a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
+++ b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
@@ -671,12 +671,13 @@ public class CoreOptions implements Serializable {
"Full compaction will be constantly triggered
after delta commits.");
@ExcludeFromDocumentation("Internal use only")
- public static final ConfigOption<StreamingCompactionType>
STREAMING_COMPACT =
- key("streaming-compact")
- .enumType(StreamingCompactionType.class)
- .defaultValue(StreamingCompactionType.NONE)
+ public static final ConfigOption<StreamScanMode> STREAM_SCAN_MODE =
+ key("stream-scan-mode")
+ .enumType(StreamScanMode.class)
+ .defaultValue(StreamScanMode.NONE)
.withDescription(
- "Only used to force TableScan to construct
suitable 'StartingUpScanner' and 'FollowUpScanner' dedicated streaming
compaction job.");
+ "Only used to force TableScan to construct
suitable 'StartingUpScanner' and 'FollowUpScanner' "
+ + "dedicated internal streaming scan.");
public static final ConfigOption<StreamingReadMode> STREAMING_READ_MODE =
key("streaming-read-mode")
@@ -1615,16 +1616,18 @@ public class CoreOptions implements Serializable {
}
}
- /** Compaction type when trigger a compaction action. */
- public enum StreamingCompactionType implements DescribedEnum {
- NONE("none", "Not a streaming compaction."),
- NORMAL("normal", "Compaction for traditional bucket table."),
- BUCKET_UNAWARE("unaware", "Compaction for unaware bucket table.");
+ /** Inner stream scan mode for some internal requirements. */
+ public enum StreamScanMode implements DescribedEnum {
+ NONE("none", "No requirement."),
+ COMPACT_BUCKET_TABLE("compact-bucket-table", "Compaction for
traditional bucket table."),
+ COMPACT_APPEND_NO_BUCKET(
+ "compact-append-no-bucket", "Compaction for append table with
bucket unaware."),
+ FILE_MONITOR("file-monitor", "Monitor data file changes.");
private final String value;
private final String description;
- StreamingCompactionType(String value, String description) {
+ StreamScanMode(String value, String description) {
this.value = value;
this.description = description;
}
@@ -1642,22 +1645,6 @@ public class CoreOptions implements Serializable {
public String getValue() {
return value;
}
-
- @VisibleForTesting
- public static StreamingCompactionType fromValue(String value) {
- for (StreamingCompactionType formatType :
StreamingCompactionType.values()) {
- if (formatType.value.equals(value)) {
- return formatType;
- }
- }
- throw new IllegalArgumentException(
- String.format(
- "Invalid format type %s, only support [%s]",
- value,
- StringUtils.join(
-
Arrays.stream(StreamingCompactionType.values()).iterator(),
- ",")));
- }
}
/** Specifies this scan type for incremental scan . */
diff --git
a/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java
b/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java
index cd284af7f..62a238419 100644
---
a/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java
+++
b/paimon-core/src/main/java/org/apache/paimon/append/AppendOnlyTableCompactionCoordinator.java
@@ -172,8 +172,8 @@ public class AppendOnlyTableCompactionCoordinator {
return new HashMap<String, String>() {
{
put(
- CoreOptions.STREAMING_COMPACT.key(),
-
CoreOptions.StreamingCompactionType.BUCKET_UNAWARE.getValue());
+ CoreOptions.STREAM_SCAN_MODE.key(),
+
CoreOptions.StreamScanMode.COMPACT_APPEND_NO_BUCKET.getValue());
}
};
}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/table/source/AbstractInnerTableScan.java
b/paimon-core/src/main/java/org/apache/paimon/table/source/AbstractInnerTableScan.java
index e90d59612..8df089b0b 100644
---
a/paimon-core/src/main/java/org/apache/paimon/table/source/AbstractInnerTableScan.java
+++
b/paimon-core/src/main/java/org/apache/paimon/table/source/AbstractInnerTableScan.java
@@ -79,20 +79,16 @@ public abstract class AbstractInnerTableScan implements
InnerTableScan {
protected StartingScanner createStartingScanner(boolean isStreaming) {
SnapshotManager snapshotManager = snapshotReader.snapshotManager();
- CoreOptions.StreamingCompactionType type =
- options.toConfiguration().get(CoreOptions.STREAMING_COMPACT);
+ CoreOptions.StreamScanMode type =
+ options.toConfiguration().get(CoreOptions.STREAM_SCAN_MODE);
switch (type) {
- case NORMAL:
- {
- checkArgument(
- isStreaming,
- "Set 'streaming-compact' in batch mode. This is
unexpected.");
- return new
ContinuousCompactorStartingScanner(snapshotManager);
- }
- case BUCKET_UNAWARE:
- {
- return new FullStartingScanner(snapshotManager);
- }
+ case COMPACT_BUCKET_TABLE:
+                checkArgument(
+                        isStreaming, "Set 'stream-scan-mode' in batch mode.
This is unexpected.");
+ return new ContinuousCompactorStartingScanner(snapshotManager);
+ case COMPACT_APPEND_NO_BUCKET:
+ case FILE_MONITOR:
+ return new FullStartingScanner(snapshotManager);
}
// read from consumer id
diff --git
a/paimon-core/src/main/java/org/apache/paimon/table/source/InnerStreamTableScanImpl.java
b/paimon-core/src/main/java/org/apache/paimon/table/source/InnerStreamTableScanImpl.java
index 1de22a644..ca4bd8a8d 100644
---
a/paimon-core/src/main/java/org/apache/paimon/table/source/InnerStreamTableScanImpl.java
+++
b/paimon-core/src/main/java/org/apache/paimon/table/source/InnerStreamTableScanImpl.java
@@ -23,6 +23,7 @@ import org.apache.paimon.Snapshot;
import org.apache.paimon.consumer.Consumer;
import org.apache.paimon.operation.DefaultValueAssigner;
import org.apache.paimon.predicate.Predicate;
+import org.apache.paimon.table.source.snapshot.AllDeltaFollowUpScanner;
import org.apache.paimon.table.source.snapshot.BoundedChecker;
import
org.apache.paimon.table.source.snapshot.CompactionChangelogFollowUpScanner;
import
org.apache.paimon.table.source.snapshot.ContinuousAppendAndCompactFollowUpScanner;
@@ -185,17 +186,15 @@ public class InnerStreamTableScanImpl extends
AbstractInnerTableScan
}
private FollowUpScanner createFollowUpScanner() {
- CoreOptions.StreamingCompactionType type =
- options.toConfiguration().get(CoreOptions.STREAMING_COMPACT);
+ CoreOptions.StreamScanMode type =
+ options.toConfiguration().get(CoreOptions.STREAM_SCAN_MODE);
switch (type) {
- case NORMAL:
- {
- return new ContinuousCompactorFollowUpScanner();
- }
- case BUCKET_UNAWARE:
- {
- return new ContinuousAppendAndCompactFollowUpScanner();
- }
+ case COMPACT_BUCKET_TABLE:
+ return new ContinuousCompactorFollowUpScanner();
+ case COMPACT_APPEND_NO_BUCKET:
+ return new ContinuousAppendAndCompactFollowUpScanner();
+ case FILE_MONITOR:
+ return new AllDeltaFollowUpScanner();
}
CoreOptions.ChangelogProducer changelogProducer =
options.changelogProducer();
diff --git
a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/FollowUpScanner.java
b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/AllDeltaFollowUpScanner.java
similarity index 63%
copy from
paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/FollowUpScanner.java
copy to
paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/AllDeltaFollowUpScanner.java
index c7e0a24a8..8cafc4b6f 100644
---
a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/FollowUpScanner.java
+++
b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/AllDeltaFollowUpScanner.java
@@ -19,17 +19,18 @@
package org.apache.paimon.table.source.snapshot;
import org.apache.paimon.Snapshot;
-import org.apache.paimon.table.source.StreamTableScan;
-import org.apache.paimon.table.source.snapshot.SnapshotReader.Plan;
+import org.apache.paimon.table.source.ScanMode;
-/** Helper class for the follow-up planning of {@link StreamTableScan}. */
-public interface FollowUpScanner {
+/** {@link FollowUpScanner} for reading all file changes. */
+public class AllDeltaFollowUpScanner implements FollowUpScanner {
- boolean shouldScanSnapshot(Snapshot snapshot);
-
- Plan scan(long snapshotId, SnapshotReader snapshotReader);
+ @Override
+ public boolean shouldScanSnapshot(Snapshot snapshot) {
+ return true;
+ }
- default Plan getOverwriteChangesPlan(long snapshotId, SnapshotReader
snapshotReader) {
- return
snapshotReader.withSnapshot(snapshotId).readOverwrittenChanges();
+ @Override
+ public SnapshotReader.Plan scan(long snapshotId, SnapshotReader
snapshotReader) {
+ return
snapshotReader.withMode(ScanMode.DELTA).withSnapshot(snapshotId).readChanges();
}
}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/FollowUpScanner.java
b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/FollowUpScanner.java
index c7e0a24a8..ebdaa2fbb 100644
---
a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/FollowUpScanner.java
+++
b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/FollowUpScanner.java
@@ -30,6 +30,6 @@ public interface FollowUpScanner {
Plan scan(long snapshotId, SnapshotReader snapshotReader);
default Plan getOverwriteChangesPlan(long snapshotId, SnapshotReader
snapshotReader) {
- return
snapshotReader.withSnapshot(snapshotId).readOverwrittenChanges();
+ return snapshotReader.withSnapshot(snapshotId).readChanges();
}
}
diff --git
a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReader.java
b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReader.java
index 142d4b5fa..d53a7d2e3 100644
---
a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReader.java
+++
b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReader.java
@@ -60,8 +60,8 @@ public interface SnapshotReader {
/** Get splits plan from snapshot. */
Plan read();
- /** Get splits plan from an overwritten snapshot. */
- Plan readOverwrittenChanges();
+ /** Get splits plan from file changes. */
+ Plan readChanges();
Plan readIncrementalDiff(Snapshot before);
diff --git
a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java
b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java
index 391da875f..70673ca6d 100644
---
a/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java
+++
b/paimon-core/src/main/java/org/apache/paimon/table/source/snapshot/SnapshotReaderImpl.java
@@ -231,18 +231,10 @@ public class SnapshotReaderImpl implements SnapshotReader
{
.collect(Collectors.toList());
}
- /** Get splits from an overwritten snapshot files. */
@Override
- public Plan readOverwrittenChanges() {
+ public Plan readChanges() {
withMode(ScanMode.DELTA);
FileStoreScan.Plan plan = scan.plan();
- long snapshotId = plan.snapshotId();
-
- Snapshot snapshot = snapshotManager.snapshot(snapshotId);
- if (snapshot.commitKind() != Snapshot.CommitKind.OVERWRITE) {
- throw new IllegalStateException(
- "Cannot read overwrite splits from a non-overwrite
snapshot.");
- }
Map<BinaryRow, Map<Integer, List<DataFileMeta>>> beforeFiles =
groupByPartFiles(plan.files(FileKind.DELETE));
diff --git
a/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java
b/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java
index f84ddcb59..c26dd7601 100644
---
a/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java
+++
b/paimon-core/src/main/java/org/apache/paimon/table/system/AuditLogTable.java
@@ -253,8 +253,8 @@ public class AuditLogTable implements DataTable,
ReadonlyTable {
}
@Override
- public Plan readOverwrittenChanges() {
- return snapshotReader.readOverwrittenChanges();
+ public Plan readChanges() {
+ return snapshotReader.readChanges();
}
@Override
diff --git
a/paimon-core/src/main/java/org/apache/paimon/table/system/FileMonitorTable.java
b/paimon-core/src/main/java/org/apache/paimon/table/system/FileMonitorTable.java
new file mode 100644
index 000000000..a43c5da5a
--- /dev/null
+++
b/paimon-core/src/main/java/org/apache/paimon/table/system/FileMonitorTable.java
@@ -0,0 +1,282 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.table.system;
+
+import org.apache.paimon.CoreOptions;
+import org.apache.paimon.annotation.Experimental;
+import org.apache.paimon.data.BinaryRow;
+import org.apache.paimon.data.GenericRow;
+import org.apache.paimon.data.InternalRow;
+import org.apache.paimon.disk.IOManager;
+import org.apache.paimon.fs.FileIO;
+import org.apache.paimon.fs.Path;
+import org.apache.paimon.io.DataFileMeta;
+import org.apache.paimon.io.DataFileMetaSerializer;
+import org.apache.paimon.predicate.Predicate;
+import org.apache.paimon.reader.RecordReader;
+import org.apache.paimon.table.DataTable;
+import org.apache.paimon.table.FileStoreTable;
+import org.apache.paimon.table.ReadonlyTable;
+import org.apache.paimon.table.source.DataSplit;
+import org.apache.paimon.table.source.InnerStreamTableScan;
+import org.apache.paimon.table.source.InnerTableRead;
+import org.apache.paimon.table.source.InnerTableScan;
+import org.apache.paimon.table.source.Split;
+import org.apache.paimon.table.source.TableRead;
+import org.apache.paimon.table.source.snapshot.SnapshotReader;
+import org.apache.paimon.types.BigIntType;
+import org.apache.paimon.types.DataType;
+import org.apache.paimon.types.IntType;
+import org.apache.paimon.types.RowType;
+import org.apache.paimon.utils.IteratorRecordReader;
+import org.apache.paimon.utils.SnapshotManager;
+import org.apache.paimon.utils.TagManager;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.apache.paimon.CoreOptions.SCAN_BOUNDED_WATERMARK;
+import static org.apache.paimon.CoreOptions.STREAM_SCAN_MODE;
+import static org.apache.paimon.CoreOptions.StreamScanMode.FILE_MONITOR;
+import static org.apache.paimon.utils.SerializationUtils.deserializeBinaryRow;
+import static org.apache.paimon.utils.SerializationUtils.newBytesType;
+import static org.apache.paimon.utils.SerializationUtils.serializeBinaryRow;
+
+/** A table to produce modified files for snapshots. */
+@Experimental
+public class FileMonitorTable implements DataTable, ReadonlyTable {
+
+ private static final long serialVersionUID = 1L;
+
+ private final FileStoreTable wrapped;
+
+ private static final RowType ROW_TYPE =
+ RowType.of(
+ new DataType[] {
+ new BigIntType(false),
+ newBytesType(false),
+ new IntType(false),
+ newBytesType(false),
+ newBytesType(false)
+ },
+ new String[] {
+ "_SNAPSHOT_ID", "_PARTITION", "_BUCKET",
"_BEFORE_FILES", "_DATA_FILES"
+ });
+
+ public FileMonitorTable(FileStoreTable wrapped) {
+ Map<String, String> dynamicOptions = new HashMap<>();
+ dynamicOptions.put(STREAM_SCAN_MODE.key(), FILE_MONITOR.getValue());
+ dynamicOptions.put(SCAN_BOUNDED_WATERMARK.key(), null);
+ this.wrapped = wrapped.copy(dynamicOptions);
+ }
+
+ @Override
+ public Path location() {
+ return wrapped.location();
+ }
+
+ @Override
+ public SnapshotManager snapshotManager() {
+ return wrapped.snapshotManager();
+ }
+
+ @Override
+ public TagManager tagManager() {
+ return wrapped.tagManager();
+ }
+
+ @Override
+ public String name() {
+ return "__internal_file_monitor_" + wrapped.location().getName();
+ }
+
+ @Override
+ public RowType rowType() {
+ return ROW_TYPE;
+ }
+
+ @Override
+ public Map<String, String> options() {
+ return wrapped.options();
+ }
+
+ @Override
+ public List<String> primaryKeys() {
+ return Collections.emptyList();
+ }
+
+ @Override
+ public SnapshotReader newSnapshotReader() {
+ return wrapped.newSnapshotReader();
+ }
+
+ @Override
+ public InnerTableScan newScan() {
+ return wrapped.newScan();
+ }
+
+ @Override
+ public InnerStreamTableScan newStreamScan() {
+ return wrapped.newStreamScan();
+ }
+
+ @Override
+ public CoreOptions coreOptions() {
+ return wrapped.coreOptions();
+ }
+
+ @Override
+ public InnerTableRead newRead() {
+ return new BucketsRead();
+ }
+
+ @Override
+ public FileMonitorTable copy(Map<String, String> dynamicOptions) {
+ return new FileMonitorTable(wrapped.copy(dynamicOptions));
+ }
+
+ @Override
+ public FileIO fileIO() {
+ return wrapped.fileIO();
+ }
+
+ public static RowType getRowType() {
+ return ROW_TYPE;
+ }
+
+ private static class BucketsRead implements InnerTableRead {
+
+ @Override
+ public InnerTableRead withFilter(Predicate predicate) {
+ // filter is done by scan
+ return this;
+ }
+
+ @Override
+ public InnerTableRead withProjection(int[][] projection) {
+ throw new UnsupportedOperationException("BucketsRead does not
support projection");
+ }
+
+ @Override
+ public TableRead withIOManager(IOManager ioManager) {
+ return this;
+ }
+
+ @Override
+ public RecordReader<InternalRow> createReader(Split split) throws
IOException {
+ if (!(split instanceof DataSplit)) {
+ throw new IllegalArgumentException("Unsupported split: " +
split.getClass());
+ }
+
+ DataSplit dataSplit = (DataSplit) split;
+
+ FileChange change =
+ new FileChange(
+ dataSplit.snapshotId(),
+ dataSplit.partition(),
+ dataSplit.bucket(),
+ dataSplit.beforeFiles(),
+ dataSplit.dataFiles());
+
+ return new
IteratorRecordReader<>(Collections.singletonList(toRow(change)).iterator());
+ }
+ }
+
+ public static InternalRow toRow(FileChange change) throws IOException {
+ DataFileMetaSerializer fileSerializer = new DataFileMetaSerializer();
+ return GenericRow.of(
+ change.snapshotId(),
+ serializeBinaryRow(change.partition()),
+ change.bucket(),
+ fileSerializer.serializeList(change.beforeFiles()),
+ fileSerializer.serializeList(change.dataFiles()));
+ }
+
+ public static FileChange toFileChange(InternalRow row) throws IOException {
+ DataFileMetaSerializer fileSerializer = new DataFileMetaSerializer();
+ return new FileChange(
+ row.getLong(0),
+ deserializeBinaryRow(row.getBinary(1)),
+ row.getInt(2),
+ fileSerializer.deserializeList(row.getBinary(3)),
+ fileSerializer.deserializeList(row.getBinary(4)));
+ }
+
+ /** Pojo to record of file change. */
+ public static class FileChange {
+
+ private final long snapshotId;
+ private final BinaryRow partition;
+ private final int bucket;
+ private final List<DataFileMeta> beforeFiles;
+ private final List<DataFileMeta> dataFiles;
+
+ public FileChange(
+ long snapshotId,
+ BinaryRow partition,
+ int bucket,
+ List<DataFileMeta> beforeFiles,
+ List<DataFileMeta> dataFiles) {
+ this.snapshotId = snapshotId;
+ this.partition = partition;
+ this.bucket = bucket;
+ this.beforeFiles = beforeFiles;
+ this.dataFiles = dataFiles;
+ }
+
+ public long snapshotId() {
+ return snapshotId;
+ }
+
+ public BinaryRow partition() {
+ return partition;
+ }
+
+ public int bucket() {
+ return bucket;
+ }
+
+ public List<DataFileMeta> beforeFiles() {
+ return beforeFiles;
+ }
+
+ public List<DataFileMeta> dataFiles() {
+ return dataFiles;
+ }
+
+ @Override
+ public String toString() {
+ return "FileChange{"
+ + "snapshotId="
+ + snapshotId
+ + ", partition="
+ + partition
+ + ", bucket="
+ + bucket
+ + ", beforeFiles="
+ + beforeFiles
+ + ", dataFiles="
+ + dataFiles
+ + '}';
+ }
+ }
+}
diff --git
a/paimon-core/src/test/java/org/apache/paimon/table/ChangelogWithKeyFileStoreTableTest.java
b/paimon-core/src/test/java/org/apache/paimon/table/ChangelogWithKeyFileStoreTableTest.java
index 278305c38..ae5e6c7ef 100644
---
a/paimon-core/src/test/java/org/apache/paimon/table/ChangelogWithKeyFileStoreTableTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/table/ChangelogWithKeyFileStoreTableTest.java
@@ -54,6 +54,7 @@ import org.apache.paimon.table.source.TableRead;
import org.apache.paimon.table.source.TableScan;
import org.apache.paimon.table.source.snapshot.SnapshotReader;
import org.apache.paimon.table.system.AuditLogTable;
+import org.apache.paimon.table.system.FileMonitorTable;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.DataTypes;
import org.apache.paimon.types.RowKind;
@@ -62,6 +63,7 @@ import org.apache.paimon.utils.CompatibilityTestUtils;
import org.junit.jupiter.api.Test;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
@@ -969,6 +971,84 @@ public class ChangelogWithKeyFileStoreTableTest extends
FileStoreTableTestBase {
.containsExactly("1|10|200|binary|varbinary|mapKey:mapVal|multiset");
}
+ @Test
+ public void testInnerStreamScanMode() throws Exception {
+ FileStoreTable table = createFileStoreTable();
+
+ FileMonitorTable monitorTable = new FileMonitorTable(table);
+ ReadBuilder readBuilder = monitorTable.newReadBuilder();
+ StreamTableScan scan = readBuilder.newStreamScan();
+ TableRead read = readBuilder.newRead();
+
+ // 1. first write
+
+ BatchWriteBuilder writeBuilder = table.newBatchWriteBuilder();
+ BatchTableWrite write = writeBuilder.newWrite();
+ BatchTableCommit commit = writeBuilder.newCommit();
+
+ write.write(rowData(1, 10, 100L));
+ write.write(rowData(1, 11, 101L));
+ commit.commit(write.prepareCommit());
+
+ List<InternalRow> results = new ArrayList<>();
+ read.createReader(scan.plan()).forEachRemaining(results::add);
+ read.createReader(scan.plan()).forEachRemaining(results::add);
+ assertThat(results).hasSize(1);
+ FileMonitorTable.FileChange change =
FileMonitorTable.toFileChange(results.get(0));
+ assertThat(change.beforeFiles()).hasSize(0);
+ assertThat(change.dataFiles()).hasSize(1);
+ results.clear();
+
+ // 2. second write and compact
+
+ write.close();
+ commit.close();
+ writeBuilder = table.newBatchWriteBuilder();
+ write = writeBuilder.newWrite();
+ commit = writeBuilder.newCommit();
+ write.write(rowData(1, 10, 100L));
+ write.write(rowData(1, 11, 101L));
+ write.compact(binaryRow(1), 0, true);
+ commit.commit(write.prepareCommit());
+
+ // 2.1 read add file
+
+ read.createReader(scan.plan()).forEachRemaining(results::add);
+ assertThat(results).hasSize(1);
+ change = FileMonitorTable.toFileChange(results.get(0));
+ assertThat(change.beforeFiles()).hasSize(0);
+ assertThat(change.dataFiles()).hasSize(1);
+ results.clear();
+
+ // 2.2 read compact
+
+ read.createReader(scan.plan()).forEachRemaining(results::add);
+ assertThat(results).hasSize(1);
+ change = FileMonitorTable.toFileChange(results.get(0));
+ assertThat(change.beforeFiles()).hasSize(2);
+ assertThat(change.dataFiles()).hasSize(1);
+ results.clear();
+
+ // 3 overwrite
+ write.close();
+ commit.close();
+ writeBuilder = table.newBatchWriteBuilder().withOverwrite();
+ write = writeBuilder.newWrite();
+ commit = writeBuilder.newCommit();
+ write.write(rowData(1, 10, 100L));
+ write.write(rowData(1, 11, 101L));
+ commit.commit(write.prepareCommit());
+
+ read.createReader(scan.plan()).forEachRemaining(results::add);
+ assertThat(results).hasSize(1);
+ change = FileMonitorTable.toFileChange(results.get(0));
+ assertThat(change.beforeFiles()).hasSize(1);
+ assertThat(change.dataFiles()).hasSize(1);
+
+ write.close();
+ commit.close();
+ }
+
@Override
protected FileStoreTable createFileStoreTable(Consumer<Options> configure)
throws Exception {
return createFileStoreTable(configure, ROW_TYPE);
diff --git
a/paimon-flink/paimon-flink-common/src/main/java/org/apache/paimon/flink/source/CompactorSourceBuilder.java
b/paimon-flink/paimon-flink-common/src/main/java/org/apache/paimon/flink/source/CompactorSourceBuilder.java
index 3a29d3521..fc6eda4c4 100644
---
a/paimon-flink/paimon-flink-common/src/main/java/org/apache/paimon/flink/source/CompactorSourceBuilder.java
+++
b/paimon-flink/paimon-flink-common/src/main/java/org/apache/paimon/flink/source/CompactorSourceBuilder.java
@@ -130,8 +130,8 @@ public class CompactorSourceBuilder {
return new HashMap<String, String>() {
{
put(
- CoreOptions.STREAMING_COMPACT.key(),
- CoreOptions.StreamingCompactionType.NORMAL.getValue());
+ CoreOptions.STREAM_SCAN_MODE.key(),
+
CoreOptions.StreamScanMode.COMPACT_BUCKET_TABLE.getValue());
put(CoreOptions.SCAN_BOUNDED_WATERMARK.key(), null);
}
};
diff --git
a/paimon-flink/paimon-flink-common/src/main/java/org/apache/paimon/flink/utils/MultiTablesCompactorUtil.java
b/paimon-flink/paimon-flink-common/src/main/java/org/apache/paimon/flink/utils/MultiTablesCompactorUtil.java
index d26a9fe12..2fa4b3798 100644
---
a/paimon-flink/paimon-flink-common/src/main/java/org/apache/paimon/flink/utils/MultiTablesCompactorUtil.java
+++
b/paimon-flink/paimon-flink-common/src/main/java/org/apache/paimon/flink/utils/MultiTablesCompactorUtil.java
@@ -39,8 +39,8 @@ public class MultiTablesCompactorUtil {
return new HashMap<String, String>() {
{
put(
- CoreOptions.STREAMING_COMPACT.key(),
-
CoreOptions.StreamingCompactionType.NORMAL.getValue());
+ CoreOptions.STREAM_SCAN_MODE.key(),
+
CoreOptions.StreamScanMode.COMPACT_BUCKET_TABLE.getValue());
put(CoreOptions.SCAN_BOUNDED_WATERMARK.key(), null);
put(CoreOptions.WRITE_ONLY.key(), "false");
}