This is an automated email from the ASF dual-hosted git repository. hope pushed a commit to branch release-1.4 in repository https://gitbox.apache.org/repos/asf/paimon.git
commit ca2534fad14c788eec576106b94586c2fd26e949 Author: Jingsong Lee <[email protected]> AuthorDate: Thu Mar 26 17:41:09 2026 +0800 [core] File paths in SimpleLsmKvDb should contain UUID (#7536) During construction, the ClusteringCompactManager obtains a temporary directory via ioManager.pickTempDir() and creates a SimpleLsmKvDb within that directory. When bucket > 1, multiple ClusteringCompactManager instances across different buckets may end up using the same temporary directory. The original SimpleLsmKvDb.newSstFile() method used sequentially numbered names like sst-000000.db, causing SST files from different instances to overwrite each other. This leads to corrupted key indexes and failed deduplication of primary keys (e.g., duplicate values 30 and 33 for key=3). --- .../paimon/lookup/sort/db/SimpleLsmKvDb.java | 5 ++- .../paimon/lookup/sort/db/SimpleLsmKvDbTest.java | 51 ++++++++++++++++++++++ .../paimon/crosspartition/GlobalIndexAssigner.java | 2 +- .../java/org/apache/paimon/disk/IOManager.java | 2 +- .../java/org/apache/paimon/disk/IOManagerImpl.java | 2 +- .../clustering/ClusteringCompactManager.java | 2 +- .../paimon/separated/ClusteringTableTest.java | 2 +- 7 files changed, 60 insertions(+), 6 deletions(-) diff --git a/paimon-common/src/main/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDb.java b/paimon-common/src/main/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDb.java index 8871fcab48..c5b8c53fd9 100644 --- a/paimon-common/src/main/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDb.java +++ b/paimon-common/src/main/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDb.java @@ -42,6 +42,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.TreeMap; +import java.util.UUID; /** * A simple LSM-Tree based KV database built on top of {@link SortLookupStoreFactory}. @@ -92,6 +93,7 @@ public class SimpleLsmKvDb implements Closeable { static final long PER_ENTRY_OVERHEAD = 160; private final File dataDirectory; + private final String uuid; private final SortLookupStoreFactory storeFactory; private final Comparator<MemorySlice> keyComparator; private final long memTableFlushThreshold; @@ -126,6 +128,7 @@ public class SimpleLsmKvDb implements Closeable { int level0FileNumCompactTrigger, int sizeRatio) { this.dataDirectory = dataDirectory; + this.uuid = UUID.randomUUID().toString(); this.storeFactory = storeFactory; this.keyComparator = keyComparator; this.memTableFlushThreshold = memTableFlushThreshold; @@ -540,7 +543,7 @@ public class SimpleLsmKvDb implements Closeable { private File newSstFile() { long sequence = fileSequence++; - return new File(dataDirectory, String.format("sst-%06d.db", sequence)); + return new File(dataDirectory, String.format("sst-%s-%06d.db", uuid, sequence)); } private void ensureOpen() { diff --git a/paimon-common/src/test/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDbTest.java b/paimon-common/src/test/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDbTest.java index 44d7d46ff5..92a5488591 100644 --- a/paimon-common/src/test/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDbTest.java +++ b/paimon-common/src/test/java/org/apache/paimon/lookup/sort/db/SimpleLsmKvDbTest.java @@ -1430,6 +1430,57 @@ public class SimpleLsmKvDbTest { } } + @Test + public void testTwoInstancesSameDirectoryNoFileCollision() throws IOException { + // Two SimpleLsmKvDb instances sharing the same dataDirectory should not interfere + // with each other because SST file names contain a unique UUID. + File sharedDir = new File(tempDir.toFile(), "shared-dir-db"); + + try (SimpleLsmKvDb db1 = + SimpleLsmKvDb.builder(sharedDir) + .memTableFlushThreshold(1024) + .blockSize(256) + .level0FileNumCompactTrigger(4) + .compressOptions(new CompressOptions("none", 1)) + .build(); + SimpleLsmKvDb db2 = + SimpleLsmKvDb.builder(sharedDir) + .memTableFlushThreshold(1024) + .blockSize(256) + .level0FileNumCompactTrigger(4) + .compressOptions(new CompressOptions("none", 1)) + .build()) { + + // Write different data to each instance + putString(db1, "key-a", "from-db1"); + putString(db1, "key-b", "from-db1"); + db1.flush(); + + putString(db2, "key-a", "from-db2"); + putString(db2, "key-c", "from-db2"); + db2.flush(); + + // Each instance should see only its own data + Assertions.assertEquals("from-db1", getString(db1, "key-a")); + Assertions.assertEquals("from-db1", getString(db1, "key-b")); + Assertions.assertNull(getString(db1, "key-c")); + + Assertions.assertEquals("from-db2", getString(db2, "key-a")); + Assertions.assertNull(getString(db2, "key-b")); + Assertions.assertEquals("from-db2", getString(db2, "key-c")); + + // Write more data and flush again to ensure no cross-contamination + putString(db1, "key-a", "updated-db1"); + db1.flush(); + + putString(db2, "key-a", "updated-db2"); + db2.flush(); + + Assertions.assertEquals("updated-db1", getString(db1, "key-a")); + Assertions.assertEquals("updated-db2", getString(db2, "key-a")); + } + } + @Test public void testBulkLoadFailsOnNonEmptyDb() throws IOException { try (SimpleLsmKvDb db = createDb()) { diff --git a/paimon-core/src/main/java/org/apache/paimon/crosspartition/GlobalIndexAssigner.java b/paimon-core/src/main/java/org/apache/paimon/crosspartition/GlobalIndexAssigner.java index c387f9abdf..9097c5c482 100644 --- a/paimon-core/src/main/java/org/apache/paimon/crosspartition/GlobalIndexAssigner.java +++ b/paimon-core/src/main/java/org/apache/paimon/crosspartition/GlobalIndexAssigner.java @@ -132,7 +132,7 @@ public class GlobalIndexAssigner implements Serializable, Closeable { this.extractor = new RowPartitionAllPrimaryKeyExtractor(table.schema()); this.keyPartExtractor = new KeyPartPartitionKeyExtractor(table.schema()); - String tmpDir = ioManager.pickRandomTempDir(); + String tmpDir = ioManager.pickTempDir(); this.path = new File(tmpDir, "rocksdb-" + UUID.randomUUID()); if (!this.path.mkdirs()) { throw new RuntimeException( diff --git a/paimon-core/src/main/java/org/apache/paimon/disk/IOManager.java b/paimon-core/src/main/java/org/apache/paimon/disk/IOManager.java index 4a35167e1e..352f8c4a73 100644 --- a/paimon-core/src/main/java/org/apache/paimon/disk/IOManager.java +++ b/paimon-core/src/main/java/org/apache/paimon/disk/IOManager.java @@ -38,7 +38,7 @@ public interface IOManager extends AutoCloseable { String[] tempDirs(); - String pickRandomTempDir(); + String pickTempDir(); Enumerator createChannelEnumerator(); diff --git a/paimon-core/src/main/java/org/apache/paimon/disk/IOManagerImpl.java b/paimon-core/src/main/java/org/apache/paimon/disk/IOManagerImpl.java index 57926d0154..c0028c6bc6 100644 --- a/paimon-core/src/main/java/org/apache/paimon/disk/IOManagerImpl.java +++ b/paimon-core/src/main/java/org/apache/paimon/disk/IOManagerImpl.java @@ -95,7 +95,7 @@ public class IOManagerImpl implements IOManager { } @Override - public String pickRandomTempDir() { + public String pickTempDir() { return pickRandomly(Arrays.asList(tempDirs())); } diff --git a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/clustering/ClusteringCompactManager.java b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/clustering/ClusteringCompactManager.java index 9e925d0764..12acea3be6 100644 --- a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/clustering/ClusteringCompactManager.java +++ b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/clustering/ClusteringCompactManager.java @@ -119,7 +119,7 @@ public class ClusteringCompactManager extends CompactFutureManager { valueType.getFieldTypes(), clusteringColumnIndexes, true); SimpleLsmKvDb kvDb = - SimpleLsmKvDb.builder(new File(ioManager.pickRandomTempDir())) + SimpleLsmKvDb.builder(new File(ioManager.pickTempDir())) .cacheManager(cacheManager) .keyComparator(new RowCompactedSerializer(keyType).createSliceComparator()) .build(); diff --git a/paimon-core/src/test/java/org/apache/paimon/separated/ClusteringTableTest.java b/paimon-core/src/test/java/org/apache/paimon/separated/ClusteringTableTest.java index aaf661be2f..353dd6d704 100644 --- a/paimon-core/src/test/java/org/apache/paimon/separated/ClusteringTableTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/separated/ClusteringTableTest.java @@ -74,7 +74,7 @@ class ClusteringTableTest { .column("b", DataTypes.INT()) .primaryKey("a") .option(DELETION_VECTORS_ENABLED.key(), "true") - .option(BUCKET.key(), "1") + .option(BUCKET.key(), "2") .option(CLUSTERING_COLUMNS.key(), "b") .option(PK_CLUSTERING_OVERRIDE.key(), "true") .build();
