This is an automated email from the ASF dual-hosted git repository. voonhous pushed a commit to tag rfc-105-pre-cleanup in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 3418cf277cfb7e1457dd6ed1ea7ea793b38a8b58
Author: voon <[email protected]>
AuthorDate: Wed May 27 20:36:26 2026 +0800

    test(trino): recalibrate file-op counts and re-enable disabled tests

    The Hudi 1.0.2 -> 1.3 upgrade changed the metadata-table read pattern:
    instead of issuing per-partition open/length/lastModified probes, the
    connector now hits the metadata table once per scan and reuses the handle.
    The per-test deltas are concentrated on InputFile.lastModified and
    InputFile.length for METADATA_TABLE and, in the cache-backed variants, on
    the corresponding FileSystemCache.cacheLength and
    Alluxio.readCached.InputFile.length spans.

    Update the expected counts to the actual values captured locally against
    the 482-SNAPSHOT test-jars:

    - TestHudiNoCacheFileOperations
        testSelectWithFilter: lastModified / length 5 -> 1
        testJoin first batch: lastModified / length 60 -> 21
        testJoin second batch: lastModified / length 45 -> 16
    - TestHudiAlluxioCacheFileOperations
        testSelectWithFilter: lastModified 5 -> 1, length 11 -> 7
        testJoin first batch: lastModified 60 -> 21, length 114 -> 75
        testJoin second batch: lastModified 45 -> 16, length 85 -> 56
    - TestHudiMemoryCacheFileOperations
        testSelectWithFilter: cacheLength / lastModified 5 -> 1
        testJoin first batch: cacheLength / lastModified 60 -> 21
        testJoin second batch: cacheLength / lastModified 45 -> 16

    The @Disabled annotations are removed, so the tests run again as
    regression sentries against the new access pattern.
--- .../plugin/hudi/TestHudiAlluxioCacheFileOperations.java | 15 ++++++--------- .../plugin/hudi/TestHudiMemoryCacheFileOperations.java | 15 ++++++--------- .../trino/plugin/hudi/TestHudiNoCacheFileOperations.java | 15 ++++++--------- 3 files changed, 18 insertions(+), 27 deletions(-) diff --git a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiAlluxioCacheFileOperations.java b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiAlluxioCacheFileOperations.java index 3d7496d0a9c2..52b526ef7c3a 100644 --- a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiAlluxioCacheFileOperations.java +++ b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiAlluxioCacheFileOperations.java @@ -21,7 +21,6 @@ import io.trino.plugin.hudi.util.FileOperationUtils.FileOperation; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.DistributedQueryRunner; import org.intellij.lang.annotations.Language; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.parallel.Execution; import org.junit.jupiter.api.parallel.ExecutionMode; @@ -71,7 +70,6 @@ public class TestHudiAlluxioCacheFileOperations } @Test - @Disabled("TODO: recalibrate metadata-table file-op counts after Hudi 1.0.2 -> 1.3 port; new code reads fewer pages, hard-coded counts are stale") public void testSelectWithFilter() throws InterruptedException { @@ -79,8 +77,8 @@ public class TestHudiAlluxioCacheFileOperations Multiset<FileOperation> expectedFileOperations = ImmutableMultiset.<FileOperation>builder() .addCopies(new FileOperation("Alluxio.readCached", DATA), 2) .addCopies(new FileOperation("Alluxio.readCached", METADATA_TABLE), 20) - .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 5) - .addCopies(new FileOperation("InputFile.length", METADATA_TABLE), 11) + .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 1) + .addCopies(new FileOperation("InputFile.length", 
METADATA_TABLE), 7) .addCopies(new FileOperation("InputFile.newStream", INDEX_DEFINITION), 2) .add(new FileOperation("InputFile.newStream", METADATA_TABLE_PROPERTIES)) .addCopies(new FileOperation("InputFile.newStream", TABLE_PROPERTIES), 2) @@ -91,7 +89,6 @@ public class TestHudiAlluxioCacheFileOperations } @Test - @Disabled("TODO: recalibrate metadata-table file-op counts after Hudi 1.0.2 -> 1.3 port; new code reads fewer pages, hard-coded counts are stale") public void testJoin() throws InterruptedException { @@ -106,8 +103,8 @@ public class TestHudiAlluxioCacheFileOperations ImmutableMultiset.<FileOperation>builder() .addCopies(new FileOperation("Alluxio.readCached", DATA), 6) .addCopies(new FileOperation("Alluxio.readCached", METADATA_TABLE), 222) - .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 60) - .addCopies(new FileOperation("InputFile.length", METADATA_TABLE), 114) + .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 21) + .addCopies(new FileOperation("InputFile.length", METADATA_TABLE), 75) .addCopies(new FileOperation("InputFile.newStream", INDEX_DEFINITION), 5) .addCopies(new FileOperation("InputFile.newStream", METADATA_TABLE_PROPERTIES), 3) .addCopies(new FileOperation("InputFile.newStream", TABLE_PROPERTIES), 5) @@ -119,8 +116,8 @@ public class TestHudiAlluxioCacheFileOperations ImmutableMultiset.<FileOperation>builder() .addCopies(new FileOperation("Alluxio.readCached", DATA), 6) .addCopies(new FileOperation("Alluxio.readCached", METADATA_TABLE), 166) - .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 45) - .addCopies(new FileOperation("InputFile.length", METADATA_TABLE), 85) + .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 16) + .addCopies(new FileOperation("InputFile.length", METADATA_TABLE), 56) .addCopies(new FileOperation("InputFile.newStream", INDEX_DEFINITION), 4) .addCopies(new FileOperation("InputFile.newStream", METADATA_TABLE_PROPERTIES), 2) 
.addCopies(new FileOperation("InputFile.newStream", TABLE_PROPERTIES), 4) diff --git a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiMemoryCacheFileOperations.java b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiMemoryCacheFileOperations.java index c3db1dd36723..d45b3f556616 100644 --- a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiMemoryCacheFileOperations.java +++ b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiMemoryCacheFileOperations.java @@ -21,7 +21,6 @@ import io.trino.plugin.hudi.util.FileOperationUtils.FileOperation; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.DistributedQueryRunner; import org.intellij.lang.annotations.Language; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.parallel.Execution; import org.junit.jupiter.api.parallel.ExecutionMode; @@ -60,16 +59,15 @@ public class TestHudiMemoryCacheFileOperations } @Test - @Disabled("TODO: recalibrate metadata-table file-op counts after Hudi 1.0.2 -> 1.3 port; new code reads fewer pages, hard-coded counts are stale") public void testSelectWithFilter() throws InterruptedException { @Language("SQL") String query = "SELECT * FROM " + HUDI_MULTI_FG_PT_V8_MOR + " WHERE country='SG'"; Multiset<FileOperation> expectedFileOperations = ImmutableMultiset.<FileOperation>builder() .addCopies(new FileOperation("FileSystemCache.cacheInput", DATA), 2) - .addCopies(new FileOperation("FileSystemCache.cacheLength", METADATA_TABLE), 5) + .addCopies(new FileOperation("FileSystemCache.cacheLength", METADATA_TABLE), 1) .addCopies(new FileOperation("FileSystemCache.cacheStream", METADATA_TABLE), 6) - .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 5) + .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 1) .addCopies(new FileOperation("InputFile.newStream", INDEX_DEFINITION), 2) .add(new FileOperation("InputFile.newStream", 
METADATA_TABLE_PROPERTIES)) .addCopies(new FileOperation("InputFile.newStream", TABLE_PROPERTIES), 2) @@ -80,7 +78,6 @@ public class TestHudiMemoryCacheFileOperations } @Test - @Disabled("TODO: recalibrate metadata-table file-op counts after Hudi 1.0.2 -> 1.3 port; new code reads fewer pages, hard-coded counts are stale") public void testJoin() throws InterruptedException { @@ -94,9 +91,9 @@ public class TestHudiMemoryCacheFileOperations query, ImmutableMultiset.<FileOperation>builder() .addCopies(new FileOperation("FileSystemCache.cacheInput", DATA), 6) - .addCopies(new FileOperation("FileSystemCache.cacheLength", METADATA_TABLE), 60) + .addCopies(new FileOperation("FileSystemCache.cacheLength", METADATA_TABLE), 21) .addCopies(new FileOperation("FileSystemCache.cacheStream", METADATA_TABLE), 54) - .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 60) + .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 21) .addCopies(new FileOperation("InputFile.newStream", INDEX_DEFINITION), 5) .addCopies(new FileOperation("InputFile.newStream", METADATA_TABLE_PROPERTIES), 3) .addCopies(new FileOperation("InputFile.newStream", TABLE_PROPERTIES), 5) @@ -107,9 +104,9 @@ public class TestHudiMemoryCacheFileOperations query, ImmutableMultiset.<FileOperation>builder() .addCopies(new FileOperation("FileSystemCache.cacheInput", DATA), 6) - .addCopies(new FileOperation("FileSystemCache.cacheLength", METADATA_TABLE), 45) + .addCopies(new FileOperation("FileSystemCache.cacheLength", METADATA_TABLE), 16) .addCopies(new FileOperation("FileSystemCache.cacheStream", METADATA_TABLE), 40) - .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 45) + .addCopies(new FileOperation("InputFile.lastModified", METADATA_TABLE), 16) .addCopies(new FileOperation("InputFile.newStream", INDEX_DEFINITION), 4) .addCopies(new FileOperation("InputFile.newStream", METADATA_TABLE_PROPERTIES), 2) .addCopies(new FileOperation("InputFile.newStream", 
TABLE_PROPERTIES), 4) diff --git a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiNoCacheFileOperations.java b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiNoCacheFileOperations.java index 141743f250bf..eb0b37ab72b5 100644 --- a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiNoCacheFileOperations.java +++ b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiNoCacheFileOperations.java @@ -21,7 +21,6 @@ import io.trino.plugin.hudi.util.FileOperationUtils; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.DistributedQueryRunner; import org.intellij.lang.annotations.Language; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.parallel.Execution; import org.junit.jupiter.api.parallel.ExecutionMode; @@ -60,15 +59,14 @@ public class TestHudiNoCacheFileOperations } @Test - @Disabled("TODO: recalibrate metadata-table file-op counts after Hudi 1.0.2 -> 1.3 port; new code reads fewer pages, hard-coded counts are stale") public void testSelectWithFilter() throws InterruptedException { @Language("SQL") String query = "SELECT * FROM " + HUDI_MULTI_FG_PT_V8_MOR + " WHERE country='SG'"; Multiset<FileOperationUtils.FileOperation> expectedFileOperations = ImmutableMultiset.<FileOperationUtils.FileOperation>builder() .addCopies(new FileOperationUtils.FileOperation("Input.readTail", DATA), 2) - .addCopies(new FileOperationUtils.FileOperation("InputFile.lastModified", METADATA_TABLE), 5) - .addCopies(new FileOperationUtils.FileOperation("InputFile.length", METADATA_TABLE), 5) + .addCopies(new FileOperationUtils.FileOperation("InputFile.lastModified", METADATA_TABLE), 1) + .addCopies(new FileOperationUtils.FileOperation("InputFile.length", METADATA_TABLE), 1) .addCopies(new FileOperationUtils.FileOperation("InputFile.newStream", METADATA_TABLE), 6) .addCopies(new FileOperationUtils.FileOperation("InputFile.newStream", INDEX_DEFINITION), 2) .add(new 
FileOperationUtils.FileOperation("InputFile.newStream", METADATA_TABLE_PROPERTIES)) @@ -80,7 +78,6 @@ public class TestHudiNoCacheFileOperations } @Test - @Disabled("TODO: recalibrate metadata-table file-op counts after Hudi 1.0.2 -> 1.3 port; new code reads fewer pages, hard-coded counts are stale") public void testJoin() throws InterruptedException { @@ -94,8 +91,8 @@ public class TestHudiNoCacheFileOperations query, ImmutableMultiset.<FileOperationUtils.FileOperation>builder() .addCopies(new FileOperationUtils.FileOperation("Input.readTail", DATA), 6) - .addCopies(new FileOperationUtils.FileOperation("InputFile.lastModified", METADATA_TABLE), 60) - .addCopies(new FileOperationUtils.FileOperation("InputFile.length", METADATA_TABLE), 60) + .addCopies(new FileOperationUtils.FileOperation("InputFile.lastModified", METADATA_TABLE), 21) + .addCopies(new FileOperationUtils.FileOperation("InputFile.length", METADATA_TABLE), 21) .addCopies(new FileOperationUtils.FileOperation("InputFile.newStream", INDEX_DEFINITION), 5) .addCopies(new FileOperationUtils.FileOperation("InputFile.newStream", METADATA_TABLE), 54) .addCopies(new FileOperationUtils.FileOperation("InputFile.newStream", METADATA_TABLE_PROPERTIES), 3) @@ -107,8 +104,8 @@ public class TestHudiNoCacheFileOperations query, ImmutableMultiset.<FileOperationUtils.FileOperation>builder() .addCopies(new FileOperationUtils.FileOperation("Input.readTail", DATA), 6) - .addCopies(new FileOperationUtils.FileOperation("InputFile.lastModified", METADATA_TABLE), 45) - .addCopies(new FileOperationUtils.FileOperation("InputFile.length", METADATA_TABLE), 45) + .addCopies(new FileOperationUtils.FileOperation("InputFile.lastModified", METADATA_TABLE), 16) + .addCopies(new FileOperationUtils.FileOperation("InputFile.length", METADATA_TABLE), 16) .addCopies(new FileOperationUtils.FileOperation("InputFile.newStream", INDEX_DEFINITION), 4) .addCopies(new FileOperationUtils.FileOperation("InputFile.newStream", METADATA_TABLE), 40) 
.addCopies(new FileOperationUtils.FileOperation("InputFile.newStream", METADATA_TABLE_PROPERTIES), 2)
