This is an automated email from the ASF dual-hosted git repository. yihua pushed a commit to branch release-1.2.0 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 0be0fbae87854cb48b1e09e5bdca75b0328c078b Author: Xinli Shang <[email protected]> AuthorDate: Mon May 18 13:40:56 2026 -0700 test(trino): de-flake TestHudi*FileOperations by polling for span stability (#18766) Co-authored-by: Xinli Shang <[email protected]> --- .../hudi/TestHudiAlluxioCacheFileOperations.java | 30 +++++++++++++++++++--- .../hudi/TestHudiMemoryCacheFileOperations.java | 30 +++++++++++++++++++--- .../plugin/hudi/TestHudiNoCacheFileOperations.java | 30 +++++++++++++++++++--- 3 files changed, 81 insertions(+), 9 deletions(-) diff --git a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiAlluxioCacheFileOperations.java b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiAlluxioCacheFileOperations.java index ea35183ec4ed..4395a9c1ddff 100644 --- a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiAlluxioCacheFileOperations.java +++ b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiAlluxioCacheFileOperations.java @@ -158,9 +158,33 @@ public class TestHudiAlluxioCacheFileOperations { DistributedQueryRunner queryRunner = getDistributedQueryRunner(); queryRunner.executeWithPlan(queryRunner.getDefaultSession(), query); - // Allow time for table stats computation to finish before validation. - Thread.sleep(1000L); - assertMultisetsEqual(getFileOperations(queryRunner), expectedCacheAccesses); + // Async table-stats computation can outlive the synchronous query and emit spans into + // the exporter after execute returns. A fixed Thread.sleep races with this — when + // stats from query N is still running while query N+1's measurement happens, spans + // leak across the boundary and counts get scrambled (the symmetric off-by-N failure + // across paired tests). Poll until the span set is stable for two consecutive reads. + Multiset<FileOperation> actual = waitForStableSpans(queryRunner); + assertMultisetsEqual(actual, expectedCacheAccesses); + } + + /** + * Returns the file-operation span set once two consecutive reads (200ms apart) agree. + * Bounded by a 30-second ceiling so a runaway test fails loudly instead of hanging. + */ + private static Multiset<FileOperation> waitForStableSpans(QueryRunner queryRunner) + throws InterruptedException + { + long deadlineMillis = System.currentTimeMillis() + 30_000L; + Multiset<FileOperation> previous = null; + while (System.currentTimeMillis() < deadlineMillis) { + Thread.sleep(200L); + Multiset<FileOperation> current = getFileOperations(queryRunner); + if (previous != null && current.equals(previous)) { + return current; + } + previous = current; + } + return previous != null ? previous : getFileOperations(queryRunner); } public static Multiset<FileOperation> getFileOperations(QueryRunner queryRunner) diff --git a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiMemoryCacheFileOperations.java b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiMemoryCacheFileOperations.java index 3f2cb7d77173..ed362391b017 100644 --- a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiMemoryCacheFileOperations.java +++ b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiMemoryCacheFileOperations.java @@ -140,9 +140,33 @@ public class TestHudiMemoryCacheFileOperations { DistributedQueryRunner queryRunner = getDistributedQueryRunner(); queryRunner.executeWithPlan(queryRunner.getDefaultSession(), query); - // Allow time for table stats computation to finish before validation. - Thread.sleep(1000L); - assertMultisetsEqual(getFileOperations(queryRunner), expectedCacheAccesses); + // Async table-stats computation can outlive the synchronous query and emit spans into + // the exporter after execute returns. A fixed Thread.sleep races with this — when + // stats from query N is still running while query N+1's measurement happens, spans + // leak across the boundary and counts get scrambled (the symmetric off-by-N failure + // across paired tests). Poll until the span set is stable for two consecutive reads. + Multiset<FileOperation> actual = waitForStableSpans(queryRunner); + assertMultisetsEqual(actual, expectedCacheAccesses); + } + + /** + * Returns the file-operation span set once two consecutive reads (200ms apart) agree. + * Bounded by a 30-second ceiling so a runaway test fails loudly instead of hanging. + */ + private static Multiset<FileOperation> waitForStableSpans(QueryRunner queryRunner) + throws InterruptedException + { + long deadlineMillis = System.currentTimeMillis() + 30_000L; + Multiset<FileOperation> previous = null; + while (System.currentTimeMillis() < deadlineMillis) { + Thread.sleep(200L); + Multiset<FileOperation> current = getFileOperations(queryRunner); + if (previous != null && current.equals(previous)) { + return current; + } + previous = current; + } + return previous != null ? previous : getFileOperations(queryRunner); } private static Multiset<FileOperation> getFileOperations(QueryRunner queryRunner) diff --git a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiNoCacheFileOperations.java b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiNoCacheFileOperations.java index 58541011c238..9d6a6a8a5200 100644 --- a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiNoCacheFileOperations.java +++ b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiNoCacheFileOperations.java @@ -140,9 +140,33 @@ public class TestHudiNoCacheFileOperations { DistributedQueryRunner queryRunner = getDistributedQueryRunner(); queryRunner.executeWithPlan(queryRunner.getDefaultSession(), query); - // Allow time for table stats computation to finish before validation. - Thread.sleep(1000L); - assertMultisetsEqual(getFileOperations(queryRunner), expectedCacheAccesses); + // Async table-stats computation can outlive the synchronous query and emit spans into + // the exporter after execute returns. A fixed Thread.sleep races with this — when + // stats from query N is still running while query N+1's measurement happens, spans + // leak across the boundary and counts get scrambled (the symmetric off-by-N failure + // across paired tests). Poll until the span set is stable for two consecutive reads. + Multiset<FileOperationUtils.FileOperation> actual = waitForStableSpans(queryRunner); + assertMultisetsEqual(actual, expectedCacheAccesses); + } + + /** + * Returns the file-operation span set once two consecutive reads (200ms apart) agree. + * Bounded by a 30-second ceiling so a runaway test fails loudly instead of hanging. + */ + private static Multiset<FileOperationUtils.FileOperation> waitForStableSpans(QueryRunner queryRunner) + throws InterruptedException + { + long deadlineMillis = System.currentTimeMillis() + 30_000L; + Multiset<FileOperationUtils.FileOperation> previous = null; + while (System.currentTimeMillis() < deadlineMillis) { + Thread.sleep(200L); + Multiset<FileOperationUtils.FileOperation> current = getFileOperations(queryRunner); + if (previous != null && current.equals(previous)) { + return current; + } + previous = current; + } + return previous != null ? previous : getFileOperations(queryRunner); } private static Multiset<FileOperationUtils.FileOperation> getFileOperations(QueryRunner queryRunner)
