This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 9026c7defd5d test(trino): de-flake TestHudi*FileOperations by polling
for span stability (#18766)
9026c7defd5d is described below
commit 9026c7defd5d012d825996ac6f1eebef23079467
Author: Xinli Shang <[email protected]>
AuthorDate: Mon May 18 13:40:56 2026 -0700
test(trino): de-flake TestHudi*FileOperations by polling for span stability
(#18766)
Co-authored-by: Xinli Shang <[email protected]>
---
.../hudi/TestHudiAlluxioCacheFileOperations.java | 30 +++++++++++++++++++---
.../hudi/TestHudiMemoryCacheFileOperations.java | 30 +++++++++++++++++++---
.../plugin/hudi/TestHudiNoCacheFileOperations.java | 30 +++++++++++++++++++---
3 files changed, 81 insertions(+), 9 deletions(-)
diff --git
a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiAlluxioCacheFileOperations.java
b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiAlluxioCacheFileOperations.java
index ea35183ec4ed..4395a9c1ddff 100644
---
a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiAlluxioCacheFileOperations.java
+++
b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiAlluxioCacheFileOperations.java
@@ -158,9 +158,33 @@ public class TestHudiAlluxioCacheFileOperations
{
DistributedQueryRunner queryRunner = getDistributedQueryRunner();
queryRunner.executeWithPlan(queryRunner.getDefaultSession(), query);
- // Allow time for table stats computation to finish before validation.
- Thread.sleep(1000L);
- assertMultisetsEqual(getFileOperations(queryRunner),
expectedCacheAccesses);
+ // Async table-stats computation can outlive the synchronous query and
emit spans into
+ // the exporter after execute returns. A fixed Thread.sleep races with
this — when
+ // stats computation for query N is still running while query N+1's measurement
happens, spans
+ // leak across the boundary and counts get scrambled (the symmetric
off-by-N failure
+ // across paired tests). Poll until the span set is stable for two
consecutive reads.
+ Multiset<FileOperation> actual = waitForStableSpans(queryRunner);
+ assertMultisetsEqual(actual, expectedCacheAccesses);
+ }
+
+    /**
+     * Polls the recorded file-operation spans until two consecutive reads, taken 200ms apart,
+     * return equal multisets, and returns that stable snapshot.
+     *
+     * <p>Polling is capped at 30 seconds so the test cannot hang. If the spans never settle
+     * within that window, the most recent read is returned as-is and the caller's assertion
+     * is left to report the mismatch.
+     *
+     * @param queryRunner the query runner whose file operations are read
+     * @return the first snapshot equal to the read before it, or the last read on timeout
+     * @throws InterruptedException if the thread is interrupted while sleeping between polls
+     */
+    private static Multiset<FileOperation> waitForStableSpans(QueryRunner queryRunner)
+            throws InterruptedException
+    {
+        // System.nanoTime() is monotonic; System.currentTimeMillis() follows the wall clock and
+        // can jump (e.g. a clock adjustment), which would shorten or stretch the polling window.
+        long startNanos = System.nanoTime();
+        Multiset<FileOperation> previous = null;
+        while (System.nanoTime() - startNanos < 30_000_000_000L) {
+            Thread.sleep(200L);
+            Multiset<FileOperation> current = getFileOperations(queryRunner);
+            if (previous != null && current.equals(previous)) {
+                return current;
+            }
+            previous = current;
+        }
+        // Timed out without two matching reads; read once more only if the loop produced nothing.
+        return previous != null ? previous : getFileOperations(queryRunner);
     }
public static Multiset<FileOperation> getFileOperations(QueryRunner
queryRunner)
diff --git
a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiMemoryCacheFileOperations.java
b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiMemoryCacheFileOperations.java
index 3f2cb7d77173..ed362391b017 100644
---
a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiMemoryCacheFileOperations.java
+++
b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiMemoryCacheFileOperations.java
@@ -140,9 +140,33 @@ public class TestHudiMemoryCacheFileOperations
{
DistributedQueryRunner queryRunner = getDistributedQueryRunner();
queryRunner.executeWithPlan(queryRunner.getDefaultSession(), query);
- // Allow time for table stats computation to finish before validation.
- Thread.sleep(1000L);
- assertMultisetsEqual(getFileOperations(queryRunner),
expectedCacheAccesses);
+ // Async table-stats computation can outlive the synchronous query and
emit spans into
+ // the exporter after execute returns. A fixed Thread.sleep races with
this — when
+ // stats computation for query N is still running while query N+1's measurement
happens, spans
+ // leak across the boundary and counts get scrambled (the symmetric
off-by-N failure
+ // across paired tests). Poll until the span set is stable for two
consecutive reads.
+ Multiset<FileOperation> actual = waitForStableSpans(queryRunner);
+ assertMultisetsEqual(actual, expectedCacheAccesses);
+ }
+
+    /**
+     * Polls the recorded file-operation spans until two consecutive reads, taken 200ms apart,
+     * return equal multisets, and returns that stable snapshot.
+     *
+     * <p>Polling is capped at 30 seconds so the test cannot hang. If the spans never settle
+     * within that window, the most recent read is returned as-is and the caller's assertion
+     * is left to report the mismatch.
+     *
+     * @param queryRunner the query runner whose file operations are read
+     * @return the first snapshot equal to the read before it, or the last read on timeout
+     * @throws InterruptedException if the thread is interrupted while sleeping between polls
+     */
+    private static Multiset<FileOperation> waitForStableSpans(QueryRunner queryRunner)
+            throws InterruptedException
+    {
+        // System.nanoTime() is monotonic; System.currentTimeMillis() follows the wall clock and
+        // can jump (e.g. a clock adjustment), which would shorten or stretch the polling window.
+        long startNanos = System.nanoTime();
+        Multiset<FileOperation> previous = null;
+        while (System.nanoTime() - startNanos < 30_000_000_000L) {
+            Thread.sleep(200L);
+            Multiset<FileOperation> current = getFileOperations(queryRunner);
+            if (previous != null && current.equals(previous)) {
+                return current;
+            }
+            previous = current;
+        }
+        // Timed out without two matching reads; read once more only if the loop produced nothing.
+        return previous != null ? previous : getFileOperations(queryRunner);
     }
private static Multiset<FileOperation> getFileOperations(QueryRunner
queryRunner)
diff --git
a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiNoCacheFileOperations.java
b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiNoCacheFileOperations.java
index 58541011c238..9d6a6a8a5200 100644
---
a/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiNoCacheFileOperations.java
+++
b/hudi-trino-plugin/src/test/java/io/trino/plugin/hudi/TestHudiNoCacheFileOperations.java
@@ -140,9 +140,33 @@ public class TestHudiNoCacheFileOperations
{
DistributedQueryRunner queryRunner = getDistributedQueryRunner();
queryRunner.executeWithPlan(queryRunner.getDefaultSession(), query);
- // Allow time for table stats computation to finish before validation.
- Thread.sleep(1000L);
- assertMultisetsEqual(getFileOperations(queryRunner),
expectedCacheAccesses);
+ // Async table-stats computation can outlive the synchronous query and
emit spans into
+ // the exporter after execute returns. A fixed Thread.sleep races with
this — when
+ // stats computation for query N is still running while query N+1's measurement
happens, spans
+ // leak across the boundary and counts get scrambled (the symmetric
off-by-N failure
+ // across paired tests). Poll until the span set is stable for two
consecutive reads.
+ Multiset<FileOperationUtils.FileOperation> actual =
waitForStableSpans(queryRunner);
+ assertMultisetsEqual(actual, expectedCacheAccesses);
+ }
+
+    /**
+     * Polls the recorded file-operation spans until two consecutive reads, taken 200ms apart,
+     * return equal multisets, and returns that stable snapshot.
+     *
+     * <p>Polling is capped at 30 seconds so the test cannot hang. If the spans never settle
+     * within that window, the most recent read is returned as-is and the caller's assertion
+     * is left to report the mismatch.
+     *
+     * @param queryRunner the query runner whose file operations are read
+     * @return the first snapshot equal to the read before it, or the last read on timeout
+     * @throws InterruptedException if the thread is interrupted while sleeping between polls
+     */
+    private static Multiset<FileOperationUtils.FileOperation> waitForStableSpans(QueryRunner queryRunner)
+            throws InterruptedException
+    {
+        // System.nanoTime() is monotonic; System.currentTimeMillis() follows the wall clock and
+        // can jump (e.g. a clock adjustment), which would shorten or stretch the polling window.
+        long startNanos = System.nanoTime();
+        Multiset<FileOperationUtils.FileOperation> previous = null;
+        while (System.nanoTime() - startNanos < 30_000_000_000L) {
+            Thread.sleep(200L);
+            Multiset<FileOperationUtils.FileOperation> current = getFileOperations(queryRunner);
+            if (previous != null && current.equals(previous)) {
+                return current;
+            }
+            previous = current;
+        }
+        // Timed out without two matching reads; read once more only if the loop produced nothing.
+        return previous != null ? previous : getFileOperations(queryRunner);
     }
private static Multiset<FileOperationUtils.FileOperation>
getFileOperations(QueryRunner queryRunner)