This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 1d0b2ef0c593145c476d52610c0f4ec2c69c8be7
Author: Joe McDonnell <joemcdonn...@cloudera.com>
AuthorDate: Mon Jun 23 20:31:00 2025 -0700

    IMPALA-14164: Fix timeout for fragments in flight in TestScratchDir
    
    On release builds, some tests in TestScratchDir have started hitting
    a timeout waiting for num-fragments-in-flight to reach 2. The code
    to wait for the metric sleeps one second between samples. If one of
    the query fragments starts and finishes during that second, the test
    will never see a sample containing two in-flight fragments. This
    happens on release builds because they are faster and more likely to
    complete within that second.
    
    This removes the code that waits for num-fragments-in-flight. All the
    tests have subsequent calls waiting for the scratch usage to reach a
    certain value. Those calls already wait for the fragment to start up
    on their own, so the num-fragments-in-flight wait doesn't add anything.
    
    Testing:
     - Ran custom_cluster/test_scratch_disk.py multiple times with a
       release build
    
    Change-Id: Ic8c573affc033056ba465c42bd420d5c1d3ba15c
    Reviewed-on: http://gerrit.cloudera.org:8080/23081
    Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
---
 tests/custom_cluster/test_scratch_disk.py | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/tests/custom_cluster/test_scratch_disk.py b/tests/custom_cluster/test_scratch_disk.py
index 87dc3baf8..3a57f5a1e 100644
--- a/tests/custom_cluster/test_scratch_disk.py
+++ b/tests/custom_cluster/test_scratch_disk.py
@@ -30,7 +30,6 @@ import time
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.common.skip import SkipIf
 from tests.util.hdfs_util import NAMENODE
-from tests.verifiers.metric_verifier import MetricVerifier
 
 
 class TestScratchDir(CustomClusterTestSuite):
@@ -234,8 +233,6 @@ class TestScratchDir(CustomClusterTestSuite):
     impalad = self.cluster.impalads[0]
     client = impalad.service.create_hs2_client()
     handle = self.execute_query_async_using_client(client, self.spill_query, vector)
-    verifier = MetricVerifier(impalad.service)
-    verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2)
     for i in range(5):
       impalad.service.wait_for_metric_value(
           'tmp-file-mgr.scratch-space-bytes-used.dir-' + str(i), 1, allow_greater=True)
@@ -264,8 +261,6 @@ class TestScratchDir(CustomClusterTestSuite):
     impalad = self.cluster.impalads[0]
     client = impalad.service.create_hs2_client()
     handle = self.execute_query_async_using_client(client, self.spill_query, vector)
-    verifier = MetricVerifier(impalad.service)
-    verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2)
     # dir1 and dir3 have highest priority and will be used as scratch disk.
     impalad.service.wait_for_metric_value(
         'tmp-file-mgr.scratch-space-bytes-used.dir-1', 1, allow_greater=True)
@@ -333,8 +328,6 @@ class TestScratchDir(CustomClusterTestSuite):
     impalad = self.cluster.impalads[0]
     client = impalad.service.create_hs2_client()
     handle = self.execute_query_async_using_client(client, self.spill_query, vector)
-    verifier = MetricVerifier(impalad.service)
-    verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2)
     # Dir0 is the remote directory.
     impalad.service.wait_for_metric_value(
         'tmp-file-mgr.scratch-space-bytes-used.dir-0', 1, allow_greater=True)
@@ -364,8 +357,6 @@ class TestScratchDir(CustomClusterTestSuite):
     impalad = self.cluster.impalads[0]
     client = impalad.service.create_hs2_client()
     handle = self.execute_query_async_using_client(client, self.spill_query, vector)
-    verifier = MetricVerifier(impalad.service)
-    verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2)
     # Local directory always ranks before the remote one, so dir0 is the local directory.
     # Only spill to dir0 because it has enough space for the spilling.
     impalad.service.wait_for_metric_value(
@@ -398,8 +389,6 @@ class TestScratchDir(CustomClusterTestSuite):
     impalad = self.cluster.impalads[0]
     client = impalad.service.create_hs2_client()
     handle = self.execute_query_async_using_client(client, self.spill_query, vector)
-    verifier = MetricVerifier(impalad.service)
-    verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2)
     # Local directory always ranks before the remote one, so dir0 is the local directory.
     # The query spills to both dir0 and dir1. By default the remote file is 16MB each,
     # so the value of metrics1 should be at least one file size.
@@ -431,8 +420,6 @@ class TestScratchDir(CustomClusterTestSuite):
     impalad = self.cluster.impalads[0]
     client = impalad.service.create_hs2_client()
     handle = self.execute_query_async_using_client(client, self.spill_query, vector)
-    verifier = MetricVerifier(impalad.service)
-    verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2)
     # The query spills to the remote directories and creates remote files,
     # so that the size is bigger than 0, and be integer times of remote file size.
     impalad.service.wait_for_metric_value(
@@ -515,8 +502,6 @@ class TestScratchDir(CustomClusterTestSuite):
     impalad = self.cluster.impalads[0]
     client = impalad.service.create_hs2_client()
     handle = self.execute_query_async_using_client(client, self.spill_query, vector)
-    verifier = MetricVerifier(impalad.service)
-    verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2)
     results = client.fetch(self.spill_query, handle)
     assert results.success
     metrics0 = self.get_metric(
@@ -544,8 +529,6 @@ class TestScratchDir(CustomClusterTestSuite):
     impalad = self.cluster.impalads[0]
     client = impalad.service.create_hs2_client()
     self.execute_query_async_using_client(client, self.spill_query_big_table, vector)
-    verifier = MetricVerifier(impalad.service)
-    verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2)
     # Dir0 is the remote directory.
     impalad.service.wait_for_metric_value(
         'tmp-file-mgr.scratch-space-bytes-used.dir-0', 1, allow_greater=True)
@@ -585,8 +568,6 @@ class TestScratchDir(CustomClusterTestSuite):
     impalad = self.cluster.impalads[0]
     client = impalad.service.create_hs2_client()
     self.execute_query_async_using_client(client, self.spill_query_big_table, vector)
-    verifier = MetricVerifier(impalad.service)
-    verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2)
     # Dir0 is the remote directory.
     impalad.service.wait_for_metric_value(
         'tmp-file-mgr.scratch-space-bytes-used.dir-0', 1, allow_greater=True)

Reply via email to