This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 1d0b2ef0c593145c476d52610c0f4ec2c69c8be7
Author: Joe McDonnell <joemcdonn...@cloudera.com>
AuthorDate: Mon Jun 23 20:31:00 2025 -0700

    IMPALA-14164: Fix timeout for fragments in flight in TestScratchDir

    On release builds, some tests in TestScratchDir have started hitting
    a timeout waiting for num-fragments-in-flight to reach 2. The code to
    wait for the metric sleeps one second between samples. If one of the
    query fragments starts and finishes during that second, the test will
    never see a sample containing two in-flight fragments. This happens on
    release builds because they are faster and more likely to complete
    within that second.

    This removes the code that waits for num-fragments-in-flight. All the
    tests have subsequent calls waiting for the scratch usage to reach a
    certain value. This will properly wait for the fragment to start up
    on its own. The num-fragments-in-flight wait doesn't add anything.

    Testing:
     - Ran custom_cluster/test_scratch_disk.py multiple times with a
       release build

    Change-Id: Ic8c573affc033056ba465c42bd420d5c1d3ba15c
    Reviewed-on: http://gerrit.cloudera.org:8080/23081
    Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
---
 tests/custom_cluster/test_scratch_disk.py | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/tests/custom_cluster/test_scratch_disk.py b/tests/custom_cluster/test_scratch_disk.py
index 87dc3baf8..3a57f5a1e 100644
--- a/tests/custom_cluster/test_scratch_disk.py
+++ b/tests/custom_cluster/test_scratch_disk.py
@@ -30,7 +30,6 @@ import time
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.common.skip import SkipIf
 from tests.util.hdfs_util import NAMENODE
-from tests.verifiers.metric_verifier import MetricVerifier
 
 
 class TestScratchDir(CustomClusterTestSuite):
@@ -234,8 +233,6 @@ class TestScratchDir(CustomClusterTestSuite):
     impalad = self.cluster.impalads[0]
     client = 
impalad.service.create_hs2_client() handle = self.execute_query_async_using_client(client, self.spill_query, vector) - verifier = MetricVerifier(impalad.service) - verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2) for i in range(5): impalad.service.wait_for_metric_value( 'tmp-file-mgr.scratch-space-bytes-used.dir-' + str(i), 1, allow_greater=True) @@ -264,8 +261,6 @@ class TestScratchDir(CustomClusterTestSuite): impalad = self.cluster.impalads[0] client = impalad.service.create_hs2_client() handle = self.execute_query_async_using_client(client, self.spill_query, vector) - verifier = MetricVerifier(impalad.service) - verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2) # dir1 and dir3 have highest priority and will be used as scratch disk. impalad.service.wait_for_metric_value( 'tmp-file-mgr.scratch-space-bytes-used.dir-1', 1, allow_greater=True) @@ -333,8 +328,6 @@ class TestScratchDir(CustomClusterTestSuite): impalad = self.cluster.impalads[0] client = impalad.service.create_hs2_client() handle = self.execute_query_async_using_client(client, self.spill_query, vector) - verifier = MetricVerifier(impalad.service) - verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2) # Dir0 is the remote directory. impalad.service.wait_for_metric_value( 'tmp-file-mgr.scratch-space-bytes-used.dir-0', 1, allow_greater=True) @@ -364,8 +357,6 @@ class TestScratchDir(CustomClusterTestSuite): impalad = self.cluster.impalads[0] client = impalad.service.create_hs2_client() handle = self.execute_query_async_using_client(client, self.spill_query, vector) - verifier = MetricVerifier(impalad.service) - verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2) # Local directory always ranks before the remote one, so dir0 is the local directory. # Only spill to dir0 because it has enough space for the spilling. 
impalad.service.wait_for_metric_value( @@ -398,8 +389,6 @@ class TestScratchDir(CustomClusterTestSuite): impalad = self.cluster.impalads[0] client = impalad.service.create_hs2_client() handle = self.execute_query_async_using_client(client, self.spill_query, vector) - verifier = MetricVerifier(impalad.service) - verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2) # Local directory always ranks before the remote one, so dir0 is the local directory. # The query spills to both dir0 and dir1. By default the remote file is 16MB each, # so the value of metrics1 should be at least one file size. @@ -431,8 +420,6 @@ class TestScratchDir(CustomClusterTestSuite): impalad = self.cluster.impalads[0] client = impalad.service.create_hs2_client() handle = self.execute_query_async_using_client(client, self.spill_query, vector) - verifier = MetricVerifier(impalad.service) - verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2) # The query spills to the remote directories and creates remote files, # so that the size is bigger than 0, and be integer times of remote file size. impalad.service.wait_for_metric_value( @@ -515,8 +502,6 @@ class TestScratchDir(CustomClusterTestSuite): impalad = self.cluster.impalads[0] client = impalad.service.create_hs2_client() handle = self.execute_query_async_using_client(client, self.spill_query, vector) - verifier = MetricVerifier(impalad.service) - verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2) results = client.fetch(self.spill_query, handle) assert results.success metrics0 = self.get_metric( @@ -544,8 +529,6 @@ class TestScratchDir(CustomClusterTestSuite): impalad = self.cluster.impalads[0] client = impalad.service.create_hs2_client() self.execute_query_async_using_client(client, self.spill_query_big_table, vector) - verifier = MetricVerifier(impalad.service) - verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2) # Dir0 is the remote directory. 
impalad.service.wait_for_metric_value( 'tmp-file-mgr.scratch-space-bytes-used.dir-0', 1, allow_greater=True) @@ -585,8 +568,6 @@ class TestScratchDir(CustomClusterTestSuite): impalad = self.cluster.impalads[0] client = impalad.service.create_hs2_client() self.execute_query_async_using_client(client, self.spill_query_big_table, vector) - verifier = MetricVerifier(impalad.service) - verifier.wait_for_metric("impala-server.num-fragments-in-flight", 2) # Dir0 is the remote directory. impalad.service.wait_for_metric_value( 'tmp-file-mgr.scratch-space-bytes-used.dir-0', 1, allow_greater=True)