This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 4273bcb65dfbf09bcce475430d316334e7d894e1
Author: stiga-huang <[email protected]>
AuthorDate: Mon Feb 3 09:22:28 2025 +0800

    IMPALA-13727: Fix wrong regex for hostname in test_multiple_blocks_mt_dop
    
    The test uses r'host=(\S+:[0-9]*)' to match the hosts of all the
    fragment instances in query profile, i.e.
      host_list = re.findall(r'host=(\S+:[0-9]*)', result.runtime_profile)
    This is able to find the instance name in lines like
      Instance d843c27e276bfa7a:a86450f600000006 (host=name:27002):
    and get "name:27002" from it.
    
    However, the runtime profile we get right after fetching the query
    result and before closing the query might have a section of total time
    after the fragment instance host, e.g.
      Instance d843c27e276bfa7a:a86450f600000006 (host=name:27002):(Total: 
19.999ms, non-child: 0.000ns, % non-child: 0.00%)
    
    The regex pattern will match "name:27002):Total:" in the string. If not
    all the instances have this "Total" section, the instance names mismatch
    and the test will fail. See the query profile attached in the JIRA as an
    example. Not sure how this happens. Maybe it's not guaranteed that the
    profile is ready immediately when the last result row is fetched. There
    might be a delay for the coordinator to process the final update of the
    profile from executors. I think it's ok to fix the test first.
    
    This fixes the regex pattern to be r'\(host=([^:]+:[0-9]*)\)' so it can
    exactly match the instance name.
    
    Tests
     - Verified the fix locally
    
    Change-Id: If70daa344403f2ae8617bf86e7896c2bbfd9e736
    Reviewed-on: http://gerrit.cloudera.org:8080/22457
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 tests/query_test/test_scanners.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/tests/query_test/test_scanners.py 
b/tests/query_test/test_scanners.py
index c8b703381..85b53951e 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -877,19 +877,20 @@ class TestParquet(ImpalaTestSuite):
       num_rows_read_list = re.findall(r'RowsRead: [0-9.K]* \(([0-9]*)\)',
         result.runtime_profile)
       # The extra fragment is the "Averaged Fragment"
-      assert len(num_rows_read_list) == 7
-      assert len(ranges_complete_list) == 7
+      assert len(num_rows_read_list) == 7, result.runtime_profile
+      assert len(ranges_complete_list) == 7, result.runtime_profile
 
       # Extract the host for each fragment instance. The first is the 
coordinator
       # fragment instance.
-      host_list = re.findall(r'host=(\S+:[0-9]*)', result.runtime_profile)
-      assert len(host_list) == 7
+      host_list = re.findall(r'\(host=([^:]+:[0-9]*)\)', 
result.runtime_profile)
+      assert len(host_list) == 7, result.runtime_profile
+      assert len(set(host_list)) == 3, result.runtime_profile
 
       total_rows_read = 0
       # Skip the Averaged Fragment; it comes first in the runtime profile.
       for num_row_read in num_rows_read_list[1:]:
         total_rows_read += int(num_row_read)
-      assert total_rows_read == TOTAL_ROWS
+      assert total_rows_read == TOTAL_ROWS, result.runtime_profile
 
       # Again skip the Averaged Fragment; it comes first in the runtime 
profile.
       # With mt_dop 2, every backend will have 2 instances.
@@ -900,7 +901,8 @@ class TestParquet(ImpalaTestSuite):
           ranges_per_host[host] = 0
         ranges_per_host[host] += int(ranges_complete_list[i])
       for host in ranges_per_host:
-        assert ranges_per_host[host] == 2
+        assert ranges_per_host[host] == 2, ("ScanRangesComplete for " + host
+             + " should be 2 in profile:\n" + result.runtime_profile)
     finally:
       self.client.clear_configuration()
 

Reply via email to