This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 9f50b5d2395e8789e2af0d67379a07f7355560c3
Author: Riza Suminto <[email protected]>
AuthorDate: Mon Nov 13 18:16:42 2023 -0800

    IMPALA-12528: (addendum) test non-reserved bytes against parquet
    
    test_hdfs_scanner_thread_non_reserved_bytes is still flaky due to the
    inconsistent number of data files of the tpch_text_gzip.lineitem table
    created during data loading. This patch further deflakes this test by
    changing its target table to tpch_parquet.lineitem. This table is
    selected because it consists of 3 parquet snappy files, which in turn are
    consistently planned as 1 scan range per file. This also raises the delay
    injection from 100ms to 500ms.
    
    Testing:
    - Confirm that tpch_parquet.lineitem is always populated with 3 parquet
      snappy files in the HDFS minicluster by repeating the following command:
      ./bin/load-data.py -f -w tpch --table_formats=parquet/none \
        --table_name=lineitem
    - Loop test_hdfs_scanner_thread_non_reserved_bytes 100 times against
      HDFS and OZONE.
    
    Change-Id: I2f464b1d1e6b676bf9e1376afd4497cb27cd4e23
    Reviewed-on: http://gerrit.cloudera.org:8080/20705
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../hdfs-scanner-thread-non-reserved-bytes.test    | 24 +++++++++++-----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/hdfs-scanner-thread-non-reserved-bytes.test
 
b/testdata/workloads/functional-query/queries/QueryTest/hdfs-scanner-thread-non-reserved-bytes.test
index 5e2b61bee..ba2d87aab 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/hdfs-scanner-thread-non-reserved-bytes.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/hdfs-scanner-thread-non-reserved-bytes.test
@@ -1,11 +1,11 @@
 ====
 ---- QUERY
 # IMPALA-11068: without tuning hdfs_scanner_non_reserved_bytes, this query can 
launch
-# up to 3 threads (compressed_text_est_bytes ~ 211MB).
+# up to 3 threads (with hdfs_scanner_thread_max_estimated_bytes default to 
32MB).
 set num_nodes=1;
-set mem_limit=750m;
-set debug_action="HDFS_SCANNER_THREAD_OBTAINED_RANGE:SLEEP@100";
-select count(*) from tpch_text_gzip.lineitem where l_comment = 'telets. 
quickly ';
+set mem_limit=130m;
+set debug_action="HDFS_SCANNER_THREAD_OBTAINED_RANGE:SLEEP@500";
+select count(*) from tpch_parquet.lineitem where l_comment = 'telets. quickly 
';
 ---- RESULTS:
 4
 ---- RUNTIME_PROFILE
@@ -15,10 +15,10 @@ aggregation(SUM, NumScannerThreadsStarted): 3
 # IMPALA-11068: raising hdfs_scanner_non_reserved_bytes above 
compressed_text_est_bytes
 # will reduce NumScannerThreadsStarted.
 set num_nodes=1;
-set mem_limit=750m;
-set debug_action="HDFS_SCANNER_THREAD_OBTAINED_RANGE:SLEEP@100";
-set hdfs_scanner_non_reserved_bytes=320m;
-select count(*) from tpch_text_gzip.lineitem where l_comment = 'telets. 
quickly ';
+set mem_limit=130m;
+set debug_action="HDFS_SCANNER_THREAD_OBTAINED_RANGE:SLEEP@500";
+set hdfs_scanner_non_reserved_bytes=48m;
+select count(*) from tpch_parquet.lineitem where l_comment = 'telets. quickly 
';
 ---- RESULTS:
 4
 ---- RUNTIME_PROFILE
@@ -28,10 +28,10 @@ aggregation(SUM, NumScannerThreadsStarted): 2
 # IMPALA-11068: high hdfs_scanner_non_reserved_bytes does not impact the first 
scanner
 # thread.
 set num_nodes=1;
-set mem_limit=750m;
-set debug_action="HDFS_SCANNER_THREAD_OBTAINED_RANGE:SLEEP@100";
-set hdfs_scanner_non_reserved_bytes=2g;
-select count(*) from tpch_text_gzip.lineitem where l_comment = 'telets. 
quickly ';
+set mem_limit=130m;
+set debug_action="HDFS_SCANNER_THREAD_OBTAINED_RANGE:SLEEP@500";
+set hdfs_scanner_non_reserved_bytes=64m;
+select count(*) from tpch_parquet.lineitem where l_comment = 'telets. quickly 
';
 ---- RESULTS:
 4
 ---- RUNTIME_PROFILE

Reply via email to