This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit f1de0c392f4c5fe1df653956595a6f6699dd100e
Author: Riza Suminto <[email protected]>
AuthorDate: Tue Dec 17 11:53:43 2024 -0800

    IMPALA-13636: Fix target file_format in TestTpcdsInsert
    
    TestTpcdsInsert creates a temporary table to test insert functionality.
    It has three problems:
    
    1. It does not use the unique_database parameter, so the temporary table
       is not cleaned up after the test finishes.
    2. It ignores file_format from test vector, causing inconsistency in the
       temporary table's file format. str_insert is always in PARQUET format,
       while store_sales_insert is always in TEXTFILE format.
    3. The text file_format dimension is never exercised, because
       --workload_exploration_strategy in run-all-tests.sh does not
       explicitly list the tpcds-insert workload.
    
    This patch fixes all three problems and a few flake8 warnings in
    test_tpcds_queries.py.
    
    Testing:
    - Ran bin/run-all-tests.sh with
      EXPLORATION_STRATEGY=exhaustive
      EE_TEST=true
      EE_TEST_FILES="query_test/test_tpcds_queries.py::TestTpcdsInsert"
      and verified that the temporary table format follows the file_format
      dimension.
    
    Change-Id: Iea621ec1d6a53eba9558b0daa3a4cc97fbcc67ae
    Reviewed-on: http://gerrit.cloudera.org:8080/22291
    Reviewed-by: Michael Smith <[email protected]>
    Reviewed-by: Csaba Ringhofer <[email protected]>
    Tested-by: Riza Suminto <[email protected]>
---
 bin/run-all-tests.sh                               |  3 +-
 buildall.sh                                        |  2 +-
 .../tpcds-insert/queries/expr-insert.test          |  7 ++---
 .../tpcds-insert/queries/partitioned-insert.test   | 14 ++++-----
 tests/query_test/test_tpcds_queries.py             | 34 +++++++++++++---------
 5 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/bin/run-all-tests.sh b/bin/run-all-tests.sh
index b90132132..9e53aef2c 100755
--- a/bin/run-all-tests.sh
+++ b/bin/run-all-tests.sh
@@ -193,7 +193,8 @@ TIMEOUT_PID=$!
 COMMON_PYTEST_ARGS="--maxfail=${MAX_PYTEST_FAILURES} 
--exploration_strategy=core"`
     `" --workload_exploration_strategy="`
         `"functional-query:${EXPLORATION_STRATEGY},"`
-        `"targeted-stress:${EXPLORATION_STRATEGY}"
+        `"targeted-stress:${EXPLORATION_STRATEGY},"`
+        `"tpcds-insert:${EXPLORATION_STRATEGY}"
 if [[ "${EXPLORATION_STRATEGY}" == "core" ]]; then
   # Skip the stress test in core - all stress tests are in exhaustive and
   # pytest startup takes a significant amount of time.
diff --git a/buildall.sh b/buildall.sh
index 2a96741de..bca291789 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -253,7 +253,7 @@ do
            "test execution time)"
       echo "[-testexhaustive] : Run tests in 'exhaustive' mode, which 
significantly"\
            "increases test execution time. ONLY APPLIES to suites with 
workloads:"\
-           "functional-query, targeted-stress"
+           "functional-query, targeted-stress, tpcds-insert"
       echo "[-testdata] : Loads test data. Implied as true if -snapshot_file 
is"\
            "specified. If -snapshot_file is not specified, data will be 
regenerated."
       echo "[-snapshot_file <file name>] : Load test data from a snapshot file"
diff --git a/testdata/workloads/tpcds-insert/queries/expr-insert.test 
b/testdata/workloads/tpcds-insert/queries/expr-insert.test
index 10840290f..d38416cca 100644
--- a/testdata/workloads/tpcds-insert/queries/expr-insert.test
+++ b/testdata/workloads/tpcds-insert/queries/expr-insert.test
@@ -1,16 +1,13 @@
 ====
----- QUERY: TPDCS-STR-INSERT-DROP
-DROP TABLE IF EXISTS str_insert
-====
 ---- QUERY: TPDCS-STR-INSERT-SETUP
-CREATE TABLE str_insert (s string) STORED AS PARQUET
+CREATE TABLE str_insert (s string) STORED AS $FILE_FORMAT
 ---- RESULTS
 'Table has been created.'
 ====
 ---- QUERY: TPDCS-STR-INSERT-CASE
 INSERT INTO str_insert
 SELECT case when ss_promo_sk % 2 = 0 then 'even' else 'odd' end
-FROM store_sales
+FROM tpcds.store_sales
 ---- RESULTS
 : 2880404
 ====
diff --git a/testdata/workloads/tpcds-insert/queries/partitioned-insert.test 
b/testdata/workloads/tpcds-insert/queries/partitioned-insert.test
index a9571eb27..166a99c36 100644
--- a/testdata/workloads/tpcds-insert/queries/partitioned-insert.test
+++ b/testdata/workloads/tpcds-insert/queries/partitioned-insert.test
@@ -1,23 +1,21 @@
 ====
----- QUERY: TPCDS-SS-INSERT-SETUP1
-DROP TABLE IF EXISTS store_sales_insert
-====
 ---- QUERY: TPCDS-SS-INSERT-SETUP2
-CREATE TABLE store_sales_insert LIKE store_sales
+CREATE TABLE store_sales_insert LIKE tpcds.store_sales
+STORED AS $FILE_FORMAT
 ---- RESULTS
 'Table has been created.'
 ====
 ---- QUERY: TPCDS-SS-INSERT-DAY
 # Insert a day's worth of data
 INSERT OVERWRITE TABLE store_sales_insert PARTITION (ss_sold_date_sk)
-SELECT * FROM store_sales
+SELECT * FROM tpcds.store_sales
 WHERE ss_sold_date_sk = 2451239
 ---- RESULTS
 ss_sold_date_sk=2451239: 847
 ====
 ---- QUERY: TPCDS-SS-INSERT-MONTH
 INSERT OVERWRITE TABLE store_sales_insert PARTITION (ss_sold_date_sk)
-SELECT * FROM store_sales
+SELECT * FROM tpcds.store_sales
 WHERE ss_sold_date_sk >= 2451270 and ss_sold_date_sk <= 2451299
 ---- RESULTS
 ss_sold_date_sk=2451270: 822
@@ -53,7 +51,7 @@ ss_sold_date_sk=2451299: 703
 ====
 ---- QUERY: TPCDS-SS-INSERT-QUARTER
 INSERT OVERWRITE TABLE store_sales_insert PARTITION (ss_sold_date_sk)
-SELECT * FROM store_sales
+SELECT * FROM tpcds.store_sales
 WHERE ss_sold_date_sk >= 2451423 and ss_sold_date_sk <= 2451544
 ---- RESULTS
 ss_sold_date_sk=2451423: 1881
@@ -181,7 +179,7 @@ ss_sold_date_sk=2451544: 3046
 ====
 ---- QUERY: TPCDS-SS-INSERT-YEAR
 INSERT OVERWRITE TABLE store_sales_insert PARTITION (ss_sold_date_sk)
-SELECT * FROM store_sales
+SELECT * FROM tpcds.store_sales
 WHERE ss_sold_date_sk >= 2451545 and ss_sold_date_sk < 2451911
 ---- RESULTS
 ss_sold_date_sk=2451545: 2743
diff --git a/tests/query_test/test_tpcds_queries.py 
b/tests/query_test/test_tpcds_queries.py
index 7e3e3e3fc..723d4f49f 100644
--- a/tests/query_test/test_tpcds_queries.py
+++ b/tests/query_test/test_tpcds_queries.py
@@ -27,10 +27,12 @@ from tests.common.skip import (
     SkipIfBuildType,
     SkipIfDockerizedCluster)
 from tests.common.test_dimensions import (
+    FILE_FORMAT_TO_STORED_AS_MAP,
     add_mandatory_exec_option,
     create_single_exec_option_dimension,
     is_supported_insert_format)
 
+
 class TestTpcdsQuery(ImpalaTestSuite):
   @classmethod
   def get_workload(cls):
@@ -40,9 +42,9 @@ class TestTpcdsQuery(ImpalaTestSuite):
   def add_test_dimensions(cls):
     super(TestTpcdsQuery, cls).add_test_dimensions()
     cls.ImpalaTestMatrix.add_constraint(lambda v:
-        v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] 
and
-        v.get_value('table_format').compression_codec in ['none', 'snap'] and
-        v.get_value('table_format').compression_type != 'record')
+        v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu']
+        and v.get_value('table_format').compression_codec in ['none', 'snap']
+        and v.get_value('table_format').compression_type != 'record')
     add_mandatory_exec_option(cls, 'decimal_v2', 0)
 
     if cls.exploration_strategy() != 'exhaustive':
@@ -345,9 +347,9 @@ class TestTpcdsDecimalV2Query(ImpalaTestSuite):
   def add_test_dimensions(cls):
     super(TestTpcdsDecimalV2Query, cls).add_test_dimensions()
     cls.ImpalaTestMatrix.add_constraint(lambda v:
-        v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] 
and
-        v.get_value('table_format').compression_codec in ['none', 'snap'] and
-        v.get_value('table_format').compression_type != 'record')
+        v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu']
+        and v.get_value('table_format').compression_codec in ['none', 'snap']
+        and v.get_value('table_format').compression_type != 'record')
 
     if cls.exploration_strategy() != 'exhaustive':
       # Cut down on the execution time for these tests in core by running only
@@ -704,11 +706,17 @@ class TestTpcdsInsert(ImpalaTestSuite):
     cls.ImpalaTestMatrix.add_constraint(lambda v:
         is_supported_insert_format(v.get_value('table_format')))
 
-  def test_tpcds_partitioned_insert(self, vector):
-    self.run_test_case('partitioned-insert', vector)
+  def create_test_file_vars(self, vector):
+    stored_as = 
FILE_FORMAT_TO_STORED_AS_MAP[vector.get_value('table_format').file_format]
+    return {'$FILE_FORMAT': stored_as}
+
+  def test_tpcds_partitioned_insert(self, vector, unique_database):
+    self.run_test_case('partitioned-insert', vector, unique_database,
+        test_file_vars=self.create_test_file_vars(vector))
 
-  def test_expr_insert(self, vector):
-    self.run_test_case('expr-insert', vector)
+  def test_expr_insert(self, vector, unique_database):
+    self.run_test_case('expr-insert', vector, unique_database,
+        test_file_vars=self.create_test_file_vars(vector))
 
 
 class TestTpcdsUnmodified(ImpalaTestSuite):
@@ -720,9 +728,9 @@ class TestTpcdsUnmodified(ImpalaTestSuite):
   def add_test_dimensions(cls):
     super(TestTpcdsUnmodified, cls).add_test_dimensions()
     cls.ImpalaTestMatrix.add_constraint(lambda v:
-        v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] 
and
-        v.get_value('table_format').compression_codec in ['none', 'snap'] and
-        v.get_value('table_format').compression_type != 'record')
+        v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu']
+        and v.get_value('table_format').compression_codec in ['none', 'snap']
+        and v.get_value('table_format').compression_type != 'record')
 
     if cls.exploration_strategy() != 'exhaustive':
       # Cut down on the execution time for these tests in core by running only

Reply via email to