This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 8cd4a1e4e5cbf426294c1158936402bf21433f3c Author: Michael Smith <[email protected]> AuthorDate: Wed Nov 16 11:56:20 2022 -0800 IMPALA-11584: Enable minicluster tests for Ozone Enables tests guarded by SkipIfNotHdfsMinicluster to run on Ozone as well as HDFS. Plans are still skipped for Ozone because there's Ozone-specific text in the plan output. Updates explain output to allow for Ozone, which has a block size of 256MB instead of 128MB. One of the partitions read in test_explain is ~180MB, straddling the difference between Ozone and HDFS. Testing: ran affected tests with Ozone. Change-Id: I6b06ceacf951dbc966aa409cf24a310c9676fe7f Reviewed-on: http://gerrit.cloudera.org:8080/19250 Tested-by: Impala Public Jenkins <[email protected]> Reviewed-by: Joe McDonnell <[email protected]> --- .../queries/QueryTest/explain-level0.test | 4 ++-- .../queries/QueryTest/explain-level1.test | 4 ++-- .../queries/QueryTest/explain-level2.test | 6 +++--- .../queries/QueryTest/explain-level3.test | 6 +++--- .../QueryTest/mt-dop-parquet-scheduling.test | 24 +++++++++++----------- tests/common/skip.py | 12 +++++------ tests/custom_cluster/test_hdfs_timeout.py | 4 +++- tests/custom_cluster/test_scheduler_locality.py | 4 ++-- tests/query_test/test_mem_usage_scaling.py | 3 ++- tests/query_test/test_scanners.py | 1 + 10 files changed, 35 insertions(+), 33 deletions(-) diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level0.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level0.test index 6ee4c0edb..aa80888dc 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/explain-level0.test +++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level0.test @@ -12,8 +12,8 @@ row_regex:.*Per-Host Resource Estimates: Memory=[0-9.]*MB.* '04:EXCHANGE [UNPARTITIONED]' '02:HASH JOIN [INNER JOIN, BROADCAST]' '|--03:EXCHANGE [BROADCAST]' -'| 01:SCAN HDFS [tpch.orders]' -'00:SCAN HDFS [tpch.lineitem]' +'| 01:SCAN $FILESYSTEM_NAME 
[tpch.orders]' +'00:SCAN $FILESYSTEM_NAME [tpch.lineitem]' ==== ---- QUERY # Tests the warning about missing table stats in the explain header. diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test index 476dabc43..26d4aef9d 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test +++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test @@ -19,11 +19,11 @@ row_regex:.*row-size=.* cardinality=.* '|' '|--03:EXCHANGE [BROADCAST]' '| |' -'| 01:SCAN HDFS [tpch.orders]' +'| 01:SCAN $FILESYSTEM_NAME [tpch.orders]' row_regex:.*partitions=1/1 files=1 size=.* row_regex:.*row-size=.* cardinality=.* '|' -'00:SCAN HDFS [tpch.lineitem]' +'00:SCAN $FILESYSTEM_NAME [tpch.lineitem]' row_regex:.*partitions=1/1 files=1 size=.* ' runtime filters: RF000 -> l_orderkey' row_regex:.*row-size=.* cardinality=.* diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test index 75444c03e..8b72f4a69 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test +++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test @@ -36,9 +36,9 @@ row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-re '| | tuple-ids=1 row-size=171B cardinality=1.50M' '| | in pipelines: 01(GETNEXT)' '| |' -'| F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2' +row_regex:.*F01:PLAN FRAGMENT \[RANDOM\] hosts=[1-2] instances=[1-2] row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.* -'| 01:SCAN HDFS [tpch.orders, RANDOM]' +'| 01:SCAN $FILESYSTEM_NAME [tpch.orders, RANDOM]' row_regex:.*partitions=1/1 files=1 size=.* '| stored statistics:' row_regex:.*table: rows=[0-9.]*[A-Z]* size=.* @@ -48,7 +48,7 @@ row_regex:.*mem-estimate=[0-9.]*[A-Z]*B 
mem-reservation=[0-9.]*[A-Z]*B thread-re '| tuple-ids=1 row-size=171B cardinality=1.50M' '| in pipelines: 01(GETNEXT)' '|' -'00:SCAN HDFS [tpch.lineitem, RANDOM]' +'00:SCAN $FILESYSTEM_NAME [tpch.lineitem, RANDOM]' row_regex:.*partitions=1/1 files=1 size=.* ' runtime filters: RF000[bloom] -> l_orderkey' ' stored statistics:' diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test index 1865c9c50..bf953b209 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test +++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test @@ -38,7 +38,7 @@ row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-re ' | tuple-ids=1 row-size=171B cardinality=1.50M' ' | in pipelines: 01(GETNEXT)' ' |' -' 00:SCAN HDFS [tpch.lineitem, RANDOM]' +' 00:SCAN $FILESYSTEM_NAME [tpch.lineitem, RANDOM]' row_regex:.*partitions=1/1 files=1 size=.* ' runtime filters: RF000[bloom] -> l_orderkey' ' stored statistics:' @@ -50,11 +50,11 @@ row_regex:.*mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-re ' tuple-ids=0 row-size=231B cardinality=6.00M' ' in pipelines: 00(GETNEXT)' '' -'F01:PLAN FRAGMENT [RANDOM] hosts=2 instances=2' +row_regex:.*F01:PLAN FRAGMENT \[RANDOM\] hosts=[1-2] instances=[1-2] row_regex:.*Per-Host Resources: mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=.* ' DATASTREAM SINK [FRAGMENT=F00, EXCHANGE=03, BROADCAST]' row_regex:.* | mem-estimate=[0-9.]*[A-Z]*B mem-reservation=[0-9.]*[A-Z]*B thread-reservation=0 -' 01:SCAN HDFS [tpch.orders, RANDOM]' +' 01:SCAN $FILESYSTEM_NAME [tpch.orders, RANDOM]' row_regex:.*partitions=1/1 files=1 size=.* ' stored statistics:' row_regex:.*table: rows=[0-9.]*[A-Z]* size=.* diff --git a/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-scheduling.test 
b/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-scheduling.test index 800fb67fe..3b382015f 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-scheduling.test +++ b/testdata/workloads/functional-query/queries/QueryTest/mt-dop-parquet-scheduling.test @@ -44,9 +44,9 @@ row_regex:.*AdmissionSlots: 4 .* row_regex:.*F04:ROOT * 1 * 1 .* row_regex:.*04:AGGREGATE * 3 * 12 .* row_regex:.*00:UNION * 3 * 12 * -row_regex:.*02:SCAN HDFS * 3 * 12 .*alltypessmall.* -row_regex:.*03:SCAN HDFS * 3 * 12 .*alltypestiny.* -row_regex:.*01:SCAN HDFS * 3 * 12 .*alltypes.* +row_regex:.*02:SCAN (HDFS|OZONE) * 3 * 12 .*alltypessmall.* +row_regex:.*03:SCAN (HDFS|OZONE) * 3 * 12 .*alltypestiny.* +row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 12 .*alltypes.* ==== ---- QUERY # Same idea, but with smallest scan first to check that the scheduler is taking the @@ -64,9 +64,9 @@ row_regex:.*AdmissionSlots: 4 .* row_regex:.*F04:ROOT * 1 * 1 .* row_regex:.*04:AGGREGATE * 3 * 12 .* row_regex:.*00:UNION * 3 * 12 * -row_regex:.*02:SCAN HDFS * 3 * 12 .*alltypessmall.* -row_regex:.*03:SCAN HDFS * 3 * 12 .*alltypes.* -row_regex:.*01:SCAN HDFS * 3 * 12 .*alltypestiny.* +row_regex:.*02:SCAN (HDFS|OZONE) * 3 * 12 .*alltypessmall.* +row_regex:.*03:SCAN (HDFS|OZONE) * 3 * 12 .*alltypes.* +row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 12 .*alltypestiny.* ==== ---- QUERY # This query should have one scan and one exchange in the interior fragment. @@ -85,8 +85,8 @@ row_regex:.*04:AGGREGATE * 3 * 12 .* row_regex:.*06:AGGREGATE * 3 * 12 .* row_regex:.*03:AGGREGATE * 3 * 12 .* row_regex:.*00:UNION * 3 * 12 * -row_regex:.*02:SCAN HDFS * 3 * 12 .*alltypes.* -row_regex:.*01:SCAN HDFS * 3 * 12 .*alltypestiny.* +row_regex:.*02:SCAN (HDFS|OZONE) * 3 * 12 .*alltypes.* +row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 12 .*alltypestiny.* ==== ---- QUERY # This query should have one scan and one exchange in the interior fragment. 
@@ -107,8 +107,8 @@ row_regex:.*04:AGGREGATE * 3 * 12 .* row_regex:.*06:AGGREGATE * 3 * 12 .* row_regex:.*03:AGGREGATE * 3 * 4 .* row_regex:.*00:UNION * 3 * 12 * -row_regex:.*02:SCAN HDFS * 3 * 4 .*alltypestiny.* -row_regex:.*01:SCAN HDFS * 3 * 12 .*alltypes.* +row_regex:.*02:SCAN (HDFS|OZONE) * 3 * 4 .*alltypestiny.* +row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 12 .*alltypes.* ==== ---- QUERY # This query should have one scan and two exchanges in the interior fragment. @@ -128,6 +128,6 @@ row_regex:.*AdmissionSlots: 2.* row_regex:.*00:UNION * 3 * 6 .* row_regex:.*08:AGGREGATE * 3 * 6 .* row_regex:.*03:AGGREGATE * 3 * 6 .* -row_regex:.*04:SCAN HDFS * 3 * 6 .* -row_regex:.*01:SCAN HDFS * 3 * 6 .* +row_regex:.*04:SCAN (HDFS|OZONE) * 3 * 6 .* +row_regex:.*01:SCAN (HDFS|OZONE) * 3 * 6 .* ==== diff --git a/tests/common/skip.py b/tests/common/skip.py index e5b856485..46fc303fa 100644 --- a/tests/common/skip.py +++ b/tests/common/skip.py @@ -55,10 +55,6 @@ class SkipIfFS: hdfs_block_size = pytest.mark.skipif(not IS_HDFS, reason="Size of block reported to Impala is not ~128MB") hdfs_acls = pytest.mark.skipif(not IS_HDFS, reason="HDFS acls are not supported") - # TODO: IMPALA-11584: see if this can be collapsed into SkipIfNotHdfsMinicluster - always_remote = pytest.mark.skipif(IS_EC or not (IS_HDFS or IS_OZONE) - or IMPALA_TEST_CLUSTER_PROPERTIES.is_remote_cluster(), - reason="Only HDFS and Ozone tests are run co-located") # Special case product limitations. empty_directory = pytest.mark.skipif(IS_S3, @@ -73,6 +69,8 @@ class SkipIfFS: read_past_eof = pytest.mark.skipif(IS_S3 or IS_GCS, reason="IMPALA-2512") large_block_size = pytest.mark.skipif(IS_OZONE or IS_EC, reason="block size is larger than 128MB") + read_speed_dependent = pytest.mark.skipif(not IS_HDFS or IS_EC, + reason="success depends on fast scan node performance") # These need test infra work to re-enable. 
hive = pytest.mark.skipif(not IS_HDFS, reason="Hive doesn't work") @@ -133,10 +131,10 @@ class SkipIfLocal: class SkipIfNotHdfsMinicluster: # These are skipped when not running against a local HDFS mini-cluster. plans = pytest.mark.skipif( - not IS_HDFS or IMPALA_TEST_CLUSTER_PROPERTIES.is_remote_cluster(), + not (IS_HDFS or IS_OZONE) or IMPALA_TEST_CLUSTER_PROPERTIES.is_remote_cluster(), reason="Test assumes plans from local HDFS mini-cluster") - tuned_for_minicluster = pytest.mark.skipif( - not IS_HDFS or IS_EC or IMPALA_TEST_CLUSTER_PROPERTIES.is_remote_cluster(), + tuned_for_minicluster = pytest.mark.skipif(not (IS_HDFS or IS_OZONE) + or IS_EC or IMPALA_TEST_CLUSTER_PROPERTIES.is_remote_cluster(), reason="Test is tuned for 3-node HDFS minicluster with no EC") scheduling = pytest.mark.skipif( not (IS_HDFS or IS_OZONE) or IS_EC or pytest.config.option.testing_remote_cluster, diff --git a/tests/custom_cluster/test_hdfs_timeout.py b/tests/custom_cluster/test_hdfs_timeout.py index 0967427f7..9e9b84a96 100644 --- a/tests/custom_cluster/test_hdfs_timeout.py +++ b/tests/custom_cluster/test_hdfs_timeout.py @@ -22,6 +22,7 @@ import time from tests.common.custom_cluster_test_suite import CustomClusterTestSuite from tests.common.skip import SkipIfNotHdfsMinicluster from subprocess import check_call +from tests.util.filesystem_utils import IS_OZONE from tests.util.shell_util import exec_process @@ -43,7 +44,8 @@ class TestHdfsTimeouts(CustomClusterTestSuite): # Find the NameNode's pid via pgrep. This would raise an error if it did not # find a pid, so there is at least one match. 
- rc, pgrep_output, stderr = exec_process("pgrep -f namenode.NameNode") + data_api_name = 'OzoneManager' if IS_OZONE else 'namenode.NameNode' + rc, pgrep_output, stderr = exec_process("pgrep -f {}".format(data_api_name)) assert rc == 0, \ "Error finding NameNode pid\nstdout={0}\nstderr={1}".format(pgrep_output, stderr) # In our test environment, this should only match one pid diff --git a/tests/custom_cluster/test_scheduler_locality.py b/tests/custom_cluster/test_scheduler_locality.py index 0ab1dc301..adfbcd581 100644 --- a/tests/custom_cluster/test_scheduler_locality.py +++ b/tests/custom_cluster/test_scheduler_locality.py @@ -19,14 +19,14 @@ from tests.common.custom_cluster_test_suite import CustomClusterTestSuite from tests.common.network import get_external_ip -from tests.common.skip import SkipIfFS +from tests.common.skip import SkipIfNotHdfsMinicluster LOCAL_ASSIGNMENTS_METRIC = "simple-scheduler.local-assignments.total" TOTAL_ASSIGNMENTS_METRIC = "simple-scheduler.assignments.total" -@SkipIfFS.always_remote +@SkipIfNotHdfsMinicluster.tuned_for_minicluster class TestSchedulerLocality(CustomClusterTestSuite): """Tests for local and remote disk scheduling.""" diff --git a/tests/query_test/test_mem_usage_scaling.py b/tests/query_test/test_mem_usage_scaling.py index ceb1e8498..8910aa336 100644 --- a/tests/query_test/test_mem_usage_scaling.py +++ b/tests/query_test/test_mem_usage_scaling.py @@ -23,7 +23,7 @@ from tests.common.test_dimensions import (create_avro_snappy_dimension, create_parquet_dimension) from tests.common.impala_cluster import ImpalaCluster from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import SkipIfNotHdfsMinicluster +from tests.common.skip import SkipIfNotHdfsMinicluster, SkipIfFS from tests.common.test_dimensions import create_single_exec_option_dimension from tests.common.test_vector import ImpalaTestDimension from tests.verifiers.metric_verifier import MetricVerifier @@ -400,6 +400,7 @@ class
TestHashJoinMemLimit(ImpalaTestSuite): @SkipIfNotHdfsMinicluster.tuned_for_minicluster +@SkipIfFS.read_speed_dependent class TestExchangeMemUsage(ImpalaTestSuite): """Targeted test for exchange memory limits.""" diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py index 22dca16f4..76ed7a254 100644 --- a/tests/query_test/test_scanners.py +++ b/tests/query_test/test_scanners.py @@ -1125,6 +1125,7 @@ class TestParquet(ImpalaTestSuite): for summary in page_size_summaries: assert not self._is_summary_stats_counter_empty(summary) + @SkipIfFS.hdfs_small_block @SkipIfNotHdfsMinicluster.tuned_for_minicluster def test_bytes_read_per_column(self, vector): """IMPALA-6964: Test that the counter Parquet[Un]compressedBytesReadPerColumn is
