This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 5e694568d5e837e6f6648bed573bb3b60c7d5a92 Author: Michael Smith <[email protected]> AuthorDate: Thu Mar 2 15:47:48 2023 -0800 IMPALA-11966: Enable cache_ozone_file_handles by default Updates Ozone dependency to 1.3.0 to address HDDS-7135 and enables cache_ozone_file_handles by default for a ~10% improvement on TPC-DS query time. Updates the Ozone CDP dependency for HDDS-8095. The fix for HDDS-8095 will be available in Ozone 1.4.0, so testing with TDE currently requires the CDP build. Testing: - ran backend, e2e, and custom cluster test suites with Ozone Change-Id: Icc66551f9b87af785a1c30b516ac39f4640638fe Reviewed-on: http://gerrit.cloudera.org:8080/19573 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/runtime/io/disk-io-mgr.cc | 2 +- bin/impala-config.sh | 24 ++++++++++++------------ tests/custom_cluster/test_hdfs_fd_caching.py | 16 +++++++--------- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/be/src/runtime/io/disk-io-mgr.cc b/be/src/runtime/io/disk-io-mgr.cc index 29babb3e0..fae86e948 100644 --- a/be/src/runtime/io/disk-io-mgr.cc +++ b/be/src/runtime/io/disk-io-mgr.cc @@ -199,7 +199,7 @@ DEFINE_bool(cache_s3_file_handles, true, "Enable the file handle cache for " DEFINE_bool(cache_abfs_file_handles, true, "Enable the file handle cache for " "ABFS files."); -DEFINE_bool(cache_ozone_file_handles, false, "Enable the file handle cache for Ozone " +DEFINE_bool(cache_ozone_file_handles, true, "Enable the file handle cache for Ozone " "files."); DECLARE_int64(min_buffer_size); diff --git a/bin/impala-config.sh b/bin/impala-config.sh index 2470ebb15..acfd43210 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -213,26 +213,26 @@ fi : ${IMPALA_TOOLCHAIN_HOST:=native-toolchain.s3.amazonaws.com} export IMPALA_TOOLCHAIN_HOST -export CDP_BUILD_NUMBER=38235009 +export CDP_BUILD_NUMBER=39127492 export CDP_MAVEN_REPOSITORY=\ 
"https://${IMPALA_TOOLCHAIN_HOST}/build/cdp_components/${CDP_BUILD_NUMBER}/maven" -export CDP_AVRO_JAVA_VERSION=1.8.2.7.2.17.0-127 -export CDP_HADOOP_VERSION=3.1.1.7.2.17.0-127 -export CDP_HBASE_VERSION=2.4.6.7.2.17.0-127 -export CDP_HIVE_VERSION=3.1.3000.7.2.17.0-127 -export CDP_ICEBERG_VERSION=1.1.0.7.2.17.0-127 -export CDP_KNOX_VERSION=1.3.0.7.2.17.0-127 -export CDP_OZONE_VERSION=1.3.0.7.2.17.0-127 -export CDP_PARQUET_VERSION=1.10.99.7.2.17.0-127 -export CDP_RANGER_VERSION=2.3.0.7.2.17.0-127 -export CDP_TEZ_VERSION=0.9.1.7.2.17.0-127 +export CDP_AVRO_JAVA_VERSION=1.8.2.7.2.17.0-160 +export CDP_HADOOP_VERSION=3.1.1.7.2.17.0-160 +export CDP_HBASE_VERSION=2.4.6.7.2.17.0-160 +export CDP_HIVE_VERSION=3.1.3000.7.2.17.0-160 +export CDP_ICEBERG_VERSION=1.1.0.7.2.17.0-160 +export CDP_KNOX_VERSION=1.3.0.7.2.17.0-160 +export CDP_OZONE_VERSION=1.3.0.7.2.17.0-160 +export CDP_PARQUET_VERSION=1.10.99.7.2.17.0-160 +export CDP_RANGER_VERSION=2.3.0.7.2.17.0-160 +export CDP_TEZ_VERSION=0.9.1.7.2.17.0-160 # Ref: https://infra.apache.org/release-download-pages.html#closer : ${APACHE_MIRROR:="https://www.apache.org/dyn/closer.cgi"} export APACHE_MIRROR export APACHE_HIVE_VERSION=3.1.3 export APACHE_HIVE_STORAGE_API_VERSION=2.7.0 -export APACHE_OZONE_VERSION=1.2.1 +export APACHE_OZONE_VERSION=1.3.0 export ARCH_NAME=$(uname -p) diff --git a/tests/custom_cluster/test_hdfs_fd_caching.py b/tests/custom_cluster/test_hdfs_fd_caching.py index b5e5db5e8..9cb6936a2 100644 --- a/tests/custom_cluster/test_hdfs_fd_caching.py +++ b/tests/custom_cluster/test_hdfs_fd_caching.py @@ -125,8 +125,7 @@ class TestHdfsFdCaching(CustomClusterTestSuite): @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--max_cached_file_handles=16" - " --unused_file_handle_timeout_sec=18446744073709551600" - " --cache_ozone_file_handles=true", + " --unused_file_handle_timeout_sec=18446744073709551600", catalogd_args="--load_catalog_in_background=false") def test_caching_enabled(self, vector): 
""" @@ -146,8 +145,7 @@ class TestHdfsFdCaching(CustomClusterTestSuite): @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( - impalad_args="--max_cached_file_handles=16 --unused_file_handle_timeout_sec=5" - " --cache_ozone_file_handles=true", + impalad_args="--max_cached_file_handles=16 --unused_file_handle_timeout_sec=5", catalogd_args="--load_catalog_in_background=false") def test_caching_with_eviction(self, vector): """Test of the HDFS file handle cache with unused file handle eviction enabled""" @@ -162,7 +160,7 @@ class TestHdfsFdCaching(CustomClusterTestSuite): @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( - impalad_args="--max_cached_file_handles=0 --cache_ozone_file_handles=true", + impalad_args="--max_cached_file_handles=0", catalogd_args="--load_catalog_in_background=false") def test_caching_disabled_by_param(self, vector): """Test that the HDFS file handle cache is disabled when the parameter is zero""" @@ -173,7 +171,8 @@ class TestHdfsFdCaching(CustomClusterTestSuite): @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--cache_remote_file_handles=false --cache_s3_file_handles=false " - "--cache_abfs_file_handles=false --hostname=" + get_external_ip(), + "--cache_abfs_file_handles=false --cache_ozone_file_handles=false " + "--hostname=" + get_external_ip(), catalogd_args="--load_catalog_in_background=false") def test_remote_caching_disabled_by_param(self, vector): """Test that the file handle cache is disabled for remote files when disabled""" @@ -183,8 +182,7 @@ class TestHdfsFdCaching(CustomClusterTestSuite): @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( - impalad_args="--max_cached_file_handles=0 --cache_ozone_file_handles=true " - "--hostname=" + get_external_ip(), + impalad_args="--max_cached_file_handles=0 --hostname=" + get_external_ip(), catalogd_args="--load_catalog_in_background=false") def test_remote_caching_disabled_by_global_param(self, vector): """Test that 
the file handle cache is disabled for remote files when all caching is @@ -196,7 +194,7 @@ class TestHdfsFdCaching(CustomClusterTestSuite): @pytest.mark.execute_serially @CustomClusterTestSuite.with_args( impalad_args="--max_cached_file_handles=16 --unused_file_handle_timeout_sec=5 " - "--always_use_data_cache=true --cache_ozone_file_handles=true", + "--always_use_data_cache=true", start_args="--data_cache_dir=/tmp --data_cache_size=500MB", catalogd_args="--load_catalog_in_background=false") def test_no_fd_caching_on_cached_data(self, vector):
