This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 24a8a29120606cdc7205b09a5bd53d52d357539a
Author: Michael Smith <[email protected]>
AuthorDate: Wed May 3 14:51:03 2023 -0700

IMPALA-12115: Put each filesystem in a different directory

Uses different node directories and databases for each filesystem so we don't need to recreate them from scratch when switching. Preserves current defaults so developers with a default HDFS minicluster don't need to recreate it after this patch. Any other cluster (Ozone, erasure-coded, S3) will need to be recreated.

Starting with a new filesystem requires running:
0. ./testdata/bin/kill-all.sh if an old cluster is running
1. create-test-configuration.sh. Add -create_metastore -create_ranger_policy_db for first run.
2. ./testdata/bin/run-all.sh
3. start-impala-cluster.py or "buildall.sh -noclean -start_minicluster -start_impala_cluster". Add "-format" for the first run to create HMS and Ranger DBs.

IMPALA_CLUSTER_LOGS_DIR is shared for all clusters. Symlinks to the minicluster are recreated by create-test-configuration.sh.
Change-Id: I8c89156fd1cefbb752fee3070e10bb08fbf80e07 Reviewed-on: http://gerrit.cloudera.org:8080/19841 Reviewed-by: Michael Smith <[email protected]> Tested-by: Michael Smith <[email protected]> --- bin/impala-config.sh | 20 +++++++++++++++----- testdata/cluster/.gitignore | 1 + testdata/cluster/admin | 9 +++++---- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/bin/impala-config.sh b/bin/impala-config.sh index 06b1caa3f..88ac95d8d 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -596,15 +596,25 @@ export EXTERNAL_LISTEN_HOST="${EXTERNAL_LISTEN_HOST-0.0.0.0}" export DEFAULT_FS="${DEFAULT_FS-hdfs://${INTERNAL_LISTEN_HOST}:20500}" export WAREHOUSE_LOCATION_PREFIX="${WAREHOUSE_LOCATION_PREFIX-}" export LOCAL_FS="file:${WAREHOUSE_LOCATION_PREFIX}" -export IMPALA_CLUSTER_NODES_DIR="${IMPALA_CLUSTER_NODES_DIR-$IMPALA_HOME/testdata/cluster/cdh$CDH_MAJOR_VERSION}" +# Use different node directories for each filesystem so we don't need to recreate them +# from scratch when switching. 
+UNIQUE_FS_LABEL= +if [[ "${TARGET_FILESYSTEM}" != "hdfs" ]]; then + UNIQUE_FS_LABEL="${UNIQUE_FS_LABEL}-${TARGET_FILESYSTEM}" +fi +if [[ "${ERASURE_CODING}" = true ]]; then + UNIQUE_FS_LABEL="${UNIQUE_FS_LABEL}-ec" +fi +DEFAULT_NODES_DIR="$IMPALA_HOME/testdata/cluster/cdh$CDH_MAJOR_VERSION$UNIQUE_FS_LABEL" +export IMPALA_CLUSTER_NODES_DIR="${IMPALA_CLUSTER_NODES_DIR-$DEFAULT_NODES_DIR}" -ESCAPED_IMPALA_HOME=$(sed "s/[^0-9a-zA-Z]/_/g" <<< "$IMPALA_HOME") +ESCAPED_DB_UID=$(sed "s/[^0-9a-zA-Z]/_/g" <<< "$UNIQUE_FS_LABEL$IMPALA_HOME") if $USE_APACHE_HIVE; then export HIVE_HOME="$APACHE_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-bin" export HIVE_SRC_DIR="$APACHE_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-src" # if apache hive is being used change the metastore db name, so we don't have to # format the metastore db everytime we switch between hive versions - export METASTORE_DB=${METASTORE_DB-"$(cut -c-59 <<< HMS$ESCAPED_IMPALA_HOME)_apache"} + export METASTORE_DB=${METASTORE_DB-"$(cut -c-59 <<< HMS$ESCAPED_DB_UID)_apache"} else export HIVE_HOME=${HIVE_HOME_OVERRIDE:-\ "$CDP_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-bin"} @@ -612,7 +622,7 @@ else "${CDP_COMPONENTS_HOME}/hive-${IMPALA_HIVE_VERSION}"} # Previously, there were multiple configurations and the "_cdp" included below # allowed the two to be distinct. We keep this "_cdp" for historical reasons. 
- export METASTORE_DB=${METASTORE_DB-"$(cut -c-59 <<< HMS$ESCAPED_IMPALA_HOME)_cdp"} + export METASTORE_DB=${METASTORE_DB-"$(cut -c-59 <<< HMS$ESCAPED_DB_UID)_cdp"} fi # Set the path to the hive_metastore.thrift which is used to build thrift code export HIVE_METASTORE_THRIFT_DIR=${HIVE_METASTORE_THRIFT_DIR_OVERRIDE:-\ @@ -627,7 +637,7 @@ fi # Set the Hive binaries in the path export PATH="$HIVE_HOME/bin:$HBASE_HOME/bin:$OZONE_HOME/bin:$PATH" -RANGER_POLICY_DB=${RANGER_POLICY_DB-$(cut -c-63 <<< ranger$ESCAPED_IMPALA_HOME)} +RANGER_POLICY_DB=${RANGER_POLICY_DB-$(cut -c-63 <<< ranger$ESCAPED_DB_UID)} # The DB script in Ranger expects the database name to be in lower case. export RANGER_POLICY_DB=$(echo ${RANGER_POLICY_DB} | tr '[:upper:]' '[:lower:]') diff --git a/testdata/cluster/.gitignore b/testdata/cluster/.gitignore index 0c35f61e8..af83e70da 100644 --- a/testdata/cluster/.gitignore +++ b/testdata/cluster/.gitignore @@ -2,6 +2,7 @@ /cdh5 /cdh6 /cdh7 +/cdh7-* /ranger/setup/impala_user.json /ranger/setup/impala_group.json /ranger/setup/impala_group_owner.json diff --git a/testdata/cluster/admin b/testdata/cluster/admin index af44ed565..8766a89b1 100755 --- a/testdata/cluster/admin +++ b/testdata/cluster/admin @@ -231,11 +231,12 @@ function create_cluster { # Add some easy access links closer to IMPALA_HOME EASY_ACCESS_LOG_LINK="$EASY_ACCESS_LOG_DIR/cdh$CDH_MAJOR_VERSION-$NODE" - if [[ ! -e "$EASY_ACCESS_LOG_LINK" ]]; then - mkdir -p "$EASY_ACCESS_LOG_DIR" - ln -s "$NODE_DIR/var/log" "$EASY_ACCESS_LOG_DIR" - mv "$IMPALA_CLUSTER_LOGS_DIR/log" "$EASY_ACCESS_LOG_LINK" + if [[ -e "$EASY_ACCESS_LOG_LINK" ]]; then + rm "${EASY_ACCESS_LOG_LINK}" fi + mkdir -p "$EASY_ACCESS_LOG_DIR" + ln -s "$NODE_DIR/var/log" "$EASY_ACCESS_LOG_DIR" + mv "$EASY_ACCESS_LOG_DIR/log" "$EASY_ACCESS_LOG_LINK" # Template population DATANODE_PORT=$((DATANODE_FREE_PORT_START++))
