anujmodi2021 commented on code in PR #7564: URL: https://github.com/apache/hadoop/pull/7564#discussion_r2026199497
########## hadoop-tools/hadoop-azure/dev-support/testrun-scripts/config_support.sh: ########## @@ -0,0 +1,140 @@ +#!/usr/bin/env bash + +set -eo pipefail + +FILE=$1 + +if [ ! -f "$FILE" ]; then + echo "Error: File '$FILE' not found. Exiting...." + exit 1 +fi + +if [[ "$1" != *.xml ]]; then + echo "The file provided is not an XML file. Exiting...." + exit 1 +fi + +OUTPUT_FILE="abfs-converted-config.xml" +cp "$FILE" "$OUTPUT_FILE" + +contactTeamMsg="For any queries or support, kindly reach out to us at 'aska...@microsoft.com'." +endpoint=".dfs." +printf "Select 'HNS' if you're migrating to ABFS driver with Hierarchical Namespace enabled account, + or 'Non-HNS' if you're migrating with Non-Hierarchical Namespace (FNS) account. \n" +printf "WARNING: Please ensure the correct option is chosen as it will affect the configuration changes made to the file. \n" +printf "If you are unsure, follow the instructions below to check from Azure Portal: \n" +printf "* Go to the Azure Portal and navigate to your storage account. \n" +printf "* In the left-hand menu, select 'Overview' section and look for 'Properties'. \n" +printf "* Under 'Blob service', check if 'Hierarchical namespace' is enabled or disabled. \n" +echo "$contactTeamMsg" +select namespaceType in "HNS" "NonHNS" +do + case $namespaceType in + HNS) + xmlstarlet ed -L -i '//configuration/property[1]' -t elem -n property -v '' \ + -s '//configuration/property[1]' -t elem -n name -v 'fs.azure.account.hns.enabled' \ + -s '//configuration/property[1]' -t elem -n value -v 'true' "$OUTPUT_FILE" + break; + ;; + NonHNS) + endpoint=".blob." + break; + ;; + *) + echo "Invalid selection. Please try again. Exiting..." 
+ exit 1; + ;; + esac +done + +# Mapping for renaming configurations +declare -A rename_configs_map=( + ["autothrottling.enable"]="enable.autothrottling" #fs.azure.autothrottling.enable to fs.azure.enable.autothrottling + ["rename.dir"]="rename.key" # fs.azure.atomic.rename.dir to fs.azure.atomic.rename.key + ["block.blob.buffered.pread.disable"]="buffered.pread.disable" #fs.azure.block.blob.buffered.pread.disable to fs.azure.buffered.pread.disable + ["fs.azure.sas"]="fs.azure.sas.fixed.token." #fs.azure.sas.CONTAINER_NAME.ACCOUNT_NAME to fs.azure.sas.fixed.token.CONTAINER_NAME.ACCOUNT_NAME + ["check.block.md5"]="enable.checksum.validation" #fs.azure.check.block.md5 to fs.azure.enable.checksum.validation +) + +# Configs not supported in ABFS +unsupported_configs_list=( + "fs.azure.page.blob.dir" + "fs.azure.block.blob.with.compaction.dir" + "fs.azure.store.blob.md5" +) + +# Configurations not required in ABFS Driver and can be removed +obsolete_configs_list=( + "azure.authorization" #fs.azure.authorization, fs.azure.authorization.caching.enable , fs.azure.authorization.caching.maxentries, fs.azure.authorization.cacheentry.expiry.period, fs.azure.authorization.remote.service.urls + "azure.selfthrottling" #fs.azure.selfthrottling.enable, fs.azure.selfthrottling.read.factor, fs.azure.selfthrottling.write.factor + "azure.saskey" #fs.azure.saskey.cacheentry.expiry.period , fs.azure.saskey.usecontainersaskeyforallaccess + "copyblob.retry" #fs.azure.io.copyblob.retry.min.backoff.interval, fs.azure.io.copyblob.retry.max.backoff.interval, fs.azure.io.copyblob.retry.backoff.interval, fs.azure.io.copyblob.retry.max.retries + "service.urls" #fs.azure.cred.service.urls , fs.azure.delegation.token.service.urls, fs.azure.authorization.remote.service.urls + "blob.metadata.key.case.sensitive" #fs.azure.blob.metadata.key.case.sensitive + "cacheentry.expiry.period" #fs.azure.cacheentry.expiry.period + "chmod.allowed.userlist" #fs.azure.chmod.allowed.userlist + 
"chown.allowed.userlist" #fs.azure.chown.allowed.userlist + "daemon.userlist" #fs.azure.daemon.userlist + "delete.threads" #fs.azure.delete.threads + "enable.kerberos.support" #fs.azure.enable.kerberos.support + "flatlist.enable" #fs.azure.flatlist.enable + "fsck.temp.expiry.seconds" #fs.azure.fsck.temp.expiry.seconds + "local.sas.key.mode" #fs.azure.local.sas.key.mode + "override.canonical.service.name" #fs.azure.override.canonical.service.name + "permissions.supergroup" #fs.azure.permissions.supergroup + "rename.threads" #fs.azure.rename.threads + "secure.mode" #fs.azure.secure.mode + "skip.metrics" #fs.azure.skip.metrics + "storage.client.logging" #fs.azure.storage.client.logging + "storage.emulator.account.name" #fs.azure.storage.emulator.account.name + "storage.timeout" #fs.azure.storage.timeout +) + +# Stop the script if any unsupported config is found +for key in "${unsupported_configs_list[@]}"; do + if grep -q "$key" "$OUTPUT_FILE"; then + echo "FAILURE: Remove the following configuration from file and rerun: '$key'" Review Comment: Tell user why to remove? "Unsupported Config found" ########## hadoop-tools/hadoop-azure/dev-support/testrun-scripts/config_support.sh: ########## @@ -0,0 +1,140 @@ +#!/usr/bin/env bash + +set -eo pipefail + +FILE=$1 + +if [ ! -f "$FILE" ]; then + echo "Error: File '$FILE' not found. Exiting...." + exit 1 +fi + +if [[ "$1" != *.xml ]]; then + echo "The file provided is not an XML file. Exiting...." + exit 1 +fi + +OUTPUT_FILE="abfs-converted-config.xml" +cp "$FILE" "$OUTPUT_FILE" + +contactTeamMsg="For any queries or support, kindly reach out to us at 'aska...@microsoft.com'." +endpoint=".dfs." +printf "Select 'HNS' if you're migrating to ABFS driver with Hierarchical Namespace enabled account, + or 'Non-HNS' if you're migrating with Non-Hierarchical Namespace (FNS) account. \n" +printf "WARNING: Please ensure the correct option is chosen as it will affect the configuration changes made to the file. 
\n" +printf "If you are unsure, follow the instructions below to check from Azure Portal: \n" +printf "* Go to the Azure Portal and navigate to your storage account. \n" +printf "* In the left-hand menu, select 'Overview' section and look for 'Properties'. \n" +printf "* Under 'Blob service', check if 'Hierarchical namespace' is enabled or disabled. \n" +echo "$contactTeamMsg" +select namespaceType in "HNS" "NonHNS" +do + case $namespaceType in + HNS) + xmlstarlet ed -L -i '//configuration/property[1]' -t elem -n property -v '' \ + -s '//configuration/property[1]' -t elem -n name -v 'fs.azure.account.hns.enabled' \ + -s '//configuration/property[1]' -t elem -n value -v 'true' "$OUTPUT_FILE" + break; + ;; + NonHNS) + endpoint=".blob." + break; + ;; + *) + echo "Invalid selection. Please try again. Exiting..." + exit 1; + ;; + esac +done + +# Mapping for renaming configurations +declare -A rename_configs_map=( + ["autothrottling.enable"]="enable.autothrottling" #fs.azure.autothrottling.enable to fs.azure.enable.autothrottling + ["rename.dir"]="rename.key" # fs.azure.atomic.rename.dir to fs.azure.atomic.rename.key + ["block.blob.buffered.pread.disable"]="buffered.pread.disable" #fs.azure.block.blob.buffered.pread.disable to fs.azure.buffered.pread.disable + ["fs.azure.sas"]="fs.azure.sas.fixed.token." 
#fs.azure.sas.CONTAINER_NAME.ACCOUNT_NAME to fs.azure.sas.fixed.token.CONTAINER_NAME.ACCOUNT_NAME + ["check.block.md5"]="enable.checksum.validation" #fs.azure.check.block.md5 to fs.azure.enable.checksum.validation +) + +# Configs not supported in ABFS +unsupported_configs_list=( + "fs.azure.page.blob.dir" + "fs.azure.block.blob.with.compaction.dir" + "fs.azure.store.blob.md5" +) + +# Configurations not required in ABFS Driver and can be removed +obsolete_configs_list=( + "azure.authorization" #fs.azure.authorization, fs.azure.authorization.caching.enable , fs.azure.authorization.caching.maxentries, fs.azure.authorization.cacheentry.expiry.period, fs.azure.authorization.remote.service.urls + "azure.selfthrottling" #fs.azure.selfthrottling.enable, fs.azure.selfthrottling.read.factor, fs.azure.selfthrottling.write.factor + "azure.saskey" #fs.azure.saskey.cacheentry.expiry.period , fs.azure.saskey.usecontainersaskeyforallaccess + "copyblob.retry" #fs.azure.io.copyblob.retry.min.backoff.interval, fs.azure.io.copyblob.retry.max.backoff.interval, fs.azure.io.copyblob.retry.backoff.interval, fs.azure.io.copyblob.retry.max.retries + "service.urls" #fs.azure.cred.service.urls , fs.azure.delegation.token.service.urls, fs.azure.authorization.remote.service.urls + "blob.metadata.key.case.sensitive" #fs.azure.blob.metadata.key.case.sensitive + "cacheentry.expiry.period" #fs.azure.cacheentry.expiry.period + "chmod.allowed.userlist" #fs.azure.chmod.allowed.userlist + "chown.allowed.userlist" #fs.azure.chown.allowed.userlist + "daemon.userlist" #fs.azure.daemon.userlist + "delete.threads" #fs.azure.delete.threads + "enable.kerberos.support" #fs.azure.enable.kerberos.support + "flatlist.enable" #fs.azure.flatlist.enable + "fsck.temp.expiry.seconds" #fs.azure.fsck.temp.expiry.seconds + "local.sas.key.mode" #fs.azure.local.sas.key.mode + "override.canonical.service.name" #fs.azure.override.canonical.service.name + "permissions.supergroup" #fs.azure.permissions.supergroup + 
"rename.threads" #fs.azure.rename.threads + "secure.mode" #fs.azure.secure.mode + "skip.metrics" #fs.azure.skip.metrics + "storage.client.logging" #fs.azure.storage.client.logging + "storage.emulator.account.name" #fs.azure.storage.emulator.account.name + "storage.timeout" #fs.azure.storage.timeout +) + +# Stop the script if any unsupported config is found +for key in "${unsupported_configs_list[@]}"; do + if grep -q "$key" "$OUTPUT_FILE"; then + echo "FAILURE: Remove the following configuration from file and rerun: '$key'" + failure=true + fi +done + +if [ "$failure" = true ]; then + echo "$contactTeamMsg" + echo "Exiting..." + exit 1 +fi + +# Renaming the configs +for old in "${!rename_configs_map[@]}"; do + new="${rename_configs_map[$old]}" + xmlstarlet ed -L -u "//property/name[contains(., '$old')]" -x "concat(substring-before(., '$old'), + '$new', substring-after(., '$old'))" "$OUTPUT_FILE" +done + +# Remove the obsolete configs +for key in "${obsolete_configs_list[@]}"; do + xmlstarlet ed -L -d "//property[name[contains(text(), '$key')]]" "$OUTPUT_FILE" +done + +# Change the endpoints to DFS if migrating to HNS +if [ "$endpoint" = ".dfs." ]; then + xmlstarlet ed -L -u "//property/name[contains(., '.blob.')]" -x "concat(substring-before(., '.blob.'), + '$endpoint', substring-after(., '.blob.'))" "$OUTPUT_FILE" +fi + +# Change the value of fs.defaultFS +if xmlstarlet sel -t -v "//property[name='fs.defaultFS']/value" "$OUTPUT_FILE" | grep -q "."; then + if xmlstarlet sel -t -v "//property[name='fs.defaultFS']/value" "$OUTPUT_FILE" | grep -q ".blob."; then + xmlstarlet ed -L -u "//property[name='fs.defaultFS']/value" -x "concat('abfs', substring-before(substring-after(., 'wasb'), '@'), + '@', substring-before(substring-after(., '@'), '.blob.'), '$endpoint', 'core.windows.net')" "$OUTPUT_FILE" + else + echo "ERROR: 'fs.defaultFS' does not have 'Blob' as endpoint. Exiting..." 
+ echo "$contactTeamMsg" + exit 1 + fi +fi + +# Remove the property block if any name tag is empty +xmlstarlet ed -L -d "//property[not(name) or name='']" "$OUTPUT_FILE" + +echo "Updated file: $OUTPUT_FILE" Review Comment: Is this script tested? ########## hadoop-tools/hadoop-azure/src/site/markdown/wasbToAbfsMigration.md: ########## @@ -0,0 +1,64 @@ +# WASB to ABFS Configuration Conversion Script + +To support customer onboard for migration from WASB to ABFS driver, we've +introduced a script to help with the configuration changes required +for the same. + +## Introduction + +ABFS driver has now built support for +FNS accounts (over BlobEndpoint that WASB Driver uses) using the ABFS scheme. Review Comment: Add a link here to the fns_blob.md file ########## hadoop-tools/hadoop-azure/src/site/markdown/wasbToAbfsMigration.md: ########## @@ -0,0 +1,64 @@ +# WASB to ABFS Configuration Conversion Script + +To support customer onboard for migration from WASB to ABFS driver, we've +introduced a script to help with the configuration changes required +for the same. + +## Introduction + +ABFS driver has now built support for +FNS accounts (over BlobEndpoint that WASB Driver uses) using the ABFS scheme. + +The legacy WASB driver has been **deprecated** and is no longer recommended for Review Comment: Add a link to deprecated_wasb.md here. Maybe the first documentation PR needs to be merged first. ########## hadoop-tools/hadoop-azure/dev-support/testrun-scripts/config_support.sh: ########## @@ -0,0 +1,140 @@ +#!/usr/bin/env bash + +set -eo pipefail + +FILE=$1 + +if [ ! -f "$FILE" ]; then + echo "Error: File '$FILE' not found. Exiting...." + exit 1 +fi + +if [[ "$1" != *.xml ]]; then + echo "The file provided is not an XML file. Exiting...." + exit 1 +fi + +OUTPUT_FILE="abfs-converted-config.xml" +cp "$FILE" "$OUTPUT_FILE" + +contactTeamMsg="For any queries or support, kindly reach out to us at 'aska...@microsoft.com'." +endpoint=".dfs."
+printf "Select 'HNS' if you're migrating to ABFS driver with Hierarchical Namespace enabled account, + or 'Non-HNS' if you're migrating with Non-Hierarchical Namespace (FNS) account. \n" Review Comment: Nit: Statement construction should be same for both the options. `Select 'HNS' if you're migrating to ABFS driver for Hierarchical Namespace enabled account, or 'Non-HNS' if you're migrating to ABFS driver for Non-Hierarchical Namespace (FNS) account. \n` ########## hadoop-tools/hadoop-azure/dev-support/testrun-scripts/config_support.sh: ########## @@ -0,0 +1,140 @@ +#!/usr/bin/env bash + +set -eo pipefail + +FILE=$1 + +if [ ! -f "$FILE" ]; then + echo "Error: File '$FILE' not found. Exiting...." + exit 1 +fi + +if [[ "$1" != *.xml ]]; then + echo "The file provided is not an XML file. Exiting...." + exit 1 +fi + +OUTPUT_FILE="abfs-converted-config.xml" +cp "$FILE" "$OUTPUT_FILE" + +contactTeamMsg="For any queries or support, kindly reach out to us at 'aska...@microsoft.com'." +endpoint=".dfs." +printf "Select 'HNS' if you're migrating to ABFS driver with Hierarchical Namespace enabled account, + or 'Non-HNS' if you're migrating with Non-Hierarchical Namespace (FNS) account. \n" +printf "WARNING: Please ensure the correct option is chosen as it will affect the configuration changes made to the file. \n" +printf "If you are unsure, follow the instructions below to check from Azure Portal: \n" +printf "* Go to the Azure Portal and navigate to your storage account. \n" +printf "* In the left-hand menu, select 'Overview' section and look for 'Properties'. \n" +printf "* Under 'Blob service', check if 'Hierarchical namespace' is enabled or disabled. 
\n" +echo "$contactTeamMsg" +select namespaceType in "HNS" "NonHNS" +do + case $namespaceType in + HNS) + xmlstarlet ed -L -i '//configuration/property[1]' -t elem -n property -v '' \ + -s '//configuration/property[1]' -t elem -n name -v 'fs.azure.account.hns.enabled' \ + -s '//configuration/property[1]' -t elem -n value -v 'true' "$OUTPUT_FILE" + break; + ;; + NonHNS) + endpoint=".blob." + break; + ;; + *) + echo "Invalid selection. Please try again. Exiting..." + exit 1; + ;; + esac +done + +# Mapping for renaming configurations +declare -A rename_configs_map=( + ["autothrottling.enable"]="enable.autothrottling" #fs.azure.autothrottling.enable to fs.azure.enable.autothrottling + ["rename.dir"]="rename.key" # fs.azure.atomic.rename.dir to fs.azure.atomic.rename.key + ["block.blob.buffered.pread.disable"]="buffered.pread.disable" #fs.azure.block.blob.buffered.pread.disable to fs.azure.buffered.pread.disable + ["fs.azure.sas"]="fs.azure.sas.fixed.token." #fs.azure.sas.CONTAINER_NAME.ACCOUNT_NAME to fs.azure.sas.fixed.token.CONTAINER_NAME.ACCOUNT_NAME + ["check.block.md5"]="enable.checksum.validation" #fs.azure.check.block.md5 to fs.azure.enable.checksum.validation +) + +# Configs not supported in ABFS +unsupported_configs_list=( + "fs.azure.page.blob.dir" + "fs.azure.block.blob.with.compaction.dir" + "fs.azure.store.blob.md5" +) + +# Configurations not required in ABFS Driver and can be removed +obsolete_configs_list=( Review Comment: Where is the real mapping for supported configs defined? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org