manika137 commented on code in PR #7564:
URL: https://github.com/apache/hadoop/pull/7564#discussion_r2028082288


##########
hadoop-tools/hadoop-azure/dev-support/testrun-scripts/config_support.sh:
##########
@@ -0,0 +1,140 @@
+#!/usr/bin/env bash
+
+set -eo pipefail
+
+FILE=$1
+
+if [ ! -f "$FILE" ]; then
+    echo "Error: File '$FILE' not found. Exiting...."
+    exit 1
+fi
+
+if [[ "$1" != *.xml ]]; then
+    echo "The file provided is not an XML file. Exiting...."
+    exit 1
+fi
+
+OUTPUT_FILE="abfs-converted-config.xml"
+cp "$FILE" "$OUTPUT_FILE"
+
+contactTeamMsg="For any queries or support, kindly reach out to us at 'aska...@microsoft.com'."
+endpoint=".dfs."
+printf "Select 'HNS' if you're migrating to ABFS driver with Hierarchical Namespace enabled account,
+            or 'Non-HNS' if you're migrating with Non-Hierarchical Namespace (FNS) account. \n"
+printf "WARNING: Please ensure the correct option is chosen as it will affect the configuration changes made to the file. \n"
+printf "If you are unsure, follow the instructions below to check from Azure Portal: \n"
+printf "* Go to the Azure Portal and navigate to your storage account. \n"
+printf "* In the left-hand menu, select 'Overview' section and look for 'Properties'. \n"
+printf "* Under 'Blob service', check if 'Hierarchical namespace' is enabled or disabled. \n"
+echo "$contactTeamMsg"
+select namespaceType in "HNS" "NonHNS"
+do
+    case $namespaceType in
+        HNS)
+            xmlstarlet ed -L -i '//configuration/property[1]' -t elem -n property -v '' \
+              -s '//configuration/property[1]' -t elem -n name -v 'fs.azure.account.hns.enabled' \
+              -s '//configuration/property[1]' -t elem -n value -v 'true' "$OUTPUT_FILE"
+            break;
+            ;;
+        NonHNS)
+            endpoint=".blob."
+            break;
+            ;;
+          *)
+            echo "Invalid selection. Please try again. Exiting..."
+            exit 1;
+            ;;
+    esac
+done
+
+# Mapping for renaming configurations
+declare -A rename_configs_map=(
+    ["autothrottling.enable"]="enable.autothrottling" #fs.azure.autothrottling.enable  to fs.azure.enable.autothrottling
+    ["rename.dir"]="rename.key" # fs.azure.atomic.rename.dir to fs.azure.atomic.rename.key
+    ["block.blob.buffered.pread.disable"]="buffered.pread.disable" #fs.azure.block.blob.buffered.pread.disable to fs.azure.buffered.pread.disable
+    ["fs.azure.sas"]="fs.azure.sas.fixed.token." #fs.azure.sas.CONTAINER_NAME.ACCOUNT_NAME to fs.azure.sas.fixed.token.CONTAINER_NAME.ACCOUNT_NAME
+    ["check.block.md5"]="enable.checksum.validation" #fs.azure.check.block.md5 to fs.azure.enable.checksum.validation
+)
+
+# Configs not supported in ABFS
+unsupported_configs_list=(
+    "fs.azure.page.blob.dir"
+    "fs.azure.block.blob.with.compaction.dir"
+    "fs.azure.store.blob.md5"
+)
+
+# Configurations not required in ABFS Driver and can be removed
+obsolete_configs_list=(
+    "azure.authorization" #fs.azure.authorization, fs.azure.authorization.caching.enable , fs.azure.authorization.caching.maxentries, fs.azure.authorization.cacheentry.expiry.period, fs.azure.authorization.remote.service.urls
+    "azure.selfthrottling" #fs.azure.selfthrottling.enable, fs.azure.selfthrottling.read.factor, fs.azure.selfthrottling.write.factor
+    "azure.saskey" #fs.azure.saskey.cacheentry.expiry.period , fs.azure.saskey.usecontainersaskeyforallaccess
+    "copyblob.retry" #fs.azure.io.copyblob.retry.min.backoff.interval, fs.azure.io.copyblob.retry.max.backoff.interval, fs.azure.io.copyblob.retry.backoff.interval, fs.azure.io.copyblob.retry.max.retries
+    "service.urls" #fs.azure.cred.service.urls , fs.azure.delegation.token.service.urls, fs.azure.authorization.remote.service.urls
+    "blob.metadata.key.case.sensitive" #fs.azure.blob.metadata.key.case.sensitive
+    "cacheentry.expiry.period" #fs.azure.cacheentry.expiry.period
+    "chmod.allowed.userlist" #fs.azure.chmod.allowed.userlist
+    "chown.allowed.userlist" #fs.azure.chown.allowed.userlist
+    "daemon.userlist" #fs.azure.daemon.userlist
+    "delete.threads" #fs.azure.delete.threads
+    "enable.kerberos.support" #fs.azure.enable.kerberos.support
+    "flatlist.enable" #fs.azure.flatlist.enable
+    "fsck.temp.expiry.seconds" #fs.azure.fsck.temp.expiry.seconds
+    "local.sas.key.mode" #fs.azure.local.sas.key.mode
+    "override.canonical.service.name" #fs.azure.override.canonical.service.name
+    "permissions.supergroup" #fs.azure.permissions.supergroup
+    "rename.threads" #fs.azure.rename.threads
+    "secure.mode" #fs.azure.secure.mode
+    "skip.metrics" #fs.azure.skip.metrics
+    "storage.client.logging" #fs.azure.storage.client.logging
+    "storage.emulator.account.name" #fs.azure.storage.emulator.account.name
+    "storage.timeout" #fs.azure.storage.timeout
+)
+
+# Stop the script if any unsupported config is found
+for key in "${unsupported_configs_list[@]}"; do
+    if grep -q "$key" "$OUTPUT_FILE"; then
+        echo "FAILURE: Remove the following configuration from file and rerun: '$key'"
+        failure=true
+    fi
+done
+
+if [ "$failure" = true ]; then
+    echo "$contactTeamMsg"
+    echo "Exiting..."
+    exit 1
+fi
+
+# Renaming the configs
+for old in "${!rename_configs_map[@]}"; do
+    new="${rename_configs_map[$old]}"
+    xmlstarlet ed -L -u "//property/name[contains(., '$old')]" -x "concat(substring-before(., '$old'),
+     '$new', substring-after(., '$old'))" "$OUTPUT_FILE"
+done
+
+# Remove the obsolete configs
+for key in "${obsolete_configs_list[@]}"; do
+    xmlstarlet ed -L -d "//property[name[contains(text(), '$key')]]" "$OUTPUT_FILE"
+done
+
+# Change the endpoints to DFS if migrating to HNS
+if [ "$endpoint" = ".dfs." ]; then
+    xmlstarlet ed -L -u "//property/name[contains(., '.blob.')]" -x "concat(substring-before(., '.blob.'),
+     '$endpoint', substring-after(., '.blob.'))" "$OUTPUT_FILE"
+fi
+
+# Change the value of fs.defaultFS
+if xmlstarlet sel -t -v "//property[name='fs.defaultFS']/value" "$OUTPUT_FILE" | grep -q "."; then
+    if xmlstarlet sel -t -v "//property[name='fs.defaultFS']/value" "$OUTPUT_FILE" | grep -q ".blob."; then
+        xmlstarlet ed -L -u "//property[name='fs.defaultFS']/value" -x "concat('abfs', substring-before(substring-after(., 'wasb'), '@'),
+         '@', substring-before(substring-after(., '@'), '.blob.'), '$endpoint', 'core.windows.net')" "$OUTPUT_FILE"
+    else
+        echo "ERROR: 'fs.defaultFS' does not have 'Blob' as endpoint. Exiting..."
+        echo "$contactTeamMsg"
+        exit 1
+    fi
+fi
+
+# Remove the property block if any name tag is empty
+xmlstarlet ed -L -d "//property[not(name) or name='']" "$OUTPUT_FILE"
+
+echo "Updated file: $OUTPUT_FILE"

Review Comment:
   Yes, I tested this with a sample config file to verify that the script
   works correctly and that the config values stay intact.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org

Reply via email to