Hello Ottomata,

I'd like you to do a code review.  Please visit

    https://gerrit.wikimedia.org/r/177716

to review the following change.

Change subject: Drop jars that are not on all worker nodes from Oozie's Hive config
......................................................................

Drop jars that are not on all worker nodes from Oozie's Hive config

Stat1002 includes

  file:///srv/deployment/analytics/refinery/artifacts/refinery-hive.jar

in its hive-site.xml's hive.aux.jars.path. But we cannot use such a
config for Oozie's Hive config, as worker nodes do not have this
jar. Hence, Oozie jobs would fail.

So we strip such jars from Oozie's Hive config upon deploying
Refinery.

Bug: T76806
Change-Id: Ic24607e98457c9243064a2cec12165c2a36d1230
---
M bin/refinery-deploy-to-hdfs
1 file changed, 32 insertions(+), 4 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery refs/changes/16/177716/1

diff --git a/bin/refinery-deploy-to-hdfs b/bin/refinery-deploy-to-hdfs
index b5ce60c..6120f7b 100755
--- a/bin/refinery-deploy-to-hdfs
+++ b/bin/refinery-deploy-to-hdfs
@@ -86,17 +86,26 @@
 
 run_hdfs() {
     local SKIP_RUNNING_COMMAND="$DRY_RUN"
+    local LOG_INVOCATION="yes"
+    if [ "$1" = "--without-log" ]
+    then
+        shift
+        LOG_INVOCATION="no"
+    fi
     if [ "$1" = "--run-also-for-dry-run" ]
     then
         shift
         SKIP_RUNNING_COMMAND="no"
     fi
     local CMD=( hdfs "$@" )
-    if [ "$DRY_RUN" = "no" ]
+    if [ "$LOG_INVOCATION" = "yes" ]
     then
-        verbose_log "${CMD[@]}"
-    else
-        log "Dry run:" "${CMD[@]}"
+        if [ "$DRY_RUN" = "no" ]
+        then
+            verbose_log "${CMD[@]}"
+        else
+            log "Dry run:" "${CMD[@]}"
+        fi
     fi
     if [ "$SKIP_RUNNING_COMMAND" = "no" ]
     then
@@ -183,6 +192,23 @@
         error "Oozie's hive config at '$OOZIES_HIVE_CONFIG_FILE_RELR' matches \
 password. Aborting to avoid accidentally deploying passwords."
     fi
+}
+
+monkey_patch_oozies_hive_config() {
+    local TARGET_HDFSFILE_ABS="$1"
+    shift
+
+    # We strip out references to
+    #   file:///srv/deployment/
+    # as they won't be around on all worker nodes. Currently, this
+    # addresses removing refinery-hive.jar from the plain filesystem
+    # location. When not removing it, Oozie's Hive jobs would try to
+    # pick it up, and consequently fail on the worker node, as they do
+    # not have that jar.
+    run_hdfs dfs -put -f <( \
+        run_hdfs --without-log dfs -cat "$TARGET_HDFSFILE_ABS" \
+        | sed -e 's@\(\(>\)\|,\)file:///srv/deployment/[^<,]*\([<,]\)@\2\3@'
+    ) "$TARGET_HDFSFILE_ABS"
 }
 
 describe_deployment() {
@@ -275,6 +301,8 @@
 # Note that we only copy *, and not dot-files, hence excluding '.git'.
 run_hdfs dfs -put -f * "$VERSIONED_TMP_TARGET_HDFSDIR_ABS"
 
+monkey_patch_oozies_hive_config "$VERSIONED_TMP_TARGET_HDFSDIR_ABS/$OOZIES_HIVE_CONFIG_FILE_RELR"
+
 describe_deployment "$VERSIONED_TMP_TARGET_HDFSDIR_ABS/.deployment" "$@"
 
 # Bringing the temporary directory into place

-- 
To view, visit https://gerrit.wikimedia.org/r/177716
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic24607e98457c9243064a2cec12165c2a36d1230
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: QChris <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to