This is an automated email from the ASF dual-hosted git repository.

zuston pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
     new a9cf2c32b [#1369] feat: Provide distribution with Hadoop dependencies 
(#1379)
a9cf2c32b is described below

commit a9cf2c32b0401276a777fad09f96b9ca8c759975
Author: Junfan Zhang <[email protected]>
AuthorDate: Wed Dec 20 09:46:43 2023 +0800

    [#1369] feat: Provide distribution with Hadoop dependencies (#1379)
    
    ### What changes were proposed in this pull request?
    
    provide distribution jars with hadoop
    
    ### Why are the changes needed?
    
    For: #1369
    
    1. For `MEMORY_LOCALFILE` mode, there is no need to set HADOOP_HOME, which 
is hard for some users to set up.
    2. Using the embedded Hadoop jars, it can use a higher version of 
Hadoop to take advantage of the latest features.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes
    
    ### How was this patch tested?
    
    Internal tests.
---
 README.md                   |  6 ++++++
 bin/start-coordinator.sh    | 16 ++++++++++++----
 bin/start-shuffle-server.sh | 16 ++++++++++++----
 bin/utils.sh                | 22 +++++++++++-----------
 pom.xml                     |  6 ++++++
 5 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 6fb4a6d83..c3ee24cba 100644
--- a/README.md
+++ b/README.md
@@ -134,10 +134,16 @@ Package will build against Hadoop 2.8.5 in default. If 
you want to build package
 
     ./build_distribution.sh --hadoop-profile 'hadoop3.2'
 
+Package with Hadoop jars. If you want to build the package against Hadoop 
3.2.1, run:
+
+    ./build_distribution.sh --hadoop-profile 'hadoop3.2' 
-Phadoop-dependencies-included
+
 rss-xxx.tgz will be generated for deployment
 
 ## Deploy
 
+If you have packaged the tgz with Hadoop jars, the `HADOOP_HOME` environment 
variable does not need to be specified in `rss-env.sh`.
+
 ### Deploy Coordinator
 
 1. unzip package to RSS_HOME
diff --git a/bin/start-coordinator.sh b/bin/start-coordinator.sh
index 142f8d6ad..3e320134a 100755
--- a/bin/start-coordinator.sh
+++ b/bin/start-coordinator.sh
@@ -34,12 +34,11 @@ OUT_PATH="${RSS_LOG_DIR}/coordinator.out"
 
 MAIN_CLASS="org.apache.uniffle.coordinator.CoordinatorServer"
 
-HADOOP_DEPENDENCY="$("$HADOOP_HOME/bin/hadoop" classpath --glob)"
-
 echo "Check process existence"
 is_jvm_process_running "$JPS" $MAIN_CLASS
 
 CLASSPATH=""
+JAVA_LIB_PATH=""
 
 for file in $(ls ${JAR_DIR}/coordinator/*.jar 2>/dev/null); do
   CLASSPATH=$CLASSPATH:$file
@@ -48,8 +47,17 @@ done
 mkdir -p "${RSS_LOG_DIR}"
 mkdir -p "${RSS_PID_DIR}"
 
-CLASSPATH=$CLASSPATH:$HADOOP_CONF_DIR:$HADOOP_DEPENDENCY
-JAVA_LIB_PATH="-Djava.library.path=$HADOOP_HOME/lib/native"
+set +u
+if [ $HADOOP_HOME ]; then
+  HADOOP_DEPENDENCY="$("$HADOOP_HOME/bin/hadoop" classpath --glob)"
+  CLASSPATH=$CLASSPATH:$HADOOP_DEPENDENCY
+  JAVA_LIB_PATH="-Djava.library.path=$HADOOP_HOME/lib/native"
+fi
+
+if [ $HADOOP_CONF_DIR ]; then
+  CLASSPATH=$CLASSPATH:$HADOOP_CONF_DIR
+fi
+set -u
 
 echo "class path is $CLASSPATH"
 
diff --git a/bin/start-shuffle-server.sh b/bin/start-shuffle-server.sh
index 7568bfe42..5fb85a885 100755
--- a/bin/start-shuffle-server.sh
+++ b/bin/start-shuffle-server.sh
@@ -51,14 +51,13 @@ export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4}
 
 MAIN_CLASS="org.apache.uniffle.server.ShuffleServer"
 
-HADOOP_DEPENDENCY="$("$HADOOP_HOME/bin/hadoop" classpath --glob)"
-
 echo "Check process existence"
 RPC_PORT=`grep '^rss.rpc.server.port' $SHUFFLE_SERVER_CONF_FILE |awk '{print 
$2}'`
 is_port_in_use $RPC_PORT
 
 
 CLASSPATH=""
+JAVA_LIB_PATH=""
 
 for file in $(ls ${JAR_DIR}/server/*.jar 2>/dev/null); do
   CLASSPATH=$CLASSPATH:$file
@@ -67,8 +66,17 @@ done
 mkdir -p "${RSS_LOG_DIR}"
 mkdir -p "${RSS_PID_DIR}"
 
-CLASSPATH=$CLASSPATH:$HADOOP_CONF_DIR:$HADOOP_DEPENDENCY
-JAVA_LIB_PATH="-Djava.library.path=$HADOOP_HOME/lib/native"
+set +u
+if [ $HADOOP_HOME ]; then
+  HADOOP_DEPENDENCY="$("$HADOOP_HOME/bin/hadoop" classpath --glob)"
+  CLASSPATH=$CLASSPATH:$HADOOP_DEPENDENCY
+  JAVA_LIB_PATH="-Djava.library.path=$HADOOP_HOME/lib/native"
+fi
+
+if [ "$HADOOP_CONF_DIR" ]; then
+  CLASSPATH=$CLASSPATH:$HADOOP_CONF_DIR
+fi
+set -u
 
 echo "class path is $CLASSPATH"
 
diff --git a/bin/utils.sh b/bin/utils.sh
index 809ee40c0..f30df831f 100644
--- a/bin/utils.sh
+++ b/bin/utils.sh
@@ -177,14 +177,6 @@ function load_rss_env {
     echo "No env JAVA_HOME."
     exit 1
   fi
-  if [[ -z "$HADOOP_HOME" ]]; then
-    if [[ $is_dashboard -eq 1 ]]; then
-      echo "Dashboard need not HADOOP_HOME."
-    else
-      echo "No env HADOOP_HOME."
-      exit 1
-    fi
-  fi
 
   # export default value
   set +o nounset
@@ -194,7 +186,7 @@ function load_rss_env {
   if [ -z "$RSS_CONF_DIR" ]; then
     RSS_CONF_DIR="${RSS_HOME}/conf"
   fi
-  if [ -z "$HADOOP_CONF_DIR" ]; then
+  if [ -z "$HADOOP_CONF_DIR" ] && [ "$HADOOP_HOME" ]; then
     HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
   fi
   if [ -z "$RSS_LOG_DIR" ]; then
@@ -213,10 +205,18 @@ function load_rss_env {
   # If UNIFFLE_SHELL_SCRIPT_DEBUG is false, we do not print Env information.
   if [[ "${UNIFFLE_SHELL_SCRIPT_DEBUG}" = true ]]; then
     echo "Using Java from ${JAVA_HOME}"
-    echo "Using Hadoop from ${HADOOP_HOME}"
     echo "Using RSS from ${RSS_HOME}"
     echo "Using RSS conf from ${RSS_CONF_DIR}"
-    echo "Using Hadoop conf from ${HADOOP_CONF_DIR}"
+
+    set +u
+    if [ $HADOOP_HOME ]; then
+      echo "Using Hadoop from ${HADOOP_HOME}"
+    fi
+    if [ $HADOOP_CONF_DIR ]; then
+      echo "Using Hadoop conf from ${HADOOP_CONF_DIR}"
+    fi
+    set -u
+
     echo "Write log file to ${RSS_LOG_DIR}"
     echo "Write pid file to ${RSS_PID_DIR}"
   fi
diff --git a/pom.xml b/pom.xml
index ddcf695b1..d34f81b15 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2174,6 +2174,12 @@
         <module>deploy/kubernetes</module>
       </modules>
     </profile>
+    <profile>
+      <id>hadoop-dependencies-included</id>
+      <properties>
+        <hadoop.scope>compile</hadoop.scope>
+      </properties>
+    </profile>
     <profile>
       <id>hadoop2.8</id>
       <properties>

Reply via email to