This is an automated email from the ASF dual-hosted git repository.
zuston pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git
The following commit(s) were added to refs/heads/master by this push:
new a9cf2c32b [#1369] feat: Provide distribution with Hadoop dependencies
(#1379)
a9cf2c32b is described below
commit a9cf2c32b0401276a777fad09f96b9ca8c759975
Author: Junfan Zhang <[email protected]>
AuthorDate: Wed Dec 20 09:46:43 2023 +0800
[#1369] feat: Provide distribution with Hadoop dependencies (#1379)
### What changes were proposed in this pull request?
provide distribution jars with hadoop
### Why are the changes needed?
For: #1369
1. For `MEMORY_LOCALFILE` mode, there is no need to set HADOOP_HOME, an
environment variable that is hard for some users to set up.
2. Using the embedded Hadoop jars, it can use a higher version of Hadoop
to take advantage of the latest features.
### Does this PR introduce _any_ user-facing change?
Yes
### How was this patch tested?
Internal tests.
---
README.md | 6 ++++++
bin/start-coordinator.sh | 16 ++++++++++++----
bin/start-shuffle-server.sh | 16 ++++++++++++----
bin/utils.sh | 22 +++++++++++-----------
pom.xml | 6 ++++++
5 files changed, 47 insertions(+), 19 deletions(-)
diff --git a/README.md b/README.md
index 6fb4a6d83..c3ee24cba 100644
--- a/README.md
+++ b/README.md
@@ -134,10 +134,16 @@ Package will build against Hadoop 2.8.5 in default. If
you want to build package
./build_distribution.sh --hadoop-profile 'hadoop3.2'
+To package with Hadoop jars included, e.g. when building against Hadoop 3.2.1,
run:
+
+ ./build_distribution.sh --hadoop-profile 'hadoop3.2'
-Phadoop-dependencies-included
+
rss-xxx.tgz will be generated for deployment
## Deploy
+If you have packaged the tgz with Hadoop jars, the `HADOOP_HOME` environment
variable need not be specified in `rss-env.sh`.
+
### Deploy Coordinator
1. unzip package to RSS_HOME
diff --git a/bin/start-coordinator.sh b/bin/start-coordinator.sh
index 142f8d6ad..3e320134a 100755
--- a/bin/start-coordinator.sh
+++ b/bin/start-coordinator.sh
@@ -34,12 +34,11 @@ OUT_PATH="${RSS_LOG_DIR}/coordinator.out"
MAIN_CLASS="org.apache.uniffle.coordinator.CoordinatorServer"
-HADOOP_DEPENDENCY="$("$HADOOP_HOME/bin/hadoop" classpath --glob)"
-
echo "Check process existence"
is_jvm_process_running "$JPS" $MAIN_CLASS
CLASSPATH=""
+JAVA_LIB_PATH=""
for file in $(ls ${JAR_DIR}/coordinator/*.jar 2>/dev/null); do
CLASSPATH=$CLASSPATH:$file
@@ -48,8 +47,17 @@ done
mkdir -p "${RSS_LOG_DIR}"
mkdir -p "${RSS_PID_DIR}"
-CLASSPATH=$CLASSPATH:$HADOOP_CONF_DIR:$HADOOP_DEPENDENCY
-JAVA_LIB_PATH="-Djava.library.path=$HADOOP_HOME/lib/native"
+set +u
+if [ $HADOOP_HOME ]; then
+ HADOOP_DEPENDENCY="$("$HADOOP_HOME/bin/hadoop" classpath --glob)"
+ CLASSPATH=$CLASSPATH:$HADOOP_DEPENDENCY
+ JAVA_LIB_PATH="-Djava.library.path=$HADOOP_HOME/lib/native"
+fi
+
+if [ $HADOOP_CONF_DIR ]; then
+ CLASSPATH=$CLASSPATH:$HADOOP_CONF_DIR
+fi
+set -u
echo "class path is $CLASSPATH"
diff --git a/bin/start-shuffle-server.sh b/bin/start-shuffle-server.sh
index 7568bfe42..5fb85a885 100755
--- a/bin/start-shuffle-server.sh
+++ b/bin/start-shuffle-server.sh
@@ -51,14 +51,13 @@ export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4}
MAIN_CLASS="org.apache.uniffle.server.ShuffleServer"
-HADOOP_DEPENDENCY="$("$HADOOP_HOME/bin/hadoop" classpath --glob)"
-
echo "Check process existence"
RPC_PORT=`grep '^rss.rpc.server.port' $SHUFFLE_SERVER_CONF_FILE |awk '{print
$2}'`
is_port_in_use $RPC_PORT
CLASSPATH=""
+JAVA_LIB_PATH=""
for file in $(ls ${JAR_DIR}/server/*.jar 2>/dev/null); do
CLASSPATH=$CLASSPATH:$file
@@ -67,8 +66,17 @@ done
mkdir -p "${RSS_LOG_DIR}"
mkdir -p "${RSS_PID_DIR}"
-CLASSPATH=$CLASSPATH:$HADOOP_CONF_DIR:$HADOOP_DEPENDENCY
-JAVA_LIB_PATH="-Djava.library.path=$HADOOP_HOME/lib/native"
+set +u
+if [ $HADOOP_HOME ]; then
+ HADOOP_DEPENDENCY="$("$HADOOP_HOME/bin/hadoop" classpath --glob)"
+ CLASSPATH=$CLASSPATH:$HADOOP_DEPENDENCY
+ JAVA_LIB_PATH="-Djava.library.path=$HADOOP_HOME/lib/native"
+fi
+
+if [ "$HADOOP_CONF_DIR" ]; then
+ CLASSPATH=$CLASSPATH:$HADOOP_CONF_DIR
+fi
+set -u
echo "class path is $CLASSPATH"
diff --git a/bin/utils.sh b/bin/utils.sh
index 809ee40c0..f30df831f 100644
--- a/bin/utils.sh
+++ b/bin/utils.sh
@@ -177,14 +177,6 @@ function load_rss_env {
echo "No env JAVA_HOME."
exit 1
fi
- if [[ -z "$HADOOP_HOME" ]]; then
- if [[ $is_dashboard -eq 1 ]]; then
- echo "Dashboard need not HADOOP_HOME."
- else
- echo "No env HADOOP_HOME."
- exit 1
- fi
- fi
# export default value
set +o nounset
@@ -194,7 +186,7 @@ function load_rss_env {
if [ -z "$RSS_CONF_DIR" ]; then
RSS_CONF_DIR="${RSS_HOME}/conf"
fi
- if [ -z "$HADOOP_CONF_DIR" ]; then
+ if [ -z "$HADOOP_CONF_DIR" ] && [ "$HADOOP_HOME" ]; then
HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
fi
if [ -z "$RSS_LOG_DIR" ]; then
@@ -213,10 +205,18 @@ function load_rss_env {
# If UNIFFLE_SHELL_SCRIPT_DEBUG is false, we do not print Env information.
if [[ "${UNIFFLE_SHELL_SCRIPT_DEBUG}" = true ]]; then
echo "Using Java from ${JAVA_HOME}"
- echo "Using Hadoop from ${HADOOP_HOME}"
echo "Using RSS from ${RSS_HOME}"
echo "Using RSS conf from ${RSS_CONF_DIR}"
- echo "Using Hadoop conf from ${HADOOP_CONF_DIR}"
+
+ set +u
+ if [ $HADOOP_HOME ]; then
+ echo "Using Hadoop from ${HADOOP_HOME}"
+ fi
+ if [ $HADOOP_CONF_DIR ]; then
+ echo "Using Hadoop conf from ${HADOOP_CONF_DIR}"
+ fi
+ set -u
+
echo "Write log file to ${RSS_LOG_DIR}"
echo "Write pid file to ${RSS_PID_DIR}"
fi
diff --git a/pom.xml b/pom.xml
index ddcf695b1..d34f81b15 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2174,6 +2174,12 @@
<module>deploy/kubernetes</module>
</modules>
</profile>
+ <profile>
+ <id>hadoop-dependencies-included</id>
+ <properties>
+ <hadoop.scope>compile</hadoop.scope>
+ </properties>
+ </profile>
<profile>
<id>hadoop2.8</id>
<properties>