This is an automated email from the ASF dual-hosted git repository.
abstractdog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tez.git
The following commit(s) were added to refs/heads/master by this push:
new b0a65ecee TEZ-4631: Include an official script that installs hadoop
and tez and runs a simple example DAG (#414) - addendum ASF license +
shellcheck fixes (#417) (Laszlo Bodor reviewed by Ayush Saxena)
b0a65ecee is described below
commit b0a65ecee15bc5c0bf14479b2b3760aafb23f8d8
Author: Bodor Laszlo <[email protected]>
AuthorDate: Wed Jun 4 07:35:43 2025 +0200
TEZ-4631: Include an official script that installs hadoop and tez and runs
a simple example DAG (#414) - addendum ASF license + shellcheck fixes (#417)
(Laszlo Bodor reviewed by Ayush Saxena)
---
dev-support/bin/tez_run_example.sh | 77 +++++++++++++++++++++++---------------
1 file changed, 46 insertions(+), 31 deletions(-)
diff --git a/dev-support/bin/tez_run_example.sh b/dev-support/bin/tez_run_example.sh
index 7e980b948..144b6485c 100755
--- a/dev-support/bin/tez_run_example.sh
+++ b/dev-support/bin/tez_run_example.sh
@@ -1,3 +1,19 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
# This script is used to set up a local Hadoop and Tez environment for running a simple word count example.
# Prerequisites
@@ -10,19 +26,19 @@
# TEZ_EXAMPLE_WORKING_DIR: defaults to the current working directory
# TEZ_VERSION comes from environment variable or is fetched from the Apache Tez download page
-export TEZ_VERSION=${TEZ_VERSION:=$(curl -s
"https://downloads.apache.org/tez/" | grep --color=never -o
'[0-9]\+\.[0-9]\+\.[0-9]\+' | sed -n '/\/$/!p' | sort -V | tail -1)} # e.g.
0.10.4
-export TEZ_EXAMPLE_WORKING_DIR=${TEZ_EXAMPLE_WORKING_DIR:=$PWD}
-cd $TEZ_EXAMPLE_WORKING_DIR
+export TEZ_VERSION="${TEZ_VERSION:-$(curl -s
"https://downloads.apache.org/tez/" | grep --color=never -o
'[0-9]\+\.[0-9]\+\.[0-9]\+' | sed -n '/\/$/!p' | sort -V | tail -1)}" # e.g.
0.10.4
+export TEZ_EXAMPLE_WORKING_DIR="${TEZ_EXAMPLE_WORKING_DIR:-$PWD}"
+cd "$TEZ_EXAMPLE_WORKING_DIR" || exit
echo "TEZ_VERSION: $TEZ_VERSION"
-wget -nc
https://archive.apache.org/dist/tez/$TEZ_VERSION/apache-tez-$TEZ_VERSION-bin.tar.gz
+wget -nc
"https://archive.apache.org/dist/tez/$TEZ_VERSION/apache-tez-$TEZ_VERSION-bin.tar.gz"
# Need to extract the Tez tarball early to get hadoop version it depends on
if [ ! -d "apache-tez-$TEZ_VERSION-bin" ]; then
- tar -xzf apache-tez-$TEZ_VERSION-bin.tar.gz
+ tar -xzf "apache-tez-$TEZ_VERSION-bin.tar.gz"
fi
-export HADOOP_VERSION=${HADOOP_VERSION:=$(basename
apache-tez-$TEZ_VERSION-bin/lib/hadoop-hdfs-client-*.jar | sed -E
's/.*hadoop-hdfs-client-([0-9]+\.[0-9]+\.[0-9]+)\.jar/\1/')} # e.g. 3.4.1
+export HADOOP_VERSION="${HADOOP_VERSION:-$(basename
"apache-tez-$TEZ_VERSION-bin/lib/hadoop-hdfs-client-"*.jar | sed -E
's/.*hadoop-hdfs-client-([0-9]+\.[0-9]+\.[0-9]+)\.jar/\1/')}" # e.g. 3.4.1
cat <<EOF
***
@@ -32,20 +48,20 @@ cat <<EOF
***
EOF
-wget -nc
https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
+wget -nc
"https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz"
if [ ! -d "hadoop-$HADOOP_VERSION" ]; then
- tar -xzf hadoop-$HADOOP_VERSION.tar.gz
+ tar -xzf "hadoop-$HADOOP_VERSION.tar.gz"
fi
-export HADOOP_HOME=$TEZ_EXAMPLE_WORKING_DIR/hadoop-$HADOOP_VERSION
-export TEZ_HOME=$TEZ_EXAMPLE_WORKING_DIR/apache-tez-$TEZ_VERSION-bin
-export HADOOP_CLASSPATH=$TEZ_HOME/*:$TEZ_HOME/lib/*:$TEZ_HOME/conf
+export HADOOP_HOME="$TEZ_EXAMPLE_WORKING_DIR/hadoop-$HADOOP_VERSION"
+export TEZ_HOME="$TEZ_EXAMPLE_WORKING_DIR/apache-tez-$TEZ_VERSION-bin"
+export HADOOP_CLASSPATH="$TEZ_HOME/*:$TEZ_HOME/lib/*:$TEZ_HOME/conf"
-export PATH=$PATH:$HADOOP_HOME/bin
+export PATH="$PATH:$HADOOP_HOME/bin"
# https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html#Pseudo-Distributed_Operation
-cat <<EOF > $HADOOP_HOME/etc/hadoop/hdfs-site.xml
+cat <<EOF > "$HADOOP_HOME/etc/hadoop/hdfs-site.xml"
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
@@ -57,7 +73,7 @@ cat <<EOF > $HADOOP_HOME/etc/hadoop/hdfs-site.xml
</configuration>
EOF
-cat <<EOF > $HADOOP_HOME/etc/hadoop/core-site.xml
+cat <<EOF > "$HADOOP_HOME/etc/hadoop/core-site.xml"
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
@@ -69,7 +85,7 @@ cat <<EOF > $HADOOP_HOME/etc/hadoop/core-site.xml
</configuration>
EOF
-cat <<EOF > $HADOOP_HOME/etc/hadoop/yarn-site.xml
+cat <<EOF > "$HADOOP_HOME/etc/hadoop/yarn-site.xml"
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
@@ -82,20 +98,20 @@ cat <<EOF > $HADOOP_HOME/etc/hadoop/yarn-site.xml
EOF
# optionally stop previous clusters if any
-$HADOOP_HOME/sbin/stop-dfs.sh
-$HADOOP_HOME/sbin/stop-yarn.sh
+"$HADOOP_HOME/sbin/stop-dfs.sh"
+"$HADOOP_HOME/sbin/stop-yarn.sh"
-rm -rf /tmp/hadoop-$USER/dfs/data
+rm -rf "/tmp/hadoop-$USER/dfs/data"
hdfs namenode -format -force
-$HADOOP_HOME/sbin/start-dfs.sh
-$HADOOP_HOME/sbin/start-yarn.sh
+"$HADOOP_HOME/sbin/start-dfs.sh"
+"$HADOOP_HOME/sbin/start-yarn.sh"
-hadoop fs -mkdir -p /apps/tez-$TEZ_VERSION
-hadoop fs -copyFromLocal $TEZ_HOME/share/tez.tar.gz /apps/tez-$TEZ_VERSION
+hadoop fs -mkdir -p "/apps/tez-$TEZ_VERSION"
+hadoop fs -copyFromLocal "$TEZ_HOME/share/tez.tar.gz" "/apps/tez-$TEZ_VERSION"
# create a simple tez-site.xml
-cat <<EOF > $TEZ_HOME/conf/tez-site.xml
+cat <<EOF > "$TEZ_HOME/conf/tez-site.xml"
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
@@ -121,24 +137,23 @@ Friend
Game
EOF
-hadoop fs -copyFromLocal words.txt /words.txt
+hadoop fs -copyFromLocal "words.txt" "/words.txt"
export HADOOP_USER_CLASSPATH_FIRST=true
# finally run the example
-yarn jar $TEZ_HOME/tez-examples-$TEZ_VERSION.jar orderedwordcount /words.txt
/words_out
+yarn jar "$TEZ_HOME/tez-examples-$TEZ_VERSION.jar" orderedwordcount
"/words.txt" "/words_out"
# check the output
-hadoop fs -ls /words_out
-hadoop fs -text /words_out/part-v002-o000-r-00000
-
+hadoop fs -ls "/words_out"
+hadoop fs -text "/words_out/part-v002-o000-r-00000"
cat <<EOF
*** Since the environment is already set up, you can rerun the DAG using the commands below.
export HADOOP_USER_CLASSPATH_FIRST=true
-export TEZ_HOME=$TEZ_EXAMPLE_WORKING_DIR/apache-tez-$TEZ_VERSION-bin
-export HADOOP_CLASSPATH=$TEZ_HOME/*:$TEZ_HOME/lib/*:$TEZ_HOME/conf
-$HADOOP_HOME/bin/yarn jar $TEZ_HOME/tez-examples-$TEZ_VERSION.jar
orderedwordcount /words.txt /words_out
+export TEZ_HOME="$TEZ_EXAMPLE_WORKING_DIR/apache-tez-$TEZ_VERSION-bin"
+export HADOOP_CLASSPATH="$TEZ_HOME/*:$TEZ_HOME/lib/*:$TEZ_HOME/conf"
+"$HADOOP_HOME/bin/yarn" jar "$TEZ_HOME/tez-examples-$TEZ_VERSION.jar"
orderedwordcount /words.txt /words_out
*** You can also visit some of the sites that are set up during the script execution.