Ottomata has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/390435 )
Change subject: 2.1.2-2 release for Hadoop 2.6 ...................................................................... 2.1.2-2 release for Hadoop 2.6 Bug: T158334 Change-Id: I7f8d78f2378627325d3cdea4de765d995690e223 --- M debian/README.Debian D debian/bin/spark2-beeline M debian/changelog M debian/conf/log4j.properties M debian/conf/spark-env.sh M debian/control M debian/rules M debian/spark2.install M debian/spark2.postinst M debian/spark2.postrm 10 files changed, 100 insertions(+), 9 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/debs/spark2 refs/changes/35/390435/1 diff --git a/debian/README.Debian b/debian/README.Debian index e929737..e901a58 100644 --- a/debian/README.Debian +++ b/debian/README.Debian @@ -1,4 +1,4 @@ -Druid for Debian +Spark2 for Debian ---------------- This package is created from the release tarballs provided from @@ -21,3 +21,9 @@ to git-buildpackage or dpkg-buildpackage. On a Wikimedia build server: GIT_PBUILDER_AUTOCONF=no DIST=jessie WIKIMEDIA=yes gbp buildpackage -sa -us -uc --git-builder=git-pbuilder --source-option="--include-removal" + +spark2-assembly.zip +------------------- +This package builds a spark2-assembly.zip archive from all files in the +jars directory. If an HDFS client is available, spark2-assembly.zip will be +uploaded to HDFS, and spark-defaults.conf will be configured to use it. 
diff --git a/debian/bin/spark2-beeline b/debian/bin/spark2-beeline deleted file mode 100755 index d94a75b..0000000 --- a/debian/bin/spark2-beeline +++ /dev/null @@ -1 +0,0 @@ -exec /usr/lib/spark2/bin/beeline "$@" diff --git a/debian/changelog b/debian/changelog index 7697342..b834681 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,14 @@ +spark2 (2.1.2-bin-hadoop2.6-2) jessie-wikimedia; urgency=low + + * 2.1.2-2 release for Hadoop 2.6 + * default log4j improvements + * Use Hadoop native libs if they exist + * Build spark2-assembly.zip and upload to HDFS and use it + * spark2-beeline executable removed + + -- Andrew Otto (WMF) <[email protected]> Tue, 31 Oct 2017 18:04:29 +0000 + + spark2 (2.1.2-bin-hadoop2.6-1) jessie-wikimedia; urgency=low * 2.1.2 binary release for Hadoop 2.6 diff --git a/debian/conf/log4j.properties b/debian/conf/log4j.properties index ec1aa18..c4c4196 100644 --- a/debian/conf/log4j.properties +++ b/debian/conf/log4j.properties @@ -38,3 +38,6 @@ # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR + +# Quiet spark-sql logger, it is too verbose in INFO. 
+log4j.logger.org.apache.spark.sql=WARN diff --git a/debian/conf/spark-env.sh b/debian/conf/spark-env.sh index 4d5a14c..0361882 100755 --- a/debian/conf/spark-env.sh +++ b/debian/conf/spark-env.sh @@ -69,3 +69,8 @@ if [ -z "${HADOOP_CONF_DIR}" -a -e "/etc/hadoop/conf" ]; then export HADOOP_CONF_DIR=/etc/hadoop/conf fi + +# If /usr/lib/hadoop/lib/native exists, use Hadoop native libs from there +if [ -z "${LD_LIBRARY_PATH}" -a -e /usr/lib/hadoop/lib/native ]; then + export LD_LIBRARY_PATH=/usr/lib/hadoop/lib/native +fi diff --git a/debian/control b/debian/control index 3daae1c..c783ad5 100644 --- a/debian/control +++ b/debian/control @@ -2,7 +2,7 @@ Section: misc Priority: optional Maintainer: Andrew Otto <[email protected]> -Build-Depends: debhelper (>= 9) +Build-Depends: debhelper (>= 9), zip Standards-Version: 3.9.3 Homepage: https://spark.apache.org diff --git a/debian/rules b/debian/rules index 14110db..959d04b 100755 --- a/debian/rules +++ b/debian/rules @@ -30,3 +30,8 @@ cp -v $(CURDIR)/bin/spark-sql $(CURDIR)/debian/spark2/usr/lib/spark2/bin/spark-sql cp -v $(CURDIR)/bin/spark-submit $(CURDIR)/debian/spark2/usr/lib/spark2/bin/spark-submit cp -v $(CURDIR)/bin/sparkR $(CURDIR)/debian/spark2/usr/lib/spark2/bin/sparkR + + # Create a spark2-assembly.zip archive that contains everything in jars/ + # This can be used for spark.yarn.archive to avoid having to load + # all .jar files to HDFS every time spark is launched. 
+ zip -j $(CURDIR)/debian/spark2/usr/lib/spark2/spark2-assembly.zip $(CURDIR)/jars/* diff --git a/debian/spark2.install b/debian/spark2.install index 1e9e414..233929c 100644 --- a/debian/spark2.install +++ b/debian/spark2.install @@ -1,4 +1,3 @@ -debian/bin/spark2-beeline usr/bin debian/bin/pyspark2 usr/bin debian/bin/spark2-shell usr/bin debian/bin/spark2-sql usr/bin diff --git a/debian/spark2.postinst b/debian/spark2.postinst index c965bf3..f602c03 100644 --- a/debian/spark2.postinst +++ b/debian/spark2.postinst @@ -6,9 +6,48 @@ set -e -# Symlink hive-site.xml into spark2/conf if it exists. -# This lets spark2 infer Hive configuration. -test -f /etc/hive/conf/hive-site.xml && ln -sf /etc/hive/conf/hive-site.xml /etc/spark2/conf/hive-site.xml + +case "$1" in + configure|reconfigure) + if ! getent passwd spark >/dev/null; then + # Adding system user: spark. + adduser \ + --system \ + --group \ + --no-create-home \ + --home /nonexistent \ + --gecos "Spark" \ + --shell /bin/false \ + spark >/dev/null 2>/dev/null || : + fi + + + # If /etc/hadoop/conf exists, assume an HDFS client exists and is usable. + # Automate storing spark2-assembly.zip in HDFS. + if [ -e /etc/hadoop/conf -a -x $(which hdfs) ]; then + sudo -u spark hdfs dfs -mkdir -p /user/spark/share/lib && \ + sudo -u spark hdfs dfs -put -f /usr/lib/spark2/spark2-assembly.zip /user/spark/share/lib/spark2-assembly.zip && \ + # Append configuration to spark-defaults.conf to use spark2-assembly.zip + echo 'spark.yarn.archive hdfs:///user/spark/share/lib/spark2-assembly.zip' >> /etc/spark2/conf/spark-defaults.conf + fi + + # Symlink hive-site.xml into spark2/conf if it exists. + # This lets spark2 infer Hive configuration. 
+ test -f /etc/hive/conf/hive-site.xml && ln -sf /etc/hive/conf/hive-site.xml /etc/spark2/conf/hive-site.xml + ;; + + abort-upgrade|abort-remove|abort-deconfigure) + ;; + + *) + echo "postinst called with unknown argument \`$1'" >&2 + exit 1 + ;; +esac + + + + # dh_installdeb will replace this with shell code automatically # generated by other debhelper scripts. diff --git a/debian/spark2.postrm b/debian/spark2.postrm index fc9f5c4..4d580d2 100644 --- a/debian/spark2.postrm +++ b/debian/spark2.postrm @@ -6,8 +6,32 @@ set -e -# Remove the hive-site.xml symlink if it exists -test -e /etc/hive/usr/lib/spark2/conf/hive-site.xml && unlink /etc/hive/usr/lib/spark2/conf/hive-site.xml +case "$1" in + purge) + # spark2.postinst conditionally creates a spark user and /user/spark + # dir in HDFS, but spark2.postrm will not remove them. This is because + # spark2 is meant to be installable alongside a spark 1 installation. + # We want to use the same spark user from that, so we can't be sure + # that we should be responsible for removing the spark user here. + + # Remove the hive-site.xml symlink if it exists + test -e /etc/hive/usr/lib/spark2/conf/hive-site.xml && unlink /etc/hive/usr/lib/spark2/conf/hive-site.xml + + # If /etc/hadoop/conf exists, assume an HDFS client exists and is usable. + # Automate removing spark2-assembly.zip from HDFS. + if [ -e /etc/hadoop/conf -a -x $(which hdfs) ]; then + sudo -u spark hdfs dfs -rm /user/spark/share/lib/spark2-assembly.zip + fi + + ;; + remove|upgrade|failed-upgrade|abort-install|abort-upgrade|disappear) + ;; + *) + echo "postrm called with unknown argument \`$1'" >&2 + exit 1 + ;; +esac + # dh_installdeb will replace this with shell code automatically # generated by other debhelper scripts. 
-- To view, visit https://gerrit.wikimedia.org/r/390435 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I7f8d78f2378627325d3cdea4de765d995690e223 Gerrit-PatchSet: 1 Gerrit-Project: operations/debs/spark2 Gerrit-Branch: debian Gerrit-Owner: Ottomata <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
