Ottomata has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/390435 )
Change subject: 2.1.2-2 release for Hadoop 2.6 ...................................................................... 2.1.2-2 release for Hadoop 2.6 Bug: T158334 Change-Id: I7f8d78f2378627325d3cdea4de765d995690e223 --- M debian/README.Debian D debian/bin/spark2-beeline M debian/changelog M debian/conf/log4j.properties M debian/conf/spark-env.sh M debian/control M debian/rules M debian/spark2.install M debian/spark2.postinst M debian/spark2.postrm 10 files changed, 100 insertions(+), 9 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/debs/spark2 refs/changes/35/390435/1 diff --git a/debian/README.Debian b/debian/README.Debian index e929737..e901a58 100644 --- a/debian/README.Debian +++ b/debian/README.Debian @@ -1,4 +1,4 @@ -Druid for Debian +Spark2 for Debian ---------------- This package is created from the release tarballs provided from @@ -21,3 +21,9 @@ to git-buildpackage or dpkg-buildpackage. On a Wikimedia build server: GIT_PBUILDER_AUTOCONF=no DIST=jessie WIKIMEDIA=yes gbp buildpackage -sa -us -uc --git-builder=git-pbuilder --source-option="--include-removal" + +spark2-assembly.zip +------------------- +This package builds a spark2-assembly.zip archive from all files in the +jars directory. If an HDFS client is available, spark2-assembly.zip will be +uploaded to HDFS, and spark-defaults.conf will be configured to use it. 
diff --git a/debian/bin/spark2-beeline b/debian/bin/spark2-beeline deleted file mode 100755 index d94a75b..0000000 --- a/debian/bin/spark2-beeline +++ /dev/null @@ -1 +0,0 @@ -exec /usr/lib/spark2/bin/beeline "$@" diff --git a/debian/changelog b/debian/changelog index 7697342..b834681 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,14 @@ +spark2 (2.1.2-bin-hadoop2.6-2) jessie-wikimedia; urgency=low + + * 2.1.2-2 release for Hadoop 2.6 + * default log4j improvements + * Use Hadoop native libs if they exist + * Build spark2-assembly.zip and upload to HDFS and use it + * spark2-beeline executable removed + + -- Andrew Otto (WMF) <[email protected]> Tue, 31 Oct 2017 18:04:29 +0000 + + spark2 (2.1.2-bin-hadoop2.6-1) jessie-wikimedia; urgency=low * 2.1.2 binary release for Hadoop 2.6 diff --git a/debian/conf/log4j.properties b/debian/conf/log4j.properties index ec1aa18..c4c4196 100644 --- a/debian/conf/log4j.properties +++ b/debian/conf/log4j.properties @@ -38,3 +38,6 @@ # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR + +# Quiet spark-sql logger, it is too verbose in INFO. 
+log4j.logger.org.apache.spark.sql=WARN diff --git a/debian/conf/spark-env.sh b/debian/conf/spark-env.sh index 4d5a14c..0361882 100755 --- a/debian/conf/spark-env.sh +++ b/debian/conf/spark-env.sh @@ -69,3 +69,8 @@ if [ -z "${HADOOP_CONF_DIR}" -a -e "/etc/hadoop/conf" ]; then export HADOOP_CONF_DIR=/etc/hadoop/conf fi + +# If /usr/lib/hadoop/lib/native exists, use Hadoop native libs from there +if [ -z "${LD_LIBRARY_PATH}" -a -e /usr/lib/hadoop/lib/native ]; then + export LD_LIBRARY_PATH=/usr/lib/hadoop/lib/native +fi diff --git a/debian/control b/debian/control index 3daae1c..c783ad5 100644 --- a/debian/control +++ b/debian/control @@ -2,7 +2,7 @@ Section: misc Priority: optional Maintainer: Andrew Otto <[email protected]> -Build-Depends: debhelper (>= 9) +Build-Depends: debhelper (>= 9), zip Standards-Version: 3.9.3 Homepage: https://spark.apache.org diff --git a/debian/rules b/debian/rules index 14110db..959d04b 100755 --- a/debian/rules +++ b/debian/rules @@ -30,3 +30,8 @@ cp -v $(CURDIR)/bin/spark-sql $(CURDIR)/debian/spark2/usr/lib/spark2/bin/spark-sql cp -v $(CURDIR)/bin/spark-submit $(CURDIR)/debian/spark2/usr/lib/spark2/bin/spark-submit cp -v $(CURDIR)/bin/sparkR $(CURDIR)/debian/spark2/usr/lib/spark2/bin/sparkR + + # Create a spark2-assembly.zip archive that contains everything in jars/ + # This can be used for spark.yarn.archive to avoid having to load + # all .jar files to HDFS every time spark is launched. 
+ zip -j $(CURDIR)/debian/spark2/usr/lib/spark2/spark2-assembly.zip $(CURDIR)/jars/* diff --git a/debian/spark2.install b/debian/spark2.install index 1e9e414..233929c 100644 --- a/debian/spark2.install +++ b/debian/spark2.install @@ -1,4 +1,3 @@ -debian/bin/spark2-beeline usr/bin debian/bin/pyspark2 usr/bin debian/bin/spark2-shell usr/bin debian/bin/spark2-sql usr/bin diff --git a/debian/spark2.postinst b/debian/spark2.postinst index c965bf3..f602c03 100644 --- a/debian/spark2.postinst +++ b/debian/spark2.postinst @@ -6,9 +6,48 @@ set -e -# Symlink hive-site.xml into spark2/conf if it exists. -# This lets spark2 infer Hive configuration. -test -f /etc/hive/conf/hive-site.xml && ln -sf /etc/hive/conf/hive-site.xml /etc/spark2/conf/hive-site.xml + +case "$1" in + configure|reconfigure) + if ! getent passwd spark >/dev/null; then + # Adding system user: spark. + adduser \ + --system \ + --group \ + --no-create-home \ + --home /nonexistent \ + --gecos "Spark" \ + --shell /bin/false \ + spark >/dev/null 2>/dev/null || : + fi + + + # If /etc/hadoop/conf exists, assume an HDFS client exists and is usable. + # Automate storing spark2-assembly.zip in HDFS. + if [ -e /etc/hadoop/conf -a -x $(which hdfs) ]; then + sudo -u spark hdfs dfs -mkdir -p /user/spark/share/lib && \ + sudo -u spark hdfs dfs -put -f /usr/lib/spark2/spark2-assembly.zip /user/spark/share/lib/spark2-assembly.zip && \ + # Append configuration to spark-defaults.conf to use spark2-assembly.zip + echo 'spark.yarn.archive hdfs:///user/spark/share/lib/spark2-assembly.zip' >> /etc/spark2/conf/spark-defaults.conf + fi + + # Symlink hive-site.xml into spark2/conf if it exists. + # This lets spark2 infer Hive configuration. 
+ test -f /etc/hive/conf/hive-site.xml && ln -sf /etc/hive/conf/hive-site.xml /etc/spark2/conf/hive-site.xml + ;; + + abort-upgrade|abort-remove|abort-deconfigure) + ;; + + *) + echo "postinst called with unknown argument \`$1'" >&2 + exit 1 + ;; +esac + + + + # dh_installdeb will replace this with shell code automatically # generated by other debhelper scripts. diff --git a/debian/spark2.postrm b/debian/spark2.postrm index fc9f5c4..4d580d2 100644 --- a/debian/spark2.postrm +++ b/debian/spark2.postrm @@ -6,8 +6,32 @@ set -e -# Remove the hive-site.xml symlink if it exists -test -e /etc/hive/usr/lib/spark2/conf/hive-site.xml && unlink /etc/hive/usr/lib/spark2/conf/hive-site.xml +case "$1" in + purge) + # spark2.postinst conditionally creates a spark user and /user/spark + # dir in HDFS, but spark2.postrm will not remove them. This is because + # spark2 is meant to be installable alongside a spark 1 installation. + # We want to use the same spark user from that, so we can't be sure + # that we should be responsible for removing the spark user here. + + # Remove the hive-site.xml symlink if it exists + test -e /etc/hive/usr/lib/spark2/conf/hive-site.xml && unlink /etc/hive/usr/lib/spark2/conf/hive-site.xml + + # If /etc/hadoop/conf exists, assume an HDFS client exists and is usable. + # Automate removing spark2-assembly.zip from HDFS. + if [ -e /etc/hadoop/conf -a -x $(which hdfs) ]; then + sudo -u spark hdfs dfs -rm /user/spark/share/lib/spark2-assembly.zip + fi + + ;; + remove|upgrade|failed-upgrade|abort-install|abort-upgrade|disappear) + ;; + *) + echo "postrm called with unknown argument \`$1'" >&2 + exit 1 + ;; +esac + # dh_installdeb will replace this with shell code automatically # generated by other debhelper scripts. 
-- To view, visit https://gerrit.wikimedia.org/r/390435 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I7f8d78f2378627325d3cdea4de765d995690e223 Gerrit-PatchSet: 1 Gerrit-Project: operations/debs/spark2 Gerrit-Branch: debian Gerrit-Owner: Ottomata <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
