This is an automated email from the ASF dual-hosted git repository.
liuxun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/submarine.git
The following commit(s) were added to refs/heads/master by this push:
new 4f5f545 SUBMARINE-335. Add a guide of yarn runtime with docker
containers
4f5f545 is described below
commit 4f5f5456edae3b724fc5cab4a925dc080566162d
Author: Keqiu Hu <[email protected]>
AuthorDate: Sun Jan 12 21:48:30 2020 -0800
SUBMARINE-335. Add a guide of yarn runtime with docker containers
### What is this PR for?
Provide an example and instructions for how to run submarine with tony
runtime using Docker.
### What type of PR is it?
Documentation
### Todos
* [ ] - Pending access to DockerHub apache repo to push the default docker
image.
### What is the Jira issue?
https://issues.apache.org/jira/browse/SUBMARINE-335
### How should this be tested?
* Follow the instructions uploaded and it should work under individual's
dev environment
### Screenshots (if appropriate)
### Questions:
* Do the license files need an update? No
* Are there breaking changes for older versions? No
* Does this need documentation? No
Author: Keqiu Hu <[email protected]>
Closes #149 from oliverhu/master and squashes the following commits:
42fb58d [Keqiu Hu] Address comments
089d29d [Keqiu Hu] Add license header
b996f94 [Keqiu Hu] Update comment
ab353ee [Keqiu Hu] SUBMARINE-335
---
.../mini-submarine/submarine/mnist_distributed.py | 2 +-
.../submarine/run_submarine_mnist_tony.sh | 44 +++++++++----
docs/helper/InstallationGuide.md | 18 ++++--
docs/helper/docker/tensorflow/build-all.sh | 3 +
.../mnist/Dockerfile.tony.tf.mnist.tf_1.13.1 | 72 ++++++++++++++++++++++
5 files changed, 123 insertions(+), 16 deletions(-)
diff --git a/dev-support/mini-submarine/submarine/mnist_distributed.py b/dev-support/mini-submarine/submarine/mnist_distributed.py
index 58a9a34..eee96f4 100644
--- a/dev-support/mini-submarine/submarine/mnist_distributed.py
+++ b/dev-support/mini-submarine/submarine/mnist_distributed.py
@@ -37,7 +37,7 @@ import sys
import tensorboard.program as tb_program
import tensorflow as tf
-import submarine
+# import submarine
# Environment variable containing port to launch TensorBoard on, set by TonY.
TB_PORT_ENV_VAR = 'TB_PORT'
diff --git a/dev-support/mini-submarine/submarine/run_submarine_mnist_tony.sh b/dev-support/mini-submarine/submarine/run_submarine_mnist_tony.sh
index f4d9dbd..93d923e 100755
--- a/dev-support/mini-submarine/submarine/run_submarine_mnist_tony.sh
+++ b/dev-support/mini-submarine/submarine/run_submarine_mnist_tony.sh
@@ -14,6 +14,23 @@
# limitations under the License.
#!/bin/bash
+
+# Below are configurable variables, please adapt based on your local environment.
+# Version of submarine jar
+SUBMARINE_VERSION=0.3.0-SNAPSHOT
+
+# Version of affiliated Hadoop version for this Submarine jar.
+SUBMARINE_HADOOP_VERSION=2.9
+
+# Path to the submarine jars.
+SUBMARINE_PATH=/opt/submarine-current
+
+# Similar to HADOOP_CONF_DIR, location of the Hadoop configuration directory
+HADOOP_CONF_PATH=/usr/local/hadoop/etc/hadoop
+
+# Path to the MNIST example.
+MNIST_PATH=/home/yarn/submarine
+
while [ $# -gt 0 ]; do
case "$1" in
--debug*)
@@ -33,27 +50,31 @@ else
JAVA_CMD="java"
fi
-while getopts 'd:' OPT; do
+while getopts 'd:c' OPT; do
case $OPT in
d)
DATA_URL="$OPTARG";;
+ c)
+ USE_DOCKER=1;;
esac
done
shift $(($OPTIND - 1))
if [[ -n "$DATA_URL" ]]; then
- WORKER_CMD="myvenv.zip/venv/bin/python mnist_distributed.py --steps 2 --data_dir /tmp/data --working_dir /tmp/mode --mnist_data_url ${DATA_URL}"
+ WORKER_CMD="venv/bin/python mnist_distributed.py --steps 2 --data_dir /tmp/data --working_dir /tmp/mode --mnist_data_url ${DATA_URL}"
else
- WORKER_CMD="myvenv.zip/venv/bin/python mnist_distributed.py --steps 2 --data_dir /tmp/data --working_dir /tmp/mode"
+ WORKER_CMD="venv/bin/python mnist_distributed.py --steps 2 --data_dir /tmp/data --working_dir /tmp/mode"
+fi
-SUBMARINE_VERSION=0.3.0-SNAPSHOT
-HADOOP_VERSION=2.9
-SUBMARINE_PATH=/opt/submarine-current
-HADOOP_CONF_PATH=/usr/local/hadoop/etc/hadoop
-MNIST_PATH=/home/yarn/submarine
+if [[ -n "$USE_DOCKER" ]]; then
+ WORKER_CMD="/opt/$WORKER_CMD"
+ # tony-mnist-tf-1.13.1:0.0.1 is built from the Dockerfile.tony.tf.mnist.tf_1.13.1 under docs/helper/docker/tensorflow/mnist
+ DOCKER_CONF="--conf tony.docker.containers.image=tony-mnist-tf-1.13.1:0.0.1 --conf tony.docker.enabled=true"
+else
+ WORKER_CMD="myvenv.zip/$WORKER_CMD"
+fi
-${JAVA_CMD} -cp ${SUBMARINE_PATH}/submarine-all-${SUBMARINE_VERSION}-hadoop-${HADOOP_VERSION}.jar:${HADOOP_CONF_PATH} \
+${JAVA_CMD} -cp $(${HADOOP_HOME}/bin/hadoop classpath --glob):${SUBMARINE_PATH}/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar:${HADOOP_CONF_PATH} \
org.apache.submarine.client.cli.Cli job run --name tf-job-001 \
--framework tensorflow \
--verbose \
@@ -65,4 +86,5 @@ ${JAVA_CMD} -cp
${SUBMARINE_PATH}/submarine-all-${SUBMARINE_VERSION}-hadoop-${HA
--worker_launch_cmd "${WORKER_CMD}" \
--ps_launch_cmd "myvenv.zip/venv/bin/python mnist_distributed.py --steps 2
--data_dir /tmp/data --working_dir /tmp/mode" \
--insecure \
- --conf tony.containers.resources=${MNIST_PATH}/myvenv.zip#archive,${MNIST_PATH}/mnist_distributed.py,${SUBMARINE_PATH}/submarine-all-${SUBMARINE_VERSION}-hadoop-${HADOOP_VERSION}.jar
+ --conf tony.containers.resources=${MNIST_PATH}/myvenv.zip#archive,${MNIST_PATH}/mnist_distributed.py,${SUBMARINE_PATH}/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar \
+ $DOCKER_CONF
diff --git a/docs/helper/InstallationGuide.md b/docs/helper/InstallationGuide.md
index b2a4edb..e9ea3a5 100644
--- a/docs/helper/InstallationGuide.md
+++ b/docs/helper/InstallationGuide.md
@@ -394,9 +394,9 @@ Add configurations in container-executor.cfg
yarn-hierarchy=/hadoop-yarn
```
-## Tensorflow Job with yarn runtime.
+## TensorFlow Job with yarn runtime.
-### Run a tensorflow job in a zipped python virtual environment
+### Run a TensorFlow job in a zipped python virtual environment
Refer to build_python_virtual_env.sh in the directory of
${SUBMARINE_REPO_PATH}/dev-support/mini-submarine/submarine/ to build a zipped
python virtual
@@ -404,7 +404,7 @@ environment. ${SUBMARINE_REPO_PATH} indicates submarine
repo location.
The generated zipped file can be named myvenv.zip.
Copy
${SUBMARINE_REPO_PATH}/dev-support/mini-submarine/submarine/run_submarine_mnist_tony.sh
-to the server on which you submit jobs. And modify the variables, SUBMARINE_VERSION, HADOOP_VERSION, SUBMARINE_PATH,
+to the server on which you submit jobs. And modify the variables, SUBMARINE_VERSION, SUBMARINE_HADOOP_VERSION, SUBMARINE_PATH,
HADOOP_CONF_PATH and MNIST_PATH in it, according to your environment. If
Kerberos
is enabled, please delete the parameter, --insecure, in the command.
@@ -414,7 +414,17 @@ Run a distributed tensorflow job.
```
The parameter -d is used to specify the url from which we can get the mnist
data.
-### Run a tensorflow job in a docker container(TODO)
+### Run a TensorFlow job in a Docker container
+Prepare your docker image, you could refer to this sample Docker image for building your own Docker image. An example is provided under `docker/tensorflow/mnist/Dockerfile.tony.tf.mnist.tf_1.13.1`
+
+Please make sure you have _HADOOP_HOME_, _HADOOP_YARN_HOME_, _HADOOP_HDFS_HOME_, _HADOOP_CONF_DIR_, _JAVA_HOME_ configured correctly. You could use this command to run a distributed TensorFlow job in Docker
+
+```
+./run_submarine_mnist_tony.sh -c -d http://yann.lecun.com/exdb/mnist/
+```
+The parameter -c is used to specify the job will be run in a Docker environment.
+
+The parameter -d is used to specify the url from which we can get the mnist data.
## Yarn Service Runtime Requirement (Deprecated)
diff --git a/docs/helper/docker/tensorflow/build-all.sh b/docs/helper/docker/tensorflow/build-all.sh
index 1e9848f..e0a1266 100755
--- a/docs/helper/docker/tensorflow/build-all.sh
+++ b/docs/helper/docker/tensorflow/build-all.sh
@@ -30,3 +30,6 @@ cd ../../with-cifar10-models/ubuntu-16.04
docker build . -f Dockerfile.cpu.tf_1.13.1 -t tf-1.13.1-cpu:0.0.1
docker build . -f Dockerfile.gpu.tf_1.13.1 -t tf-1.13.1-gpu:0.0.1
+
+cd ../../mnist
+docker build . -f Dockerfile.tony.tf.mnist.tf_1.13.1 -t tony-mnist-tf-1.13.1:0.0.1
\ No newline at end of file
diff --git a/docs/helper/docker/tensorflow/mnist/Dockerfile.tony.tf.mnist.tf_1.13.1 b/docs/helper/docker/tensorflow/mnist/Dockerfile.tony.tf.mnist.tf_1.13.1
new file mode 100644
index 0000000..86874d4
--- /dev/null
+++ b/docs/helper/docker/tensorflow/mnist/Dockerfile.tony.tf.mnist.tf_1.13.1
@@ -0,0 +1,72 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM ubuntu:18.04
+ENV HADOOP_VERSION 2.9.2
+ENV HADOOP_URL https://www.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
+RUN apt-get update
+RUN apt-get -y install apt-transport-https \
+ ca-certificates \
+ curl \
+ gnupg2 \
+ git \
+ software-properties-common \
+ openjdk-8-jdk vim \
+ wget python3-distutils
+RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
+RUN add-apt-repository \
+ "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
+ $(lsb_release -cs) \
+ stable"
+
+# Download Hadoop binaries.
+RUN set -x \
+ && curl -fSL "$HADOOP_URL" -o /tmp/hadoop.tar.gz \
+ && tar -xvf /tmp/hadoop.tar.gz -C /opt/ \
+ && rm /tmp/hadoop.tar.gz*
+
+# Copy the $HADOOP_CONF_DIR folder as "hadoopconf" folder in the same dir as dockerfile
+# pi@pi-aw:~/apache/submarine/docker$ tree
+# .
+# ├── Dockerfile
+# └── hadoopconf
+# ├── capacity-scheduler.xml
+# ├── configuration.xsl
+# ...
+COPY hadoopconf /opt/hadoop-$HADOOP_VERSION/etc/hadoop
+
+# Download and config submarine
+RUN cd ~
+RUN git clone https://github.com/apache/submarine.git
+RUN cd submarine
+
+RUN submarine/dev-support/mini-submarine/submarine/build_python_virtual_env.sh
+RUN mv venv/ /opt/
+RUN chmod +r -R /opt/venv
+
+# Config Hadoop env
+ENV HADOOP_HOME=/opt/hadoop-$HADOOP_VERSION/
+ENV HADOOP_YARN_HOME=/opt/hadoop-$HADOOP_VERSION/
+ENV HADOOP_HDFS_HOME=/opt/hadoop-$HADOOP_VERSION/
+ENV HADOOP_CONF_DIR=/opt/hadoop-$HADOOP_VERSION/etc/hadoop
+ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
+
+# Create user, make sure the user groups are the same as your host
+RUN groupadd -g 5000 hadoop
+RUN useradd -u 1000 -g hadoop pi
+RUN mkdir /home/pi
+RUN chown pi:hadoop /home/pi
+RUN mkdir /tmp/mode
+RUN chmod 777 /tmp/mode
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]