This is an automated email from the ASF dual-hosted git repository.

liuxun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/submarine.git


The following commit(s) were added to refs/heads/master by this push:
     new 4f5f545  SUBMARINE-335. Add a guide of yarn runtime with docker 
containers
4f5f545 is described below

commit 4f5f5456edae3b724fc5cab4a925dc080566162d
Author: Keqiu Hu <[email protected]>
AuthorDate: Sun Jan 12 21:48:30 2020 -0800

    SUBMARINE-335. Add a guide of yarn runtime with docker containers
    
    ### What is this PR for?
    Provide an example and instructions for how to run submarine with tony 
runtime using Docker.
    
    ### What type of PR is it?
    Documentation
    
    ### Todos
    * [ ] - Pending access to DockerHub apache repo to push the default docker 
image.
    
    ### What is the Jira issue?
    https://issues.apache.org/jira/browse/SUBMARINE-335
    
    ### How should this be tested?
    * Follow the uploaded instructions; they should work in an individual's 
dev environment.
    
    ### Screenshots (if appropriate)
    
    ### Questions:
    * Do the license files need an update? No
    * Are there breaking changes for older versions? No
    * Does this need documentation? No
    
    Author: Keqiu Hu <[email protected]>
    
    Closes #149 from oliverhu/master and squashes the following commits:
    
    42fb58d [Keqiu Hu] Address comments
    089d29d [Keqiu Hu] Add license header
    b996f94 [Keqiu Hu] Update comment
    ab353ee [Keqiu Hu] SUBMARINE-335
---
 .../mini-submarine/submarine/mnist_distributed.py  |  2 +-
 .../submarine/run_submarine_mnist_tony.sh          | 44 +++++++++----
 docs/helper/InstallationGuide.md                   | 18 ++++--
 docs/helper/docker/tensorflow/build-all.sh         |  3 +
 .../mnist/Dockerfile.tony.tf.mnist.tf_1.13.1       | 72 ++++++++++++++++++++++
 5 files changed, 123 insertions(+), 16 deletions(-)

diff --git a/dev-support/mini-submarine/submarine/mnist_distributed.py 
b/dev-support/mini-submarine/submarine/mnist_distributed.py
index 58a9a34..eee96f4 100644
--- a/dev-support/mini-submarine/submarine/mnist_distributed.py
+++ b/dev-support/mini-submarine/submarine/mnist_distributed.py
@@ -37,7 +37,7 @@ import sys
 
 import tensorboard.program as tb_program
 import tensorflow as tf
-import submarine
+# import submarine
 
 # Environment variable containing port to launch TensorBoard on, set by TonY.
 TB_PORT_ENV_VAR = 'TB_PORT'
diff --git a/dev-support/mini-submarine/submarine/run_submarine_mnist_tony.sh 
b/dev-support/mini-submarine/submarine/run_submarine_mnist_tony.sh
index f4d9dbd..93d923e 100755
--- a/dev-support/mini-submarine/submarine/run_submarine_mnist_tony.sh
+++ b/dev-support/mini-submarine/submarine/run_submarine_mnist_tony.sh
@@ -14,6 +14,23 @@
 # limitations under the License.
 
 #!/bin/bash
+
+# Below are configurable variables; please adapt them based on your local 
environment.
+# Version of submarine jar
+SUBMARINE_VERSION=0.3.0-SNAPSHOT
+
+# Version of affiliated Hadoop version for this Submarine jar.
+SUBMARINE_HADOOP_VERSION=2.9
+
+# Path to the submarine jars.
+SUBMARINE_PATH=/opt/submarine-current
+
+# Similar to HADOOP_CONF_DIR, location of the Hadoop configuration directory
+HADOOP_CONF_PATH=/usr/local/hadoop/etc/hadoop
+
+# Path to the MNIST example.
+MNIST_PATH=/home/yarn/submarine
+
 while [ $# -gt 0 ]; do
   case "$1" in
     --debug*)
@@ -33,27 +50,31 @@ else
   JAVA_CMD="java"
 fi
 
-while getopts 'd:' OPT; do
+while getopts 'd:c' OPT; do
   case $OPT in
     d)
       DATA_URL="$OPTARG";;
+    c)
+      USE_DOCKER=1;;
   esac
 done
 shift $(($OPTIND - 1))
 
 if [[ -n "$DATA_URL" ]]; then
-  WORKER_CMD="myvenv.zip/venv/bin/python mnist_distributed.py --steps 2 
--data_dir /tmp/data --working_dir /tmp/mode --mnist_data_url ${DATA_URL}"
+  WORKER_CMD="venv/bin/python mnist_distributed.py --steps 2 --data_dir 
/tmp/data --working_dir /tmp/mode --mnist_data_url ${DATA_URL}"
 else
-  WORKER_CMD="myvenv.zip/venv/bin/python mnist_distributed.py --steps 2 
--data_dir /tmp/data --working_dir /tmp/mode"
-fi 
+  WORKER_CMD="venv/bin/python mnist_distributed.py --steps 2 --data_dir 
/tmp/data --working_dir /tmp/mode"
+fi
 
-SUBMARINE_VERSION=0.3.0-SNAPSHOT
-HADOOP_VERSION=2.9
-SUBMARINE_PATH=/opt/submarine-current
-HADOOP_CONF_PATH=/usr/local/hadoop/etc/hadoop
-MNIST_PATH=/home/yarn/submarine
+if [[ -n "$USE_DOCKER" ]]; then
+  WORKER_CMD="/opt/$WORKER_CMD"
+  # tony-mnist-tf-1.13.1:0.0.1 is built from the 
Dockerfile.tony.tf.mnist.tf_1.13.1 under docs/helper/docker/tensorflow/mnist
+  DOCKER_CONF="--conf tony.docker.containers.image=tony-mnist-tf-1.13.1:0.0.1 
--conf tony.docker.enabled=true"
+else
+  WORKER_CMD="myvenv.zip/$WORKER_CMD"
+fi
 
-${JAVA_CMD} -cp 
${SUBMARINE_PATH}/submarine-all-${SUBMARINE_VERSION}-hadoop-${HADOOP_VERSION}.jar:${HADOOP_CONF_PATH}
 \
+${JAVA_CMD} -cp $(${HADOOP_HOME}/bin/hadoop classpath 
--glob):${SUBMARINE_PATH}/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar:${HADOOP_CONF_PATH}
 \
  org.apache.submarine.client.cli.Cli job run --name tf-job-001 \
  --framework tensorflow \
  --verbose \
@@ -65,4 +86,5 @@ ${JAVA_CMD} -cp 
${SUBMARINE_PATH}/submarine-all-${SUBMARINE_VERSION}-hadoop-${HA
  --worker_launch_cmd "${WORKER_CMD}" \
  --ps_launch_cmd "myvenv.zip/venv/bin/python mnist_distributed.py --steps 2 
--data_dir /tmp/data --working_dir /tmp/mode" \
  --insecure \
- --conf 
tony.containers.resources=${MNIST_PATH}/myvenv.zip#archive,${MNIST_PATH}/mnist_distributed.py,${SUBMARINE_PATH}/submarine-all-${SUBMARINE_VERSION}-hadoop-${HADOOP_VERSION}.jar
+ --conf 
tony.containers.resources=${MNIST_PATH}/myvenv.zip#archive,${MNIST_PATH}/mnist_distributed.py,${SUBMARINE_PATH}/submarine-all-${SUBMARINE_VERSION}-hadoop-${SUBMARINE_HADOOP_VERSION}.jar
 \
+ $DOCKER_CONF
diff --git a/docs/helper/InstallationGuide.md b/docs/helper/InstallationGuide.md
index b2a4edb..e9ea3a5 100644
--- a/docs/helper/InstallationGuide.md
+++ b/docs/helper/InstallationGuide.md
@@ -394,9 +394,9 @@ Add configurations in container-executor.cfg
    yarn-hierarchy=/hadoop-yarn
    ```
 
-## Tensorflow Job with yarn runtime.
+## TensorFlow Job with yarn runtime.
 
-### Run a tensorflow job in a zipped python virtual environment
+### Run a TensorFlow job in a zipped python virtual environment
 
 Refer to build_python_virtual_env.sh in the directory of
 ${SUBMARINE_REPO_PATH}/dev-support/mini-submarine/submarine/ to build a zipped 
python virtual
@@ -404,7 +404,7 @@ environment. ${SUBMARINE_REPO_PATH} indicates submarine 
repo location.
 The generated zipped file can be named myvenv.zip.
 
 Copy 
${SUBMARINE_REPO_PATH}/dev-support/mini-submarine/submarine/run_submarine_mnist_tony.sh
-to the server on which you submit jobs. And modify the variables, 
SUBMARINE_VERSION, HADOOP_VERSION, SUBMARINE_PATH,
+to the server on which you submit jobs. And modify the variables, 
SUBMARINE_VERSION, SUBMARINE_HADOOP_VERSION, SUBMARINE_PATH,
 HADOOP_CONF_PATH and MNIST_PATH in it, according to your environment. If 
Kerberos
 is enabled, please delete the parameter, --insecure, in the command.
 
@@ -414,7 +414,17 @@ Run a distributed tensorflow job.
 ```
 The parameter -d is used to specify the url from which we can get the mnist 
data.
 
-### Run a tensorflow job in a docker container(TODO)
+### Run a TensorFlow job in a Docker container
+Prepare your Docker image. You can use the sample Dockerfile provided under 
`docker/tensorflow/mnist/Dockerfile.tony.tf.mnist.tf_1.13.1` as a reference for 
building your own Docker image.
+
+Please make sure you have _HADOOP_HOME_, _HADOOP_YARN_HOME_, 
_HADOOP_HDFS_HOME_, _HADOOP_CONF_DIR_, and _JAVA_HOME_ configured correctly. You 
can use this command to run a distributed TensorFlow job in Docker:
+
+```
+./run_submarine_mnist_tony.sh -c -d http://yann.lecun.com/exdb/mnist/
+```
+The parameter -c is used to specify that the job will be run in a Docker 
environment.
+
+The parameter -d is used to specify the url from which we can get the mnist 
data.
 
 
 ## Yarn Service Runtime Requirement (Deprecated)
diff --git a/docs/helper/docker/tensorflow/build-all.sh 
b/docs/helper/docker/tensorflow/build-all.sh
index 1e9848f..e0a1266 100755
--- a/docs/helper/docker/tensorflow/build-all.sh
+++ b/docs/helper/docker/tensorflow/build-all.sh
@@ -30,3 +30,6 @@ cd ../../with-cifar10-models/ubuntu-16.04
 
 docker build . -f Dockerfile.cpu.tf_1.13.1 -t tf-1.13.1-cpu:0.0.1
 docker build . -f Dockerfile.gpu.tf_1.13.1 -t tf-1.13.1-gpu:0.0.1
+
+cd ../../mnist
+docker build . -f Dockerfile.tony.tf.mnist.tf_1.13.1 -t 
tony-mnist-tf-1.13.1:0.0.1
\ No newline at end of file
diff --git 
a/docs/helper/docker/tensorflow/mnist/Dockerfile.tony.tf.mnist.tf_1.13.1 
b/docs/helper/docker/tensorflow/mnist/Dockerfile.tony.tf.mnist.tf_1.13.1
new file mode 100644
index 0000000..86874d4
--- /dev/null
+++ b/docs/helper/docker/tensorflow/mnist/Dockerfile.tony.tf.mnist.tf_1.13.1
@@ -0,0 +1,72 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM ubuntu:18.04
+ENV HADOOP_VERSION 2.9.2
+ENV HADOOP_URL 
https://www.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
+RUN apt-get update
+RUN apt-get -y install apt-transport-https \
+     ca-certificates \
+     curl \
+     gnupg2 \
+     git \
+     software-properties-common \
+     openjdk-8-jdk vim \
+     wget python3-distutils
+RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
+RUN add-apt-repository \
+   "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
+   $(lsb_release -cs) \
+   stable"
+
+# Download Hadoop binaries.
+RUN set -x \
+    && curl -fSL "$HADOOP_URL" -o /tmp/hadoop.tar.gz \
+    && tar -xvf /tmp/hadoop.tar.gz -C /opt/ \
+    && rm /tmp/hadoop.tar.gz*
+
+# Copy the $HADOOP_CONF_DIR folder as the "hadoopconf" folder in the same dir as 
the Dockerfile, e.g.:
+# pi@pi-aw:~/apache/submarine/docker$ tree
+# .
+# ├── Dockerfile
+# └── hadoopconf
+#     ├── capacity-scheduler.xml
+#     ├── configuration.xsl
+#     ...
+COPY hadoopconf /opt/hadoop-$HADOOP_VERSION/etc/hadoop
+
+# Download and config submarine
+RUN cd ~
+RUN git clone https://github.com/apache/submarine.git
+RUN cd submarine
+
+RUN submarine/dev-support/mini-submarine/submarine/build_python_virtual_env.sh
+RUN mv venv/ /opt/
+RUN chmod +r -R /opt/venv
+
+# Config Hadoop env
+ENV HADOOP_HOME=/opt/hadoop-$HADOOP_VERSION/
+ENV HADOOP_YARN_HOME=/opt/hadoop-$HADOOP_VERSION/
+ENV HADOOP_HDFS_HOME=/opt/hadoop-$HADOOP_VERSION/
+ENV HADOOP_CONF_DIR=/opt/hadoop-$HADOOP_VERSION/etc/hadoop
+ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
+
+# Create user; make sure the user groups are the same as on your host
+RUN groupadd -g 5000 hadoop
+RUN useradd -u 1000 -g hadoop pi
+RUN mkdir /home/pi
+RUN chown pi:hadoop /home/pi
+RUN mkdir /tmp/mode
+RUN chmod 777 /tmp/mode


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to