Repository: spark
Updated Branches:
  refs/heads/master 4aa9ccbde -> 9b48107f9


[SPARK-25957][K8S] Make building alternate language binding docker images 
optional

## What changes were proposed in this pull request?
bin/docker-image-tool.sh tries to build all docker images (JVM, PySpark
and SparkR) by default. However, not all Spark distributions are built with
SparkR, so this script fails on such distros.

With this change, building the alternate language binding docker images 
(PySpark and SparkR) becomes optional. Users must specify the Dockerfile for 
each language binding via the -p and -R flags, respectively, in order to build 
the corresponding binding docker images.

## How was this patch tested?

Tested following scenarios.
*bin/docker-image-tool.sh -r <repo> -t <tag> build* --> Builds only the JVM 
docker image (default behavior)

*bin/docker-image-tool.sh -r <repo> -t <tag> -p 
kubernetes/dockerfiles/spark/bindings/python/Dockerfile build* --> Builds both 
JVM and PySpark docker images

*bin/docker-image-tool.sh -r <repo> -t <tag> -p 
kubernetes/dockerfiles/spark/bindings/python/Dockerfile -R 
kubernetes/dockerfiles/spark/bindings/R/Dockerfile build* --> Builds JVM, 
PySpark and SparkR docker images.

Author: Nagaram Prasad Addepally <[email protected]>

Closes #23053 from ramaddepally/SPARK-25957.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9b48107f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9b48107f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9b48107f

Branch: refs/heads/master
Commit: 9b48107f9c84631e0ddaf0f2223296a3cbc16f83
Parents: 4aa9ccb
Author: Nagaram Prasad Addepally <[email protected]>
Authored: Wed Nov 21 15:51:37 2018 -0800
Committer: mcheah <[email protected]>
Committed: Wed Nov 21 15:51:37 2018 -0800

----------------------------------------------------------------------
 bin/docker-image-tool.sh                        | 63 ++++++++++++--------
 docs/running-on-kubernetes.md                   | 12 ++++
 .../scripts/setup-integration-test-env.sh       | 12 +++-
 3 files changed, 59 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/9b48107f/bin/docker-image-tool.sh
----------------------------------------------------------------------
diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh
index aa5d847..e51201a 100755
--- a/bin/docker-image-tool.sh
+++ b/bin/docker-image-tool.sh
@@ -41,6 +41,18 @@ function image_ref {
   echo "$image"
 }
 
+function docker_push {
+  local image_name="$1"
+  if [ ! -z $(docker images -q "$(image_ref ${image_name})") ]; then
+    docker push "$(image_ref ${image_name})"
+    if [ $? -ne 0 ]; then
+      error "Failed to push $image_name Docker image."
+    fi
+  else
+    echo "$(image_ref ${image_name}) image not found. Skipping push for this 
image."
+  fi
+}
+
 function build {
   local BUILD_ARGS
   local IMG_PATH
@@ -92,8 +104,8 @@ function build {
     base_img=$(image_ref spark)
   )
   local BASEDOCKERFILE=${BASEDOCKERFILE:-"$IMG_PATH/spark/Dockerfile"}
-  local 
PYDOCKERFILE=${PYDOCKERFILE:-"$IMG_PATH/spark/bindings/python/Dockerfile"}
-  local RDOCKERFILE=${RDOCKERFILE:-"$IMG_PATH/spark/bindings/R/Dockerfile"}
+  local PYDOCKERFILE=${PYDOCKERFILE:-false}
+  local RDOCKERFILE=${RDOCKERFILE:-false}
 
   docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
     -t $(image_ref spark) \
@@ -102,33 +114,29 @@ function build {
     error "Failed to build Spark JVM Docker image, please refer to Docker 
build output for details."
   fi
 
-  docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
-    -t $(image_ref spark-py) \
-    -f "$PYDOCKERFILE" .
+  if [ "${PYDOCKERFILE}" != "false" ]; then
+    docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+      -t $(image_ref spark-py) \
+      -f "$PYDOCKERFILE" .
+      if [ $? -ne 0 ]; then
+        error "Failed to build PySpark Docker image, please refer to Docker 
build output for details."
+      fi
+  fi
+
+  if [ "${RDOCKERFILE}" != "false" ]; then
+    docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+      -t $(image_ref spark-r) \
+      -f "$RDOCKERFILE" .
     if [ $? -ne 0 ]; then
-      error "Failed to build PySpark Docker image, please refer to Docker 
build output for details."
+      error "Failed to build SparkR Docker image, please refer to Docker build 
output for details."
     fi
-  docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
-    -t $(image_ref spark-r) \
-    -f "$RDOCKERFILE" .
-  if [ $? -ne 0 ]; then
-    error "Failed to build SparkR Docker image, please refer to Docker build 
output for details."
   fi
 }
 
 function push {
-  docker push "$(image_ref spark)"
-  if [ $? -ne 0 ]; then
-    error "Failed to push Spark JVM Docker image."
-  fi
-  docker push "$(image_ref spark-py)"
-  if [ $? -ne 0 ]; then
-    error "Failed to push PySpark Docker image."
-  fi
-  docker push "$(image_ref spark-r)"
-  if [ $? -ne 0 ]; then
-    error "Failed to push SparkR Docker image."
-  fi
+  docker_push "spark"
+  docker_push "spark-py"
+  docker_push "spark-r"
 }
 
 function usage {
@@ -143,8 +151,10 @@ Commands:
 
 Options:
   -f file               Dockerfile to build for JVM based Jobs. By default 
builds the Dockerfile shipped with Spark.
-  -p file               Dockerfile to build for PySpark Jobs. Builds Python 
dependencies and ships with Spark.
-  -R file               Dockerfile to build for SparkR Jobs. Builds R 
dependencies and ships with Spark.
+  -p file               (Optional) Dockerfile to build for PySpark Jobs. 
Builds Python dependencies and ships with Spark.
+                        Skips building PySpark docker image if not specified.
+  -R file               (Optional) Dockerfile to build for SparkR Jobs. Builds 
R dependencies and ships with Spark.
+                        Skips building SparkR docker image if not specified.
   -r repo               Repository address.
   -t tag                Tag to apply to the built image, or to identify the 
image to be pushed.
   -m                    Use minikube's Docker daemon.
@@ -164,6 +174,9 @@ Examples:
   - Build image in minikube with tag "testing"
     $0 -m -t testing build
 
+  - Build PySpark docker image
+    $0 -r docker.io/myrepo -t v2.3.0 -p 
kubernetes/dockerfiles/spark/bindings/python/Dockerfile build
+
   - Build and push image with tag "v2.3.0" to docker.io/myrepo
     $0 -r docker.io/myrepo -t v2.3.0 build
     $0 -r docker.io/myrepo -t v2.3.0 push

http://git-wip-us.apache.org/repos/asf/spark/blob/9b48107f/docs/running-on-kubernetes.md
----------------------------------------------------------------------
diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md
index a7b6fd1..a9d4488 100644
--- a/docs/running-on-kubernetes.md
+++ b/docs/running-on-kubernetes.md
@@ -88,6 +88,18 @@ $ ./bin/docker-image-tool.sh -r <repo> -t my-tag build
 $ ./bin/docker-image-tool.sh -r <repo> -t my-tag push
 ```
 
+By default `bin/docker-image-tool.sh` builds docker image for running JVM 
jobs. You need to opt-in to build additional 
+language binding docker images.
+
+Example usage is
+```bash
+# To build additional PySpark docker image
+$ ./bin/docker-image-tool.sh -r <repo> -t my-tag -p 
./kubernetes/dockerfiles/spark/bindings/python/Dockerfile build
+
+# To build additional SparkR docker image
+$ ./bin/docker-image-tool.sh -r <repo> -t my-tag -R 
./kubernetes/dockerfiles/spark/bindings/R/Dockerfile build
+```
+
 ## Cluster Mode
 
 To launch Spark Pi in cluster mode,

http://git-wip-us.apache.org/repos/asf/spark/blob/9b48107f/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh
----------------------------------------------------------------------
diff --git 
a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh
 
b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh
index a4a9f5b..36e30d7 100755
--- 
a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh
+++ 
b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh
@@ -72,10 +72,16 @@ then
   IMAGE_TAG=$(uuidgen);
   cd $UNPACKED_SPARK_TGZ
 
+  # Build PySpark image
+  LANGUAGE_BINDING_BUILD_ARGS="-p 
$UNPACKED_SPARK_TGZ/kubernetes/dockerfiles/spark/bindings/python/Dockerfile"
+
+  # Build SparkR image
+  LANGUAGE_BINDING_BUILD_ARGS="$LANGUAGE_BINDING_BUILD_ARGS -R 
$UNPACKED_SPARK_TGZ/kubernetes/dockerfiles/spark/bindings/R/Dockerfile"
+
   case $DEPLOY_MODE in
     cloud)
       # Build images
-      $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t 
$IMAGE_TAG build
+      $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t 
$IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build
 
       # Push images appropriately
       if [[ $IMAGE_REPO == gcr.io* ]] ;
@@ -89,13 +95,13 @@ then
     docker-for-desktop)
        # Only need to build as this will place it in our local Docker repo 
which is all
        # we need for Docker for Desktop to work so no need to also push
-       $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t 
$IMAGE_TAG build
+       $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t 
$IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build
        ;;
 
     minikube)
        # Only need to build and if we do this with the -m option for minikube 
we will
        # build the images directly using the minikube Docker daemon so no need 
to push
-       $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t 
$IMAGE_TAG build
+       $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t 
$IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build
        ;;
     *)
        echo "Unrecognized deploy mode $DEPLOY_MODE" && exit 1


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to