This is an automated email from the ASF dual-hosted git repository.

zuston pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
     new a73dcb968 feat(docker): Add example docker compose Uniffle/Spark 
cluster (#1532)
a73dcb968 is described below

commit a73dcb968ee3fde7170af300c4af238ab26c2de7
Author: Enrico Minack <[email protected]>
AuthorDate: Tue Feb 27 03:11:29 2024 +0100

    feat(docker): Add example docker compose Uniffle/Spark cluster (#1532)
    
    ### What changes were proposed in this pull request?
    Adds code to spin up an example Uniffle/Spark docker cluster using docker 
compose. This is used by the CI to test the example cluster setup.
    
    ### Why are the changes needed?
    This setup has a smaller footprint than the existing kubernetes example in 
`deploy/kubernetes/integration-test/e2e/README.md`, which is not trivial to 
setup. The new example only requires Docker to be installed, and can be spun up 
via
    
        ./deploy/docker/build.sh
        docker compose -f deploy/docker/docker-compose.yml up
    
    The Uniffle and Spark cluster can be used to interactively test Spark with 
Uniffle.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Manually and CI tested.
---
 .github/workflows/build.yml       |   4 +
 .github/workflows/docker.yml      | 151 +++++++++++++++++++++++++++++++++
 deploy/docker/README.md           | 172 ++++++++++++++++++++++++++++++++++++++
 deploy/docker/build.sh            |  46 ++++++++++
 deploy/docker/docker-compose.yml  |  86 +++++++++++++++++++
 deploy/docker/spark/Dockerfile    |  26 ++++++
 deploy/docker/uniffle/Dockerfile  |  23 +++++
 deploy/docker/uniffle/start.sh    |  41 +++++++++
 deploy/kubernetes/docker/build.sh |   4 +
 9 files changed, 553 insertions(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 9a232523c..12009192d 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -95,6 +95,10 @@ jobs:
         **/target/surefire-reports/*.txt
         **/target/surefire-reports/*.xml
 
+  docker:
+    needs: [integration]
+    uses: ./.github/workflows/docker.yml
+
   kubernetes:
     needs: [checkstyle, license, changes] # delay execution
     if: needs.changes.outputs.kubernetes == 'true' ||  github.event_name == 
'push'
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
new file mode 100644
index 000000000..67ab2a2ab
--- /dev/null
+++ b/.github/workflows/docker.yml
@@ -0,0 +1,151 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: Docker example
+
+on:
+  workflow_call:
+    inputs:
+      java-version:
+        default: '8'
+        required: false
+        type: string
+      jdk-distro:
+        default: 'temurin'
+        required: false
+        type: string
+
+jobs:
+  docker:
+    runs-on: ubuntu-20.04
+    name: test Docker cluster with failing and speculative tasks
+    steps:
+    - name: Checkout project
+      uses: actions/checkout@v3
+    - name: Set up JDK ${{ inputs.java-version }}
+      uses: actions/setup-java@v3
+      with:
+        java-version: ${{ inputs.java-version }}
+        distribution: ${{ inputs.jdk-distro }}
+    - name: Cache local Maven repository
+      uses: actions/cache@v3
+      with:
+        path: ~/.m2/repository
+        key: mvn-${{ inputs.java-version }}-docker-${{ hashFiles('**/pom.xml') 
}}
+        restore-keys: |
+          mvn-${{ inputs.java-version }}-docker-
+    - name: Build the docker images
+      run: ./deploy/docker/build.sh
+      shell: bash
+    - name: Start the docker cluster
+      id: up
+      run: |
+        docker compose -f deploy/docker/docker-compose.yml up --wait 
--wait-timeout 60 \
+          --scale coordinator=2 --scale shuffle-server=4 --scale spark-worker=5
+
+        # check all rss containers are up
+        healthy="$(docker container ls | grep rss-server-example | grep 
"(healthy)" | wc -l)"
+        if [ "$healthy" == "6" ]
+        then
+          echo "All RSS containers up"
+        else
+          echo "::error::Could not bring up Docker cluster"
+          exit 1
+        fi
+      shell: bash
+    - name: Prepare example Spark app
+      run: |
+        cat << EOL > example.scala
+
+        import org.apache.spark.TaskContext
+        import org.apache.spark.sql.SaveMode
+
+        // fails iteration (at the end) or delays iteration (each element)
+        // failing tasks negates iterator values, shuffle data of failing task 
must not leak into next stage
+        case class FaultyIterator(it: Iterator[java.lang.Long], fail: Boolean, 
sleep: Option[Int]) extends Iterator[java.lang.Long] {
+          override def hasNext: Boolean = it.hasNext || fail
+          override def next(): java.lang.Long = {
+            // delay iteration if requested
+            if (sleep.isDefined) {
+              val start = System.nanoTime()
+              while (start + sleep.get >= System.nanoTime()) { }
+            }
+
+            // fail at the end if requested
+            if (fail && !it.hasNext) throw new RuntimeException()
+
+            // just iterate
+            if (fail) {
+              -it.next()
+            } else {
+              it.next()
+            }
+          }
+        }
+
+        spark.range(0, 10000000, 1, 100) \
+          .mapPartitions { it =>
+            val ctx = TaskContext.get();
+            FaultyIterator(
+              it,
+              // we fail task two 3 times
+              (ctx.partitionId == 2 && ctx.attemptNumber < 3),
+              // and delay attempt 4 so we see a speculative execution
+              Some(ctx.partitionId == 2 && ctx.attemptNumber >= 3).filter(v => 
v).map(_ => 250000)
+            )
+          } \
+          .groupBy(($"value" / 1000000).cast("int")) \
+          .as[Long, Long] \
+          .mapGroups{(id, it) => (id, it.length)} \
+          .sort("_1") \
+          .write \
+          .mode(SaveMode.Overwrite) \
+          .csv("/shared/result.csv")
+        EOL
+        docker cp example.scala rss-spark-master-1:/
+      shell: bash
+    - name: Run example Spark app
+      run: |
+        docker exec rss-spark-master-1 /bin/bash -c "cat /example.scala | 
/opt/spark/bin/spark-shell \
+          --master spark://rss-spark-master-1:7077 \
+          --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+          --conf 
spark.shuffle.manager=org.apache.spark.shuffle.RssShuffleManager \
+          --conf 
spark.rss.coordinator.quorum=rss-coordinator-1:19999,rss-coordinator-2:19999 \
+          --conf spark.rss.storage.type=MEMORY_LOCALFILE \
+          --conf spark.task.maxFailures=4 \
+          --conf spark.speculation=true"
+      shell: bash
+    - name: Assert result
+      run: |
+        docker exec rss-spark-master-1 bash -c "cat /shared/result.csv/*.csv" 
> ./result.csv
+        cat << EOL | diff -y - result.csv
+        0,1000000
+        1,1000000
+        2,1000000
+        3,1000000
+        4,1000000
+        5,1000000
+        6,1000000
+        7,1000000
+        8,1000000
+        9,1000000
+        EOL
+      shell: bash
+    - name: Stop the docker cluster
+      if: always() && steps.up.outcome == 'success'
+      run: docker compose -f deploy/docker/docker-compose.yml down
+      shell: bash
diff --git a/deploy/docker/README.md b/deploy/docker/README.md
new file mode 100644
index 000000000..909b00faf
--- /dev/null
+++ b/deploy/docker/README.md
@@ -0,0 +1,172 @@
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+# Example Uniffle/Spark docker cluster
+
+This example creates a docker cluster consisting of
+- two coordinators
+- three shuffle servers
+- one Spark master
+- two Spark workers
+
+## Build the docker images
+
+First build the needed docker images:
+
+```bash
+./deploy/docker/build.sh
+```
+
+## Start the docker cluster
+
+Then start the cluster:
+
+```bash
+docker compose -f deploy/docker/docker-compose.yml up
+```
+```
+[+] Running 8/0
+ ✔ Container rss-coordinator-1     Created                                     
                0.0s
+ ✔ Container rss-coordinator-2     Created                                     
                0.0s
+ ✔ Container rss-shuffle-server-1  Created                                     
                0.0s
+ ✔ Container rss-shuffle-server-2  Created                                     
                0.0s
+ ✔ Container rss-shuffle-server-3  Created                                     
                0.0s
+ ✔ Container rss-spark-master-1    Created                                     
                0.0s
+ ✔ Container rss-spark-worker-1    Created                                     
                0.0s
+ ✔ Container rss-spark-worker-2    Created                                     
                0.0s
+```
+
+## Scale the docker cluster
+
+You can easily scale this cluster up and down.
+
+Let's scale the shuffle servers up from 3 to 4, and the Spark workers from 2 
to 4:
+
+```bash
+docker compose -f deploy/docker/docker-compose.yml scale shuffle-server=4 
spark-worker=4
+```
+```
+[+] Running 11/11
+ ✔ Container rss-coordinator-1     Running                                     
                0.0s
+ ✔ Container rss-coordinator-2     Running                                     
                0.0s
+ ✔ Container rss-shuffle-server-1  Running                                     
                0.0s
+ ✔ Container rss-shuffle-server-2  Running                                     
                0.0s
+ ✔ Container rss-shuffle-server-3  Running                                     
                0.0s
+ ✔ Container rss-shuffle-server-4  Started                                     
                0.0s
+ ✔ Container rss-spark-master-1    Running                                     
                0.0s
+ ✔ Container rss-spark-worker-1    Running                                     
                0.0s
+ ✔ Container rss-spark-worker-2    Running                                     
                0.0s
+ ✔ Container rss-spark-worker-3    Started                                     
                0.0s
+ ✔ Container rss-spark-worker-4    Started                                     
                0.0s
+```
+
+## Use the Spark cluster
+
+Start a Spark shell on the cluster:
+
+```bash
+docker exec -it rss-spark-master-1 /opt/spark/bin/spark-shell \
+  --master spark://rss-spark-master-1:7077 \
+  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+  --conf spark.shuffle.manager=org.apache.spark.shuffle.RssShuffleManager \
+  --conf 
spark.rss.coordinator.quorum=rss-coordinator-1:19999,rss-coordinator-2:19999 \
+  --conf spark.rss.storage.type=MEMORY_LOCALFILE \
+  --conf spark.speculation=true
+```
+
+You can view the Spark master UI at http://localhost:8080/
+
+The following example runs a job where
+- task two fails several times
+- task four is delayed, which triggers a speculative attempt
+
+```Scala
+import org.apache.spark.TaskContext
+
+// fails iteration (at the end) or delays iteration (each element)
+// failing tasks negates iterator values, shuffle data of failing task must 
not leak into next stage
+case class FaultyIterator(it: Iterator[java.lang.Long], fail: Boolean, sleep: 
Option[Int]) extends Iterator[java.lang.Long] {
+  override def hasNext: Boolean = it.hasNext || fail
+  override def next(): java.lang.Long = {
+    // delay iteration if requested
+    if (sleep.isDefined) {
+      val start = System.nanoTime()
+      while (start + sleep.get >= System.nanoTime()) { }
+    }
+
+    // fail at the end if requested
+    if (fail && !it.hasNext) throw new RuntimeException()
+
+    // just iterate
+    if (fail) {
+      -it.next()
+    } else {
+      it.next()
+    }
+  }
+}
+
+// we fail task two 3 times and delay task four so we see a speculative 
execution
+val result = (
+  spark.range(0, 10000000, 1, 100)
+       .mapPartitions { it => {
+         val ctx = TaskContext.get()
+         FaultyIterator(
+           it,
+           ctx.partitionId == 2 && ctx.attemptNumber < 3,
+           Some(ctx.partitionId == 4).filter(v => v).map(_ => 250000)
+         )
+       }}
+       .groupBy(($"value" / 1000000).cast("int"))
+       .as[Long, Long]
+       .mapGroups{(id, it) => (id, it.length)}
+       .sort("_1")
+       .collect
+  )
+```
+
+We can compare the result with the expected outcome:
+```Scala
+assert(result.sameElements(Array((0,1000000), (1,1000000), (2,1000000), 
(3,1000000), (4,1000000), (5,1000000), (6,1000000), (7,1000000), (8,1000000), 
(9,1000000))))
+```
+
+## Stop the docker cluster
+
+Finally, stop the cluster:
+
+```bash
+docker compose -f deploy/docker/docker-compose.yml down
+```
+```
+[+] Running 12/12
+ ✔ Container rss-shuffle-server-1  Removed                                     
               10.5s
+ ✔ Container rss-shuffle-server-2  Removed                                     
               10.7s
+ ✔ Container rss-shuffle-server-3  Removed                                     
               10.5s
+ ✔ Container rss-shuffle-server-4  Removed                                     
               10.6s
+ ✔ Container rss-spark-worker-1    Removed                                     
                0.8s
+ ✔ Container rss-spark-worker-2    Removed                                     
                1.0s
+ ✔ Container rss-spark-worker-3    Removed                                     
                0.9s
+ ✔ Container rss-spark-worker-4    Removed                                     
                1.1s
+ ✔ Container rss-spark-master-1    Removed                                     
                1.6s
+ ✔ Container rss-coordinator-1     Removed                                     
               10.4s
+ ✔ Container rss-coordinator-2     Removed                                     
               10.5s
+ ✔ Network rss_default             Removed                                     
                0.4s
+```
+
+## Dependencies
+
+This example needs [docker](https://www.docker.com/get-started/) to be 
installed.
diff --git a/deploy/docker/build.sh b/deploy/docker/build.sh
new file mode 100755
index 000000000..c11e88355
--- /dev/null
+++ b/deploy/docker/build.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -o pipefail
+set -o nounset   # exit the script if you try to use an uninitialised variable
+set -o errexit   # exit the script if any statement returns a non-true return 
value
+
+EXAMPLE_DIR=$(cd "$(dirname "$0")"; pwd)
+RSS_DIR="$EXAMPLE_DIR/../.."
+
+# build RSS docker image
+cd "$RSS_DIR/deploy/kubernetes/docker"
+IMAGE_VERSION=head ./build.sh --push-image false
+
+# patch conf/server.conf
+cp "$RSS_DIR/conf/server.conf" "$EXAMPLE_DIR/uniffle/"
+sed -i -e "s%rss.storage.basePath .*%rss.storage.basePath /tmp/rss%" 
"$EXAMPLE_DIR/uniffle/server.conf"
+sed -i -e "s/rss.storage.type .*/rss.storage.type MEMORY_LOCALFILE/" 
"$EXAMPLE_DIR/uniffle/server.conf"
+sed -i -e "s/rss.coordinator.quorum .*/rss.coordinator.quorum 
rss-coordinator-1:19999,rss-coordinator-2:19999/" 
"$EXAMPLE_DIR/uniffle/server.conf"
+sed -i -e "s/rss.server.buffer.capacity .*/rss.server.buffer.capacity 200mb/" 
"$EXAMPLE_DIR/uniffle/server.conf"
+sed -i -e "s/rss.server.read.buffer.capacity 
.*/rss.server.read.buffer.capacity 100mb/" "$EXAMPLE_DIR/uniffle/server.conf"
+sed -i -e "s/rss.server.disk.capacity .*/rss.server.disk.capacity 100m/" 
"$EXAMPLE_DIR/uniffle/server.conf"
+
+# build RSS example docker image
+docker build -t rss-server-example "$EXAMPLE_DIR/uniffle"
+
+# build Spark example docker image
+cp "$RSS_DIR/client-spark/spark3-shaded/target/"rss-client-spark3-shaded-*.jar 
"$EXAMPLE_DIR/spark/"
+docker build -t rss-spark-example "$EXAMPLE_DIR/spark"
+
diff --git a/deploy/docker/docker-compose.yml b/deploy/docker/docker-compose.yml
new file mode 100644
index 000000000..0d412bedc
--- /dev/null
+++ b/deploy/docker/docker-compose.yml
@@ -0,0 +1,86 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+version: "3"
+name: rss
+
+services:
+  coordinator:
+    image: rss-server-example:latest
+    deploy:
+      mode: replicated
+      replicas: 2
+    environment:
+      SERVICE_NAME: coordinator
+      RPC_PORT: 19999
+      XMX_SIZE: 1g
+    expose:
+      - 19998
+      - 19999
+
+  shuffle-server:
+    image: rss-server-example:latest
+    deploy:
+      mode: replicated
+      replicas: 3
+    environment:
+      SERVICE_NAME: server
+      RPC_PORT: 19999
+      XMX_SIZE: 1g
+    links:
+      - coordinator
+    expose:
+      - 19998
+      - 19999
+
+  spark-master:
+    image: rss-spark-example:latest
+    command: /opt/spark/bin/spark-class org.apache.spark.deploy.master.Master 
-h rss-spark-master-1
+    environment:
+      MASTER: spark://rss-spark-master-1:7077
+      SPARK_PUBLIC_DNS: localhost
+      SPARK_MASTER_WEBUI_PORT: 8080
+      PYSPARK_PYTHON: python${PYTHON_VERSION:-3.8}
+      PYSPARK_DRIVER_PYTHON: python${PYTHON_VERSION:-3.8}
+    volumes:
+      - shared:/shared:rw
+    expose:
+      - 7077
+    ports:
+      - 4040:4040
+      - 7077:7077
+      - 8080:8080
+
+  spark-worker:
+    image: rss-spark-example:latest
+    command: /opt/spark/bin/spark-class org.apache.spark.deploy.worker.Worker 
spark://rss-spark-master-1:7077
+    environment:
+      SPARK_WORKER_CORES: 1
+      SPARK_WORKER_MEMORY: 1g
+      SPARK_WORKER_PORT: 8080
+      SPARK_WORKER_WEBUI_PORT: 8881
+      SPARK_PUBLIC_DNS: localhost
+    volumes:
+      - shared:/shared:rw
+    deploy:
+      mode: replicated
+      replicas: 2
+    links:
+      - spark-master
+
+volumes:
+  shared:
diff --git a/deploy/docker/spark/Dockerfile b/deploy/docker/spark/Dockerfile
new file mode 100644
index 000000000..f15ff27e2
--- /dev/null
+++ b/deploy/docker/spark/Dockerfile
@@ -0,0 +1,26 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+FROM apache/spark:3.5.0
+
+ENV PATH="${PATH}:/opt/spark/bin"
+
+USER root
+RUN mkdir -p /home/spark /shared; chown spark:spark /home/spark /shared
+COPY rss-client-spark3-shaded-*.jar /opt/spark/jars
+VOLUME /shared
+USER spark
diff --git a/deploy/docker/uniffle/Dockerfile b/deploy/docker/uniffle/Dockerfile
new file mode 100644
index 000000000..9beb8a3f0
--- /dev/null
+++ b/deploy/docker/uniffle/Dockerfile
@@ -0,0 +1,23 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+FROM rss-server:head
+
+COPY start.sh /data/rssadmin/rss/bin
+COPY server.conf /data/rssadmin/rss/conf
+
+HEALTHCHECK --interval=3s --timeout=3s --start-period=60s CMD lsof 
-i:"${RPC_PORT}" -sTCP:LISTEN
diff --git a/deploy/docker/uniffle/start.sh b/deploy/docker/uniffle/start.sh
new file mode 100644
index 000000000..030a5633c
--- /dev/null
+++ b/deploy/docker/uniffle/start.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+basedir='/data/rssadmin/rss'
+cd $basedir || exit
+
+coordinator_conf=$basedir'/conf/coordinator.conf'
+echo "coordinator_conf: $coordinator_conf"
+server_conf=$basedir'/conf/server.conf'
+echo "server_conf: $server_conf"
+
+if [ "$SERVICE_NAME" == "coordinator" ];then
+    start_script=${basedir}'/bin/start-coordinator.sh'
+    log_file=$basedir'/logs/coordinator.log'
+fi
+
+if [ "$SERVICE_NAME" == "server" ];then
+    start_script=${basedir}'/bin/start-shuffle-server.sh'
+    log_file=$basedir'/logs/shuffle_server.log'
+fi
+
+touch "${log_file}"
+${start_script} | grep -v "class path is"
+echo
+tail -n +0 -f "${log_file}"
diff --git a/deploy/kubernetes/docker/build.sh 
b/deploy/kubernetes/docker/build.sh
index 67e3d8473..58478c352 100755
--- a/deploy/kubernetes/docker/build.sh
+++ b/deploy/kubernetes/docker/build.sh
@@ -17,6 +17,10 @@
 # limitations under the License.
 #
 
+set -o pipefail
+set -o nounset   # exit the script if you try to use an uninitialised variable
+set -o errexit   # exit the script if any statement returns a non-true return 
value
+
 function exit_with_usage() {
   set +x
   echo "./build.sh - Tool for building docker images of Remote Shuffle Service"

Reply via email to