(amoro) branch master updated: [AMORO-2815] Adjust docker images of apache amoro project (#2818)

jinsongzhou Fri, 10 May 2024 20:28:19 -0700

This is an automated email from the ASF dual-hosted git repository.

jinsongzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/amoro.git



The following commit(s) were added to refs/heads/master by this push:
     new 5291c152e [AMORO-2815] Adjust docker images of apache amoro project 
(#2818)
5291c152e is described below

commit 5291c152e60b68c6a3115d74a4308bb868b5d1c8
Author: tcodehuber <[email protected]>
AuthorDate: Sat May 11 11:28:09 2024 +0800

    [AMORO-2815] Adjust docker images of apache amoro project (#2818)
    
    * [AMORO-2815] Adjust docker images of apache amoro project
    
    * code refactor
    
    * build master-snapshot image
---
 .github/workflows/docker-images.yml                |  69 +-----
 .../dist/src/main/arctic-bin/conf/config.yaml      |  12 +-
 charts/amoro/values.yaml                           |   8 +-
 docker/README.md                                   |  13 +-
 docker/amoro/Dockerfile                            |   2 +-
 docker/build.sh                                    | 124 +---------
 docker/datanode/Dockerfile                         |  70 ------
 docker/datanode/run_dn.sh                          |  40 ----
 docker/demo-cluster.sh                             | 186 ---------------
 docker/namenode/Dockerfile                         |  76 ------
 docker/namenode/entrypoint.sh                      | 124 ----------
 docker/namenode/export_container_ip.sh             |  30 ---
 docker/namenode/run_nn.sh                          |  37 ---
 docker/quickdemo/Dockerfile                        |  74 ------
 docker/quickdemo/flink-conf.yaml                   | 260 ---------------------
 docs/admin-guides/deployment-on-kubernetes.md      |   8 +-
 docs/admin-guides/managing-optimizers.md           |  32 +--
 17 files changed, 50 insertions(+), 1115 deletions(-)

diff --git a/.github/workflows/docker-images.yml 
b/.github/workflows/docker-images.yml
index 52e732604..3494dc4b0 100644
--- a/.github/workflows/docker-images.yml
+++ b/.github/workflows/docker-images.yml
@@ -62,7 +62,7 @@ jobs:
           flavor: |
             latest=auto
           images: |
-            name=arctic163/amoro
+            name=apache/amoro
           tags: |
             type=ref,event=branch,enable=${{ matrix.hadoop == 'v3' 
}},suffix=-snapshot
             type=ref,event=branch,enable=${{ matrix.hadoop == 'v3' 
}},suffix=-snapshot-hadoop3
@@ -124,7 +124,7 @@ jobs:
           flavor: |
             latest=auto
           images: |
-            name=arctic163/optimizer-flink
+            name=apache/amoro-flink-optimizer
           tags: |
             type=ref,event=branch,enable=${{ matrix.flink == '1.14.6' 
}},suffix=-snapshot
             type=ref,event=branch,enable=${{ matrix.flink == '1.14.6' 
}},suffix=-snapshot-flink1.14
@@ -204,10 +204,11 @@ jobs:
           flavor: |
             latest=auto
           images: |
-            name=arctic163/optimizer-spark
+            name=apache/amoro-spark-optimizer
           tags: |
+            type=ref,event=branch,enable=${{ matrix.spark == '3.3.3' 
}},suffix=-snapshot
             type=ref,event=branch,enable=${{ matrix.spark == '3.3.3' 
}},suffix=-snapshot-spark3.3
-            type=semver,enable=${{ matrix.spark == '3.3.3' 
}},pattern={{version}}-spark3.3
+            type=semver,enable=${{ matrix.spark == '3.3.3' 
}},pattern={{version}}
 
       - name: Print tags
         run: echo '${{ steps.meta.outputs.tags }}'
@@ -249,63 +250,3 @@ jobs:
             
OPTIMIZER_JOB=amoro-ams/amoro-ams-optimizer/amoro-optimizer-spark/target/amoro-optimizer-spark-${{
 env.AMORO_VERSION }}-jar-with-dependencies.jar
 
 
-  docker-quickdemo:
-    name: Push Amoro Quick-Demo Docker Image to Docker Hub
-    runs-on: ubuntu-latest
-    needs: docker-amoro
-    if: ${{ startsWith(github.repository, 'apache/') && startsWith(github.ref, 
'refs/tags/' )}}
-    steps:
-      - uses: actions/checkout@v3
-      - name: Set up JDK 8
-        uses: actions/setup-java@v3
-        with:
-          java-version: '8'
-          distribution: 'temurin'
-          cache: maven
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
-
-      - name: Set up Docker tags
-        uses: docker/metadata-action@v5
-        id: meta
-        with:
-          flavor: |
-            latest=auto
-          images: |
-            name=arctic163/quickdemo
-          tags: |
-            type=ref,event=branch,suffix=-snapshot
-            type=semver,pattern={{version}}
-
-      - name: Print tags
-        run: echo '${{ steps.meta.outputs.tags }}'
-
-      - name: Set Amoro Tag
-        id: tag
-        run: |
-          AMORO_TAG=$(echo "${{ steps.meta.outputs.tags }}" | head -n 1) && 
echo "AMORO_TAG=${AMORO_TAG#*:}" >> $GITHUB_OUTPUT
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v2
-        with:
-          username: ${{ secrets.DOCKERHUB_USER }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
-
-      - name: Build optimizer module with Maven
-        run: mvn clean package -pl 
'amoro-mixed-format/amoro-mixed-format-flink/v1.17/amoro-mixed-format-flink-runtime-1.17'
 -am -e -DskipTests -B -ntp
-
-      - name: Build and Push Flink Optimizer Docker Image
-        uses: docker/build-push-action@v4
-        with:
-          context: .
-          push: true
-          file: docker/quickdemo/Dockerfile
-          platforms: linux/amd64,linux/arm64
-          tags: ${{ steps.meta.outputs.tags }}
-          build-args: |
-            AMORO_TAG=${{ steps.tag.outputs.AMORO_TAG }}
-            FLINK_VERSION=1.18.1
-            ICEBERG_VERSION=1.3.1
diff --git a/amoro-ams/dist/src/main/arctic-bin/conf/config.yaml 
b/amoro-ams/dist/src/main/arctic-bin/conf/config.yaml
index d6cbb7ff2..a41f0774b 100644
--- a/amoro-ams/dist/src/main/arctic-bin/conf/config.yaml
+++ b/amoro-ams/dist/src/main/arctic-bin/conf/config.yaml
@@ -142,7 +142,7 @@ containers:
 #   container-impl: 
com.netease.arctic.server.manager.KubernetesOptimizerContainer
 #    properties:
 #     kube-config-path: ~/.kube/config
-#     image: arctic163/amoro:0.6
+#     image: apache/amoro:{version}
 #     namespace: default
 
 #  - name: flinkContainer
@@ -156,7 +156,7 @@ containers:
 #      export.FLINK_CONF_DIR: /opt/flink/conf/                     # Flink 
config dir
 #      # flink kubernetes application properties.
 #      job-uri: "local:///opt/flink/usrlib/optimizer-job.jar"      # Optimizer 
job main jar for kubernetes application
-#      flink-conf.kubernetes.container.image: 
"arctic163/optimizer-flink:{version}"   # Optimizer image ref
+#      flink-conf.kubernetes.container.image: 
"apache/amoro-flink-optimizer:{version}"   # Optimizer image ref
 #      flink-conf.kubernetes.service-account: flink                # Service 
account that is used within kubernetes cluster.
 
 #containers:
@@ -175,7 +175,7 @@ containers:
 #      ams-optimizing-uri: thrift://ams.amoro.service.local:1261   # AMS 
optimizing uri
 #      spark-conf.spark.dynamicAllocation.enabled: "true"          # Enabling 
DRA feature can make full use of computing resources
 #      spark-conf.spark.shuffle.service.enabled: "false"           # If spark 
DRA is used on kubernetes, we should set it false
-#      spark-conf.spark.dynamicAllocation.shuffleTracking.enabled: "true"      
              # Enables shuffle file tracking for executors, which allows 
dynamic allocation without the need for an external shuffle service
-#      spark-conf.spark.kubernetes.container.image: 
"arctic163/optimizer-spark:{version}"    # Optimizer image ref
-#      spark-conf.spark.kubernetes.namespace: <spark-namespace>                
              # Namespace that is used within kubernetes cluster
-#      spark-conf.spark.kubernetes.authenticate.driver.serviceAccountName: 
<spark-sa>        # Service account that is used within kubernetes cluster.
+#      spark-conf.spark.dynamicAllocation.shuffleTracking.enabled: "true"      
                    # Enables shuffle file tracking for executors, which allows 
dynamic allocation without the need for an external shuffle service
+#      spark-conf.spark.kubernetes.container.image: 
"apache/amoro-spark-optimizer:{version}"       # Optimizer image ref
+#      spark-conf.spark.kubernetes.namespace: <spark-namespace>                
                    # Namespace that is used within kubernetes cluster
+#      spark-conf.spark.kubernetes.authenticate.driver.serviceAccountName: 
<spark-sa>              # Service account that is used within kubernetes 
cluster.
diff --git a/charts/amoro/values.yaml b/charts/amoro/values.yaml
index 3273ce88f..ee8ed2eae 100644
--- a/charts/amoro/values.yaml
+++ b/charts/amoro/values.yaml
@@ -35,7 +35,7 @@ nameOverride: ""
 image:
   ## @param image.repository AMS image repository
   ##
-  repository: arctic163/amoro
+  repository: apache/amoro
   ## @param image.tag for AMS image repository
   ##
   tag: ""
@@ -197,7 +197,7 @@ optimizer:
     image:
       ## @param optimizer.flink.image.repository the image repository
       ##
-      repository: arctic163/optimizer-flink
+      repository: apache/amoro-flink-optimizer
       ## @param optimizer.flink.image.tag the image tag, if not set, the 
default value is the same with amoro image tag
       ##
       tag: ~
@@ -222,7 +222,7 @@ optimizer:
     image:
       ## @param optimizer.spark.image.repository the image repository
       ##
-      repository: arctic163/optimizer-spark
+      repository: apache/amoro-spark-optimizer
       ## @param optimizer.spark.image.tag the image tag, if not set, the 
default value is the same with amoro image tag
       ##
       tag: ~
@@ -254,7 +254,7 @@ optimizer:
     properties:
       namespace: "default"
       kube-config-path: "～/.kube/config"
-      image: "arctic163/amoro:0.6"
+      image: "apache/amoro:0.6"
   extra: []
 
 ## Configure the ingress resource that allows you to access the
diff --git a/docker/README.md b/docker/README.md
index 6849e02d3..08322ffa9 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -22,11 +22,8 @@ We provide a bash script to help you build docker image 
easier.
 You can control which image is built by the following parameters:
 
 - amoro
-- optimizer-flink
-- optimizer-spark
-- quickdemo
-- namenode
-- datanode
+- amoro-flink-optimizer
+- amoro-spark-optimizer
 
 e.g.
 
@@ -34,8 +31,8 @@ e.g.
 ./build.sh amoro
 ```
 
-- NOTICE: The amoro image, quickdemo image and optimizer-flink image required 
the project had been packaged. 
-so run `mvn package -pl '!trino'` before build amoro, quickdemo or 
optimizer-flink image.
+- NOTICE: The amoro image, amoro-spark-optimizer image and 
amoro-flink-optimizer image require the project to have been packaged.
+  So run `mvn package -pl '!amoro-mixed-format/amoro-mixed-format-trino'` 
before building the amoro, amoro-spark-optimizer, or amoro-flink-optimizer image.
 
 You can speed up image building via 
 
@@ -43,7 +40,7 @@ You can speed up image building via
 ./build.sh \
   --apache-archive https://mirrors.aliyun.com/apache \
   --debian-mirror https://mirrors.aliyun.com  \
-  optimizer-flink
+  amoro-flink-optimizer
 ```
 
 more options see `./build.sh --help`
diff --git a/docker/amoro/Dockerfile b/docker/amoro/Dockerfile
index d74f0e32c..28117acad 100644
--- a/docker/amoro/Dockerfile
+++ b/docker/amoro/Dockerfile
@@ -20,7 +20,7 @@
 #   Run the docker command below under project dir.
 #      docker build \
 #        --file docker/amoro/Dockerfile \
-#        --tag arctic163/amoro:tagname
+#        --tag apache/amoro:tagname
 #        .
 
 FROM eclipse-temurin:8-jdk-jammy as builder
diff --git a/docker/build.sh b/docker/build.sh
index 7cc3182d7..724fa9610 100755
--- a/docker/build.sh
+++ b/docker/build.sh
@@ -26,7 +26,6 @@ cd $CURRENT_DIR
 AMORO_VERSION=`cat $PROJECT_HOME/pom.xml | grep 'amoro-parent' -C 3 | grep -Eo 
'<version>.*</version>' | awk -F'[><]' '{print $3}'`
 FLINK_VERSION=1.15.3
 SPARK_VERSION=3.3.3
-HADOOP_VERSION=2.10.2
 DEBIAN_MIRROR=http://deb.debian.org
 APACHE_ARCHIVE=https://archive.apache.org/dist
 
FLINK_OPTIMIZER_JOB_PATH=amoro-ams/amoro-ams-optimizer/amoro-optimizer-flink/target/amoro-optimizer-flink-${AMORO_VERSION}-jar-with-dependencies.jar
@@ -34,7 +33,6 @@ 
FLINK_OPTIMIZER_JOB=${PROJECT_HOME}/${FLINK_OPTIMIZER_JOB_PATH}
 
SPARK_OPTIMIZER_JOB_PATH=amoro-ams/amoro-ams-optimizer/amoro-optimizer-spark/target/amoro-optimizer-spark-${AMORO_VERSION}-jar-with-dependencies.jar
 SPARK_OPTIMIZER_JOB=${PROJECT_HOME}/${SPARK_OPTIMIZER_JOB_PATH}
 AMORO_TAG=$AMORO_VERSION
-ALSO_MAKE=true
 MAVEN_MIRROR=https://repo.maven.apache.org/maven2
 
 
@@ -44,23 +42,18 @@ Usage: $0 [options] [image]
 Build for Amoro demo docker images.
 
 Images:
-    quickdemo               Build Amoro QuickStart Image, for run flink 
ingestion job in quick-demo http://amoro.apache.org/quick-demo/
-    namenode                Build a hadoop namenode container for quick start 
demo.
-    datanode                Build a hadoop datanode container for quick start 
demo.
-    optimizer-flink         Build official Amoro optimizer deployed with flink 
engine for production environments.
-    optimizer-spark         Build official Amoro optimizer deployed with spark 
engine for production environments.
+    amoro-flink-optimizer   Build official Amoro optimizer deployed with flink 
engine for production environments.
+    amoro-spark-optimizer   Build official Amoro optimizer deployed with spark 
engine for production environments.
     amoro                   Build official Amoro image used for production 
environments.
 
 Options:
     --flink-version         Flink binary release version, default is 1.15.3, 
format must be x.y.z
     --spark-version         Spark binary release version, default is 3.3.3, 
format must be x.y.z
-    --hadoop-version        Hadoop binary release version, default is 2.10.2, 
format must be x.y.z
     --apache-archive        Apache Archive url, default is 
https://archive.apache.org/dist
     --debian-mirror         Mirror url of debian, default is 
http://deb.debian.org
     --maven-mirror          Mirror url of maven, default is 
https://repo.maven.apache.org/maven2
     --optimizer-job         Location of flink/spark optimizer job
-    --tag                   Tag for 
amoro/optimizer-flink/optimizer-spark/quickdemo image.
-    --also-make             Also make amoro when build quickdemo, if set to 
false, it will pull from hub or use exists dependency.
+    --tag                   Tag for 
amoro/amoro-flink-optimizer/amoro-spark-optimizer image.
     --dry-run               If this set to true, will not call 'docker build'
 EOF
 }
@@ -72,7 +65,7 @@ i=1;
 j=$#;
 while [ $i -le $j ]; do
   case $1 in
-    quickdemo|namenode|datanode|optimizer-flink|optimizer-spark|amoro)
+    amoro-flink-optimizer|amoro-spark-optimizer|amoro)
     ACTION=$1;
     i=$((i+1))
     shift 1
@@ -92,13 +85,6 @@ while [ $i -le $j ]; do
     shift 1
     ;;
 
-    "--hadoop-version")
-    shift 1
-    HADOOP_VERSION=$1
-    i=$((i+2))
-    shift 1
-    ;;
-
     "--apache-archive")
     shift 1
     APACHE_ARCHIVE=$1
@@ -125,12 +111,6 @@ while [ $i -le $j ]; do
     i=$((i+2))
     ;;
 
-    "--also-make")
-    shift 1
-    ALSO_MAKE=$1
-    i=$((i+2))
-    ;;
-
     "--maven-mirror")
     shift 1
     MAVEN_MIRROR=$1
@@ -153,7 +133,6 @@ function print_env() {
   echo "SET FLINK_MAJOR_VERSION=${FLINK_MAJOR_VERSION}"
   echo "SET SPARK_VERSION=${SPARK_VERSION}"
   echo "SET SPARK_MAJOR_VERSION=${SPARK_MAJOR_VERSION}"
-  echo "SET HADOOP_VERSION=${HADOOP_VERSION}"
   echo "SET APACHE_ARCHIVE=${APACHE_ARCHIVE}"
   echo "SET DEBIAN_MIRROR=${DEBIAN_MIRROR}"
   echo "SET AMORO_VERSION=${AMORO_VERSION}"
@@ -170,40 +149,8 @@ function print_image() {
    echo "Start Build ${image}:${tag} Image"
 }
 
-function build_namenode() {
-  echo "=============================================="
-  echo "               arctic163/namenode     "
-  echo "=============================================="
-  echo "Start Build arctic163/namenode Image"
-
-  set -x
-
-  find ./namenode -name "*.sh" | dos2unix
-  docker build -t arctic163/namenode \
-    --build-arg HADOOP_VERSION=${HADOOP_VERSION} \
-    --build-arg APACHE_ARCHIVE=${APACHE_ARCHIVE} \
-    --build-arg DEBIAN_MIRROR=${DEBIAN_MIRROR} \
-    namenode/.
-}
-
-function build_datanode() {
-  echo "=============================================="
-  echo "               arctic163/datanode     "
-  echo "=============================================="
-  echo "Start Build arctic163/datanode Image"
-
-  set -x
-
-  find ./datanode -name "*.sh" | dos2unix
-  docker build -t arctic163/datanode \
-    --build-arg HADOOP_VERSION=${HADOOP_VERSION} \
-    --build-arg APACHE_ARCHIVE=${APACHE_ARCHIVE} \
-    --build-arg DEBIAN_MIRROR=${DEBIAN_MIRROR} \
-    datanode/.
-}
-
 function build_optimizer_flink() {
-    local IMAGE_REF=arctic163/optimizer-flink
+    local IMAGE_REF=apache/amoro-flink-optimizer
     local IMAGE_TAG=$AMORO_TAG-flink${FLINK_MAJOR_VERSION}
     print_image $IMAGE_REF $IMAGE_TAG
 
@@ -226,7 +173,7 @@ function build_optimizer_flink() {
 }
 
 function build_optimizer_spark() {
-    local IMAGE_REF=arctic163/optimizer-spark
+    local IMAGE_REF=apache/amoro-spark-optimizer
     local IMAGE_TAG=$AMORO_TAG-spark${SPARK_MAJOR_VERSION}
     print_image $IMAGE_REF $IMAGE_TAG
 
@@ -249,7 +196,7 @@ function build_optimizer_spark() {
 }
 
 function build_amoro() {
-  local IMAGE_REF=arctic163/amoro
+  local IMAGE_REF=apache/amoro
   local IMAGE_TAG=$AMORO_TAG
   print_image $IMAGE_REF $IMAGE_TAG
 
@@ -268,65 +215,12 @@ function build_amoro() {
   return $?
 }
 
-function build_quickdemo() {
-    local IMAGE_REF=arctic163/quickdemo
-    local IMAGE_TAG=$AMORO_TAG
-
-    local 
FLINK_CONNECTOR_BINARY=${PROJECT_HOME}/amoro-mixed-format/amoro-mixed-format-flink/v${FLINK_MAJOR_VERSION}/amoro-mixed-format-flink-runtime-${FLINK_MAJOR_VERSION}/target/amoro-mixed-format-flink-runtime-${FLINK_MAJOR_VERSION}-${AMORO_VERSION}.jar
-    local 
SPARK_CONNECTOR_BINARY=${PROJECT_HOME}/amoro-mixed-format/amoro-mixed-format-spark/v${SPARK_MAJOR_VERSION}/amoro-mixed-format-spark-${SPARK_MAJOR_VERSION}-runtime/target/amoro-mixed-format-spark-${SPARK_MAJOR_VERSION}-runtime-${AMORO_VERSION}.jar
-
-    if [ ! -f "${FLINK_CONNECTOR_BINARY}" ]; then
-        echo "amoro-mixed-flink-connector not exists in 
${FLINK_CONNECTOR_BINARY}, run 'mvn clean package -pl !mixed/trino' first. "
-        exit  1
-    fi
-
-    if [ ! -f "${SPARK_CONNECTOR_BINARY}" ]; then
-        echo "amoro-mixed-spark-connector not exists in 
${SPARK_CONNECTOR_BINARY}, run 'mvn clean package -pl !mixed/trino' first. "
-        exit  1
-    fi
-
-    if [ "${ALSO_MAKE}" == "true" ]; then
-        echo "Build dependency Amoro image."
-        build_amoro
-        if [ "$?" -ne 0 ]; then
-          echo "Build required Amoro image failed."
-          exit 1
-        fi
-    fi
-
-    print_image $IMAGE_REF "$IMAGE_TAG"
-
-    set -x
-    cd "$PROJECT_HOME" || exit
-
-    docker build -t $IMAGE_REF:$IMAGE_TAG \
-      --build-arg AMORO_TAG=${AMORO_TAG} \
-      --build-arg DEBIAN_MIRROR=${DEBIAN_MIRROR} \
-      --build-arg APACHE_ARCHIVE=${APACHE_ARCHIVE} \
-      --build-arg FLINK_VERSION=${FLINK_VERSION} \
-      --build-arg SPARK_VERSION=${SPARK_VERSION} \
-      -f docker/quickdemo/Dockerfile .
-}
-
-
 case "$ACTION" in
-  quickdemo)
-    print_env
-    build_quickdemo
-    ;;
-  namenode)
-    print_env
-    build_namenode
-    ;;
-  datanode)
-    print_env
-    build_datanode
-    ;;
-  optimizer-flink)
+  amoro-flink-optimizer)
     print_env
     build_optimizer_flink
     ;;
-  optimizer-spark)
+  amoro-spark-optimizer)
     print_env
     build_optimizer_spark
     ;;
diff --git a/docker/datanode/Dockerfile b/docker/datanode/Dockerfile
deleted file mode 100644
index 911d7c4c9..000000000
--- a/docker/datanode/Dockerfile
+++ /dev/null
@@ -1,70 +0,0 @@
-
-#  Licensed to the Apache Software Foundation (ASF) under one
-#  or more contributor license agreements.  See the NOTICE file
-#  distributed with this work for additional information
-#  regarding copyright ownership.  The ASF licenses this file
-#  to you under the Apache License, Version 2.0 (the
-#  "License"); you may not use this file except in compliance
-#  with the License.  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-# limitations under the License.
-
-FROM openjdk:8u332-jdk
-MAINTAINER Arctic
-USER root
-
-# Default to UTF-8 file.encoding
-ENV LANG C.UTF-8
-
-ARG HADOOP_VERSION=2.10.2
-ARG APACHE_ARCHIVE=https://archive.apache.org/dist
-ARG DEBIAN_MIRROR=http://deb.debian.org
-
-ENV HADOOP_VERSION=${HADOOP_VERSION}
-ENV 
HADOOP_URL=${APACHE_ARCHIVE}/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
-ENV HADOOP_DN_PORT 50075
-
-RUN sed -i "s#http://deb.debian.org#${DEBIAN_MIRROR}#g"; /etc/apt/sources.list
-
-RUN set -x \
-    && DEBIAN_FRONTEND=noninteractive apt-get -yq update && apt-get -yq 
install curl wget netcat procps dos2unix \
-    && apt-get clean
-
-RUN echo "Fetch URL2 is : ${HADOOP_URL}" \
-    && curl -fSL "${HADOOP_URL}" -o /tmp/hadoop.tar.gz \
-    && mkdir -p /opt/hadoop-$HADOOP_VERSION/logs \
-    && tar -xvf /tmp/hadoop.tar.gz -C /opt/ \
-    && rm /tmp/hadoop.tar.gz* \
-    && mkdir /etc/hadoop \
-    && cp /opt/hadoop-$HADOOP_VERSION/etc/hadoop/mapred-site.xml.template 
/opt/hadoop-$HADOOP_VERSION/etc/hadoop/mapred-site.xml \
-    && mkdir /hadoop-data
-
-ENV HADOOP_PREFIX=/opt/hadoop-$HADOOP_VERSION
-ENV HADOOP_CONF_DIR=/etc/hadoop
-ENV MULTIHOMED_NETWORK=1
-ENV HADOOP_HOME=${HADOOP_PREFIX}
-ENV HADOOP_INSTALL=${HADOOP_HOME}
-ENV USER=root
-ENV PATH /usr/bin:/bin:$HADOOP_PREFIX/bin/:$PATH
-
-# Exposing a union of ports across hadoop versions
-# Well known ports including ssh
-EXPOSE 0-1024 4040 7000-10100 5000-5100 50000-50200 58188 58088 58042
-
-
-
-ENV HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
-RUN mkdir -p /hadoop/dfs/data
-VOLUME /hadoop/dfs/data
-
-ADD run_dn.sh /run_dn.sh
-RUN chmod a+x /run_dn.sh && dos2unix /run_dn.sh
-
-ENTRYPOINT ["/bin/bash", "/run_dn.sh"]
-
diff --git a/docker/datanode/run_dn.sh b/docker/datanode/run_dn.sh
deleted file mode 100644
index 4ff8a5529..000000000
--- a/docker/datanode/run_dn.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/bin/bash
-
-#  Licensed to the Apache Software Foundation (ASF) under one
-#  or more contributor license agreements.  See the NOTICE file
-#  distributed with this work for additional information
-#  regarding copyright ownership.  The ASF licenses this file
-#  to you under the Apache License, Version 2.0 (the
-#  "License"); you may not use this file except in compliance
-#  with the License.  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-# limitations under the License.
-
-interfaces=( "en0" "eth0" )
-
-ipAddr=""
-for interface in "${interfaces[@]}"
-do
-  ipAddr=`ifconfig $interface | grep -Eo 'inet (addr:)?([0-9]+\.){3}[0-9]+' | 
grep -Eo '([0-9]+\.){3}[0-9]+' | grep -v '127.0.0.1' | head`
-  if [ -n "$ipAddr" ]; then
-    break
-  fi
-done
-
-echo "Container IP is set to : $ipAddr"
-export MY_CONTAINER_IP=$ipAddr
-
-
-datadir=`echo $HDFS_CONF_dfs_datanode_data_dir | perl -pe 's#file://##'`
-if [ ! -d $datadir ]; then
-  echo "Datanode data directory not found: $datadir"
-  exit 2
-fi
-
-$HADOOP_PREFIX/bin/hdfs --config $HADOOP_CONF_DIR datanode
diff --git a/docker/demo-cluster.sh b/docker/demo-cluster.sh
deleted file mode 100755
index 7777b8b6c..000000000
--- a/docker/demo-cluster.sh
+++ /dev/null
@@ -1,186 +0,0 @@
-#!/usr/bin/env bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#  *
-#     http://www.apache.org/licenses/LICENSE-2.0
-#  *
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-
-# default to use the last built image when no specified tag given
-AMORO_TAG=latest
-
-
-CURRENT_DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-
-
-DOCKER_COMPOSE="${CURRENT_DIR}/docker-compose.yml"
-HADOOP_CONF="${CURRENT_DIR}/hadoop-config"
-VOLUMES_DIR="${CURRENT_DIR}/volumes"
-
-function usage() {
-    cat <<EOF
-Usage: $0 [options] [command]
-Build for Amoro demo docker images.
-
-Commands:
-    start                   Setup demo cluster
-    stop                    Stop demo cluster and remove containers, volume 
data will be kept.
-    clean                   clean volume data.
-
-Options:
-    -v    --version         Setup Amoro image version. default is ${AMORO_TAG}
-
-EOF
-}
-
-
-COMMAND=none
-
-i=1;
-j=$#;
-while [ $i -le $j ]; do
-    case $1 in
-      start|stop|clean)
-      COMMAND=$1;
-      i=$((i+1))
-      shift 1
-      ;;
-
-      "-v"|"--version")
-      shift 1
-      AMORO_TAG=$1
-      i=$((i+2))
-      shift 1
-      ;;
-
-      *)
-      echo "Unknown args of $1"
-      usage
-      exit 1
-      ;;
-    esac
-done
-
-
-function create_docker_compose() {
-  echo "Write docker-compose file to $DOCKER_COMPOSE"
-  cat <<EOT >> docker-compose.yml
-version: "3"
-services:
-  namenode:
-    image: arctic163/namenode
-    hostname: namenode
-    container_name: namenode
-    environment:
-      - CLUSTER_NAME=demo-cluster
-      - CORE_CONF_hadoop_http_staticuser_user=root
-      - CORE_CONF_hadoop_proxyuser_amoro_hosts=*
-      - CORE_CONF_hadoop_proxyuser_amoro_groups=*
-      - HDFS_CONF_dfs_replication=1
-      - HDFS_CONF_dfs_permissions_enabled=false
-      - HDFS_CONF_dfs_webhdfs_enabled=true
-    networks:
-      - amoro_network
-    ports:
-      - 10070:50070
-      - 8020:8020
-    volumes:
-      - ${HADOOP_CONF}:/etc/hadoop
-      - ${VOLUMES_DIR}/namenode:/hadoop/dfs/name
-
-  datanode:
-    image: arctic163/datanode
-    container_name: datanode
-    environment:
-      - CLUSTER_NAME=demo-cluster
-    hostname: datanode
-    volumes:
-      - ${HADOOP_CONF}:/etc/hadoop
-      - ${VOLUMES_DIR}/datanode:/hadoop/dfs/data
-    networks:
-      - amoro_network
-    ports:
-      - 10075:50075
-      - 10010:50010
-    depends_on:
-      - namenode
-
-  quickdemo:
-    image: arctic163/quickdemo:${AMORO_TAG}
-    container_name: quickdemo
-    ports:
-      - 8081:8081
-      - 1630:1630
-      - 1260:1260
-    environment:
-      - JVM_XMS=1024
-    networks:
-      - amoro_network
-    volumes:
-      - ${VOLUMES_DIR}/amoro:/tmp/amoro
-    command: "ams"
-    tty: true
-    stdin_open: true
-
-networks:
-  amoro_network:
-    driver: bridge
-EOT
-}
-
-
-function start() {
-  echo "SET AMORO_VERSION=${AMORO_TAG}"
-
-  echo "generate docker compose"
-  if [ ! -f "$DOCKER_COMPOSE" ]; then
-    create_docker_compose
-  fi
-
-  echo "start cluster"
-  docker-compose up -d
-}
-
-function stop() {
-  docker-compose down
-}
-
-function clean() {
-  test -f "$DOCKER_COMPOSE" && rm "$DOCKER_COMPOSE"
-  test -d "${HADOOP_CONF}" && rm "${HADOOP_CONF}" -rf
-  test -d "${VOLUMES_DIR}" && rm "${VOLUMES_DIR}" -rf
-}
-
-set +x
-
-case "$COMMAND" in
-  start)
-    start
-    ;;
-  stop)
-    stop
-    ;;
-  clean)
-    clean
-    ;;
-  none)
-    usage
-    exit 1
-    ;;
-  *)
-    echo "Unknown command type: $COMMAND"
-    exit 1
-    ;;
-esac
diff --git a/docker/namenode/Dockerfile b/docker/namenode/Dockerfile
deleted file mode 100644
index 81dfa70be..000000000
--- a/docker/namenode/Dockerfile
+++ /dev/null
@@ -1,76 +0,0 @@
-
-#  Licensed to the Apache Software Foundation (ASF) under one
-#  or more contributor license agreements.  See the NOTICE file
-#  distributed with this work for additional information
-#  regarding copyright ownership.  The ASF licenses this file
-#  to you under the Apache License, Version 2.0 (the
-#  "License"); you may not use this file except in compliance
-#  with the License.  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-# limitations under the License.
-FROM openjdk:8u332-jdk
-MAINTAINER Arctic
-USER root
-
-# Default to UTF-8 file.encoding
-ENV LANG C.UTF-8
-
-ARG HADOOP_VERSION=2.10.2
-ARG APACHE_ARCHIVE=https://archive.apache.org/dist
-ARG DEBIAN_MIRROR=http://deb.debian.org
-
-ENV HADOOP_VERSION=${HADOOP_VERSION}
-ENV 
HADOOP_URL=${APACHE_ARCHIVE}/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
-ENV HADOOP_WEBHDFS_PORT=50070
-
-
-RUN sed -i "s#http://deb.debian.org#${DEBIAN_MIRROR}#g"; /etc/apt/sources.list
-
-RUN set -x \
-    && DEBIAN_FRONTEND=noninteractive apt-get -yq update && apt-get -yq 
install curl wget netcat procps dos2unix net-tools \
-    && apt-get clean
-
-RUN set -x \
-    && echo "Fetch URL2 is : ${HADOOP_URL}" \
-    && curl -fSL "${HADOOP_URL}" -o /tmp/hadoop.tar.gz \
-    && mkdir -p /opt/hadoop-$HADOOP_VERSION/logs \
-    && tar -xvf /tmp/hadoop.tar.gz -C /opt/ \
-    && rm /tmp/hadoop.tar.gz* \
-    && mkdir /etc/hadoop \
-    && cp /opt/hadoop-$HADOOP_VERSION/etc/hadoop/mapred-site.xml.template 
/opt/hadoop-$HADOOP_VERSION/etc/hadoop/mapred-site.xml \
-    && mkdir /hadoop-data
-
-ENV HADOOP_PREFIX=/opt/hadoop-$HADOOP_VERSION
-ENV HADOOP_CONF_DIR=/etc/hadoop
-ENV MULTIHOMED_NETWORK=1
-ENV HADOOP_HOME=${HADOOP_PREFIX}
-ENV HADOOP_INSTALL=${HADOOP_HOME}
-ENV USER=root
-ENV PATH /usr/bin:/bin:$HADOOP_PREFIX/bin/:$PATH
-
-# Exposing a union of ports across hadoop versions
-# Well known ports including ssh
-EXPOSE 0-1024 4040 7000-10100 5000-5100 50000-50200 58188 58088 58042
-
-ADD entrypoint.sh /entrypoint.sh
-ADD export_container_ip.sh /usr/bin/
-RUN chmod a+x /usr/bin/export_container_ip.sh \
-    && chmod a+x /entrypoint.sh \
-    && dos2unix /usr/bin/export_container_ip.sh \
-    && dos2unix /entrypoint.sh
-
-
-ENV HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
-RUN mkdir -p /hadoop/dfs/name
-VOLUME /hadoop/dfs/name
-
-ADD run_nn.sh /run_nn.sh
-RUN chmod a+x /run_nn.sh && dos2unix /run_nn.sh
-
-CMD ["/bin/bash", "/entrypoint.sh"]
diff --git a/docker/namenode/entrypoint.sh b/docker/namenode/entrypoint.sh
deleted file mode 100644
index 3d0bea44b..000000000
--- a/docker/namenode/entrypoint.sh
+++ /dev/null
@@ -1,124 +0,0 @@
-#!/bin/bash
-
-#  Licensed to the Apache Software Foundation (ASF) under one
-#  or more contributor license agreements.  See the NOTICE file
-#  distributed with this work for additional information
-#  regarding copyright ownership.  The ASF licenses this file
-#  to you under the Apache License, Version 2.0 (the
-#  "License"); you may not use this file except in compliance
-#  with the License.  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-#######################################################################################
-##            COPIED FROM                                                            ##
-##  https://github.com/big-data-europe/docker-hadoop/blob/master/base/entrypoint.sh  ##
-#                                                                                    ##
-#######################################################################################
-
-# Set some sensible defaults
-export CORE_CONF_fs_defaultFS=${CORE_CONF_fs_defaultFS:-hdfs://`hostname -f`:8020}
-
-function addProperty() {
-  local path=$1
-  local name=$2
-  local value=$3
-
-  local entry="<property><name>$name</name><value>${value}</value></property>"
-  local escapedEntry=$(echo $entry | sed 's/\//\\\//g')
-
-  if [ ! -f "$path" ]; then
-      cp $HADOOP_HOME/$path $path
-  fi
-
-  sed -i "/<\/configuration>/ s/.*/${escapedEntry}\n&/" $path
-}
-
-function configure() {
-    local path=$1
-    local module=$2
-    local envPrefix=$3
-
-    local var
-    local value
-
-    echo "Configuring $module"
-    for c in `printenv | perl -sne 'print "$1 " if m/^${envPrefix}_(.+?)=.*/' -- -envPrefix=$envPrefix`; do 
-        name=`echo ${c} | perl -pe 's/___/-/g; s/__/@/g; s/_/./g; s/@/_/g;'`
-        var="${envPrefix}_${c}"
-        value=${!var}
-        echo " - Setting $name=$value"
-        addProperty /etc/hadoop/$module-site.xml $name "$value"
-    done
-}
-
-set +x
-
-test -f /etc/hadoop/core-site.xml && rm /etc/hadoop/core-site.xml
-test -f /etc/hadoop/hdfs-site.xml && rm /etc/hadoop/hdfs-site.xml
-test -f /etc/hadoop/yarn-site.xml && rm /etc/hadoop/yarn-site.xml
-test -f /etc/hadoop/httpfs-site.xml && rm /etc/hadoop/httpfs-site.xml
-test -f /etc/hadoop/kms-site.xml && rm /etc/hadoop/kms-site.xml
-
-mkdir -p /etc/hadoop
-cp $HADOOP_HOME/etc/hadoop/core-site.xml /etc/hadoop/core-site.xml
-cp $HADOOP_HOME/etc/hadoop/hdfs-site.xml /etc/hadoop/hdfs-site.xml
-
-configure /etc/hadoop/core-site.xml core CORE_CONF
-configure /etc/hadoop/hdfs-site.xml hdfs HDFS_CONF
-configure /etc/hadoop/yarn-site.xml yarn YARN_CONF
-configure /etc/hadoop/httpfs-site.xml httpfs HTTPFS_CONF
-configure /etc/hadoop/kms-site.xml kms KMS_CONF
-
-if [ "$MULTIHOMED_NETWORK" = "1" ]; then
-    echo "Configuring for multihomed network"
-
-    # HDFS
-    addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.rpc-bind-host `hostname 
-f`
-    addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.servicerpc-bind-host 
`hostname -f`
-    addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.http-bind-host 
`hostname -f`
-    addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.https-bind-host 
`hostname -f`
-    addProperty /etc/hadoop/hdfs-site.xml dfs.client.use.datanode.hostname true
-    addProperty /etc/hadoop/hdfs-site.xml dfs.datanode.use.datanode.hostname 
true
-
-    # YARN
-    addProperty /etc/hadoop/yarn-site.xml yarn.resourcemanager.bind-host 
0.0.0.0
-    addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0
-    addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0
-    addProperty /etc/hadoop/yarn-site.xml yarn.timeline-service.bind-host 
0.0.0.0
-
-    # MAPRED
-    addProperty /etc/hadoop/mapred-site.xml yarn.nodemanager.bind-host 0.0.0.0
-fi
-
-if [ -n "$GANGLIA_HOST" ]; then
-    mv /etc/hadoop/hadoop-metrics.properties 
/etc/hadoop/hadoop-metrics.properties.orig
-    mv /etc/hadoop/hadoop-metrics2.properties 
/etc/hadoop/hadoop-metrics2.properties.orig
-
-    for module in mapred jvm rpc ugi; do
-        echo "$module.class=org.apache.hadoop.metrics.ganglia.GangliaContext31"
-        echo "$module.period=10"
-        echo "$module.servers=$GANGLIA_HOST:8649"
-    done > /etc/hadoop/hadoop-metrics.properties
-    
-    for module in namenode datanode resourcemanager nodemanager mrappmaster 
jobhistoryserver; do
-        echo 
"$module.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31"
-        echo "$module.sink.ganglia.period=10"
-        echo "$module.sink.ganglia.supportsparse=true"
-        echo 
"$module.sink.ganglia.slope=jvm.metrics.gcCount=zero,jvm.metrics.memHeapUsedM=both"
-        echo 
"$module.sink.ganglia.dmax=jvm.metrics.threadsBlocked=70,jvm.metrics.memHeapUsedM=40"
-        echo "$module.sink.ganglia.servers=$GANGLIA_HOST:8649"
-    done > /etc/hadoop/hadoop-metrics2.properties
-fi
-
-# Save Container IP in ENV variable
-/usr/bin/export_container_ip.sh
-
-/run_nn.sh
diff --git a/docker/namenode/export_container_ip.sh 
b/docker/namenode/export_container_ip.sh
deleted file mode 100755
index b427f92cc..000000000
--- a/docker/namenode/export_container_ip.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-
-#  Licensed to the Apache Software Foundation (ASF) under one
-#  or more contributor license agreements.  See the NOTICE file
-#  distributed with this work for additional information
-#  regarding copyright ownership.  The ASF licenses this file
-#  to you under the Apache License, Version 2.0 (the
-#  "License"); you may not use this file except in compliance
-#  with the License.  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-# limitations under the License.
-
-interfaces=( "en0" "eth0" )
-
-ipAddr=""
-for interface in "${interfaces[@]}"
-do
-  ipAddr=`ifconfig $interface | grep -Eo 'inet (addr:)?([0-9]+\.){3}[0-9]+' | grep -Eo '([0-9]+\.){3}[0-9]+' | grep -v '127.0.0.1' | head`
-  if [ -n "$ipAddr" ]; then
-    break
-  fi 
-done
-
-echo "Container IP is set to : $ipAddr"
-export MY_CONTAINER_IP=$ipAddr
diff --git a/docker/namenode/run_nn.sh b/docker/namenode/run_nn.sh
deleted file mode 100644
index bd1934a53..000000000
--- a/docker/namenode/run_nn.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash
-
-#  Licensed to the Apache Software Foundation (ASF) under one
-#  or more contributor license agreements.  See the NOTICE file
-#  distributed with this work for additional information
-#  regarding copyright ownership.  The ASF licenses this file
-#  to you under the Apache License, Version 2.0 (the
-#  "License"); you may not use this file except in compliance
-#  with the License.  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-# limitations under the License.
-
-set +x
-
-namedir=`echo $HDFS_CONF_dfs_namenode_name_dir | perl -pe 's#file://##'`
-if [ ! -d $namedir ]; then
-  echo "Namenode name directory not found: $namedir"
-  exit 2
-fi
-
-if [ -z "$CLUSTER_NAME" ]; then
-  echo "Cluster name not specified"
-  exit 2
-fi
-
-if [ "`ls -A $namedir`" == "" ]; then
-  echo "Formatting namenode name directory: $namedir"
-  $HADOOP_PREFIX/bin/hdfs --config $HADOOP_CONF_DIR namenode -format $CLUSTER_NAME 
-fi
-
-$HADOOP_PREFIX/bin/hdfs --config $HADOOP_CONF_DIR namenode
diff --git a/docker/quickdemo/Dockerfile b/docker/quickdemo/Dockerfile
deleted file mode 100644
index eef22c504..000000000
--- a/docker/quickdemo/Dockerfile
+++ /dev/null
@@ -1,74 +0,0 @@
-
-#  Licensed to the Apache Software Foundation (ASF) under one
-#  or more contributor license agreements.  See the NOTICE file
-#  distributed with this work for additional information
-#  regarding copyright ownership.  The ASF licenses this file
-#  to you under the Apache License, Version 2.0 (the
-#  "License"); you may not use this file except in compliance
-#  with the License.  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Usage:
-#   Run the docker command below under project dir.
-#      docker build \
-#        --file docker/amoro/Dockerfile \
-#        --tag arctic163/amoro:tagname \
-#        --build-arg AMORO_TAG=${AMORO_TAG} \
-#        --build-arg FLINK_VERSION=${FLINK_VERSION} \
-#        .
-ARG AMORO_TAG=master-snapshot
-
-FROM eclipse-temurin:8-jdk-jammy as builder
-ARG FLINK_VERSION=1.15.3
-
-ADD . /workspace/amoro
-WORKDIR /workspace/amoro
-
-RUN AMORO_VERSION=`cat pom.xml | grep 'amoro-parent' -C 3 | grep -Eo '<version>.*</version>' | awk -F'[><]' '{print $3}'` \
-    && FLINK_VERSION=${FLINK_VERSION} \
-    && FLINK_MAJOR_VERSION="${FLINK_VERSION%.*}" \
-    && cp amoro-mixed-format/amoro-mixed-format-flink/v${FLINK_MAJOR_VERSION}/amoro-mixed-format-flink-runtime-${FLINK_MAJOR_VERSION}/target/amoro-mixed-format-flink-runtime-${FLINK_MAJOR_VERSION}-${AMORO_VERSION}.jar \
-      /opt/amoro-mixed-format-flink-runtime-${FLINK_MAJOR_VERSION}-${AMORO_VERSION}.jar \
-    && rm -rf /workspace/amoro
-
-
-FROM arctic163/amoro:${AMORO_TAG}
-
-ARG FLINK_VERSION=1.15.3
-ARG FLINK_HADOOP_VERSION=2.8.3-10.0
-ARG HIVE_VERSION=2.1.1
-ARG ICEBERG_VERSION=1.1.0
-ARG APACHE_ARCHIVE=https://archive.apache.org/dist
-ARG MAVEN_URL=https://maven.aliyun.com/repository/central
-ARG DEBIAN_MIRROR=http://deb.debian.org
-
-
-RUN apt-get update \
-    && apt-get install -y netcat \
-    && apt-get clean
-
-WORKDIR /usr/local
-RUN wget ${APACHE_ARCHIVE}/flink/flink-${FLINK_VERSION}/flink-${FLINK_VERSION}-bin-scala_2.12.tgz \
-    && tar -zxvf flink-${FLINK_VERSION}-bin-scala_2.12.tgz \
-    && rm -f flink-${FLINK_VERSION}-bin-scala_2.12.tgz
-ENV FLINK_HOME=/usr/local/flink-${FLINK_VERSION} \
-    PATH=${PATH}:${FLINK_HOME}/bin
-WORKDIR ${FLINK_HOME}/lib
-RUN cp ../examples/table/ChangelogSocketExample.jar . \
-    && FLINK_VERSION=${FLINK_VERSION} \
-    && FLINK_MAJOR_VERSION="${FLINK_VERSION%.*}" \
-    && wget ${MAVEN_URL}/org/apache/flink/flink-shaded-hadoop-2-uber/${FLINK_HADOOP_VERSION}/flink-shaded-hadoop-2-uber-${FLINK_HADOOP_VERSION}.jar \
-    && wget ${MAVEN_URL}/org/apache/hive/hive-exec/${HIVE_VERSION}/hive-exec-${HIVE_VERSION}.jar \
-    && wget ${MAVEN_URL}/org/apache/iceberg/iceberg-flink-runtime-${FLINK_MAJOR_VERSION}/${ICEBERG_VERSION}/iceberg-flink-runtime-${FLINK_MAJOR_VERSION}-${ICEBERG_VERSION}.jar
-WORKDIR ${FLINK_HOME}
-
-COPY --from=builder /opt/*.jar ${FLINK_HOME}/lib/
-COPY ./docker/quickdemo/flink-conf.yaml ${FLINK_HOME}/conf/flink-conf.yaml
-RUN ln -s ${FLINK_HOME} /usr/local/flink
diff --git a/docker/quickdemo/flink-conf.yaml b/docker/quickdemo/flink-conf.yaml
deleted file mode 100644
index 6bbe39df9..000000000
--- a/docker/quickdemo/flink-conf.yaml
+++ /dev/null
@@ -1,260 +0,0 @@
-################################################################################
-#  Licensed to the Apache Software Foundation (ASF) under one
-#  or more contributor license agreements.  See the NOTICE file
-#  distributed with this work for additional information
-#  regarding copyright ownership.  The ASF licenses this file
-#  to you under the Apache License, Version 2.0 (the
-#  "License"); you may not use this file except in compliance
-#  with the License.  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-# limitations under the License.
-################################################################################
-
-
-#==============================================================================
-# Common
-#==============================================================================
-
-# The external address of the host on which the JobManager runs and can be
-# reached by the TaskManagers and any clients which want to connect. This 
setting
-# is only used in Standalone mode and may be overwritten on the JobManager side
-# by specifying the --host <hostname> parameter of the bin/jobmanager.sh 
executable.
-# In high availability mode, if you use the bin/start-cluster.sh script and 
setup
-# the conf/masters file, this will be taken care of automatically. Yarn/Mesos
-# automatically configure the host name based on the hostname of the node 
where the
-# JobManager runs.
-
-jobmanager.rpc.address: localhost
-
-# The RPC port where the JobManager is reachable.
-
-jobmanager.rpc.port: 6123
-
-
-# The total process memory size for the JobManager.
-#
-# Note this accounts for all memory usage within the JobManager process, 
including JVM metaspace and other overhead.
-
-jobmanager.memory.process.size: 1600m
-
-
-# The total process memory size for the TaskManager.
-#
-# Note this accounts for all memory usage within the TaskManager process, 
including JVM metaspace and other overhead.
-
-taskmanager.memory.process.size: 1728m
-
-# To exclude JVM metaspace and overhead, please, use total Flink memory size 
instead of 'taskmanager.memory.process.size'.
-# It is not recommended to set both 'taskmanager.memory.process.size' and 
Flink memory.
-#
-# taskmanager.memory.flink.size: 1280m
-
-# The number of task slots that each TaskManager offers. Each slot runs one 
parallel pipeline.
-
-# there will be 2 task in arctic quick start, so set slot to 4
-taskmanager.numberOfTaskSlots: 4
-
-# enable checkpoints to commit arctic table
-execution.checkpointing.interval: 10s
-
-# The parallelism used for programs that did not specify and other parallelism.
-
-parallelism.default: 1
-
-# The default file system scheme and authority.
-# 
-# By default file paths without scheme are interpreted relative to the local
-# root file system 'file:///'. Use this to override the default and interpret
-# relative paths relative to a different file system,
-# for example 'hdfs://mynamenode:12345'
-#
-# fs.default-scheme
-
-#==============================================================================
-# High Availability
-#==============================================================================
-
-# The high-availability mode. Possible options are 'NONE' or 'zookeeper'.
-#
-# high-availability: zookeeper
-
-# The path where metadata for master recovery is persisted. While ZooKeeper 
stores
-# the small ground truth for checkpoint and leader election, this location 
stores
-# the larger objects, like persisted dataflow graphs.
-# 
-# Must be a durable file system that is accessible from all nodes
-# (like HDFS, S3, Ceph, nfs, ...) 
-#
-# high-availability.storageDir: hdfs:///flink/ha/
-
-# The list of ZooKeeper quorum peers that coordinate the high-availability
-# setup. This must be a list of the form:
-# "host1:clientPort,host2:clientPort,..." (default clientPort: 2181)
-#
-# high-availability.zookeeper.quorum: localhost:2181
-
-
-# ACL options are based on 
https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_BuiltinACLSchemes
-# It can be either "creator" (ZOO_CREATE_ALL_ACL) or "open" 
(ZOO_OPEN_ACL_UNSAFE)
-# The default value is "open" and it can be changed to "creator" if ZK 
security is enabled
-#
-# high-availability.zookeeper.client.acl: open
-
-#==============================================================================
-# Fault tolerance and checkpointing
-#==============================================================================
-
-# The backend that will be used to store operator state checkpoints if
-# checkpointing is enabled.
-#
-# Supported backends are 'jobmanager', 'filesystem', 'rocksdb', or the
-# <class-name-of-factory>.
-#
-# state.backend: filesystem
-
-# Directory for checkpoints filesystem, when using any of the default bundled
-# state backends.
-#
-# state.checkpoints.dir: hdfs://namenode-host:port/flink-checkpoints
-
-# Default target directory for savepoints, optional.
-#
-# state.savepoints.dir: hdfs://namenode-host:port/flink-savepoints
-
-# Flag to enable/disable incremental checkpoints for backends that
-# support incremental checkpoints (like the RocksDB state backend). 
-#
-# state.backend.incremental: false
-
-# The failover strategy, i.e., how the job computation recovers from task 
failures.
-# Only restart tasks that may have been affected by the task failure, which 
typically includes
-# downstream tasks and potentially upstream tasks if their produced data is no 
longer available for consumption.
-
-jobmanager.execution.failover-strategy: region
-
-#==============================================================================
-# Rest & web frontend
-#==============================================================================
-
-# The port to which the REST client connects to. If rest.bind-port has
-# not been specified, then the server will bind to this port as well.
-#
-#rest.port: 8081
-
-# The address to which the REST client will connect to
-#
-#rest.address: 0.0.0.0
-
-# Port range for the REST and web server to bind to.
-#
-#rest.bind-port: 8080-8090
-
-# The address that the REST & web server binds to
-#
-#rest.bind-address: 0.0.0.0
-
-# Flag to specify whether job submission is enabled from the web-based
-# runtime monitor. Uncomment to disable.
-
-#web.submit.enable: false
-
-#==============================================================================
-# Advanced
-#==============================================================================
-
-# Override the directories for temporary files. If not specified, the
-# system-specific Java temporary directory (java.io.tmpdir property) is taken.
-#
-# For framework setups on Yarn or Mesos, Flink will automatically pick up the
-# containers' temp directories without any need for configuration.
-#
-# Add a delimited list for multiple directories, using the system directory
-# delimiter (colon ':' on unix) or a comma, e.g.:
-#     /data1/tmp:/data2/tmp:/data3/tmp
-#
-# Note: Each directory entry is read from and written to by a different I/O
-# thread. You can include the same directory multiple times in order to create
-# multiple I/O threads against that directory. This is for example relevant for
-# high-throughput RAIDs.
-#
-# io.tmp.dirs: /tmp
-
-# The classloading resolve order. Possible values are 'child-first' (Flink's 
default)
-# and 'parent-first' (Java's default).
-#
-# Child first classloading allows users to use different dependency/library
-# versions in their application than those in the classpath. Switching back
-# to 'parent-first' may help with debugging dependency issues.
-#
-# classloader.resolve-order: child-first
-
-# The amount of memory going to the network stack. These numbers usually need 
-# no tuning. Adjusting them may be necessary in case of an "Insufficient number
-# of network buffers" error. The default min is 64MB, the default max is 1GB.
-# 
-# taskmanager.memory.network.fraction: 0.1
-# taskmanager.memory.network.min: 64mb
-# taskmanager.memory.network.max: 1gb
-
-#==============================================================================
-# Flink Cluster Security Configuration
-#==============================================================================
-
-# Kerberos authentication for various components - Hadoop, ZooKeeper, and 
connectors -
-# may be enabled in four steps:
-# 1. configure the local krb5.conf file
-# 2. provide Kerberos credentials (either a keytab or a ticket cache w/ kinit)
-# 3. make the credentials available to various JAAS login contexts
-# 4. configure the connector to use JAAS/SASL
-
-# The below configure how Kerberos credentials are provided. A keytab will be 
used instead of
-# a ticket cache if the keytab path and principal are set.
-
-# security.kerberos.login.use-ticket-cache: true
-# security.kerberos.login.keytab: /path/to/kerberos/keytab
-# security.kerberos.login.principal: flink-user
-
-# The configuration below defines which JAAS login contexts
-
-# security.kerberos.login.contexts: Client,KafkaClient
-
-#==============================================================================
-# ZK Security Configuration
-#==============================================================================
-
-# Below configurations are applicable if ZK ensemble is configured for security
-
-# Override below configuration to provide custom ZK service name if configured
-# zookeeper.sasl.service-name: zookeeper
-
-# The configuration below must match one of the values set in 
"security.kerberos.login.contexts"
-# zookeeper.sasl.login-context-name: Client
-
-#==============================================================================
-# HistoryServer
-#==============================================================================
-
-# The HistoryServer is started and stopped via bin/historyserver.sh 
(start|stop)
-
-# Directory to upload completed jobs to. Add this directory to the list of
-# monitored directories of the HistoryServer as well (see below).
-#jobmanager.archive.fs.dir: hdfs:///completed-jobs/
-
-# The address under which the web-based HistoryServer listens.
-#historyserver.web.address: 0.0.0.0
-
-# The port under which the web-based HistoryServer listens.
-#historyserver.web.port: 8082
-
-# Comma separated list of directories to monitor for completed jobs.
-#historyserver.archive.fs.dir: hdfs:///completed-jobs/
-
-# Interval in milliseconds for refreshing the monitored directories.
-#historyserver.archive.fs.refresh-interval: 10000
-
diff --git a/docs/admin-guides/deployment-on-kubernetes.md 
b/docs/admin-guides/deployment-on-kubernetes.md
index b6228bf62..e924ccdb6 100644
--- a/docs/admin-guides/deployment-on-kubernetes.md
+++ b/docs/admin-guides/deployment-on-kubernetes.md
@@ -21,15 +21,15 @@ If you want to deploy AMS on Kubernetes, you’d better get a sense of the follo
 
 ## Amoro Official Docker Image
 
-You can find the official docker image at [Amoro Docker Hub](https://hub.docker.com/u/arctic163).
+You can find the official docker image at [Amoro Docker Hub](https://hub.docker.com/u/apache).
 
 The following are images that can be used in a production environment.
 
-**arctic163/amoro**
+**apache/amoro**
 
 This is an image built based on the Amoro binary distribution package for 
deploying AMS.
 
-**arctic163/optimizer-flink**
+**apache/amoro-flink-optimizer**
 
 This is an image built based on the official version of Flink for deploying 
the Flink optimizer.
 
@@ -171,7 +171,7 @@ optimizer:
     name: ~ 
     image:
       ## the image repository
-      repository: arctic163/optimizer-flink
+      repository: apache/amoro-flink-optimizer
       ## the image tag, if not set, the default value is the same with amoro 
image tag.
       tag: ~
       ## the location of flink optimizer jar in image.
diff --git a/docs/admin-guides/managing-optimizers.md 
b/docs/admin-guides/managing-optimizers.md
index f7225029b..42d126b61 100644
--- a/docs/admin-guides/managing-optimizers.md
+++ b/docs/admin-guides/managing-optimizers.md
@@ -55,7 +55,7 @@ containers:
     container-impl: 
org.apache.amoro.server.manager.KubernetesOptimizerContainer
     properties:
       kube-config-path: ～/.kube/config
-      image: apache/amoro:0.6
+      image: apache/amoro:{version}
 ```
 
 ### Flink container
@@ -113,7 +113,7 @@ containers:
       job-uri: "local:///opt/flink/usrlib/optimizer-job.jar"                   
      # Optimizer job main jar for kubernetes application
       ams-optimizing-uri: thrift://ams.amoro.service.local:1261                
      # AMS optimizing uri 
       export.FLINK_CONF_DIR: /opt/flink/conf/                                  
      # Flink config dir
-      flink-conf.kubernetes.container.image: "arctic163/optimizer-flink:{version}"   # Optimizer image ref
+      flink-conf.kubernetes.container.image: "apache/amoro-flink-optimizer:{version}"   # Optimizer image ref
       flink-conf.kubernetes.service-account: flink                             
      # Service account that is used within kubernetes cluster.
 ```
 
@@ -189,20 +189,20 @@ containers:
   - name: sparkContainer
     container-impl: org.apache.amoro.server.manager.SparkOptimizerContainer
     properties:
-      spark-home: /opt/spark/                                                  
            # Spark install home
-      master: k8s://https://<k8s-apiserver-host>:<k8s-apiserver-port>          
            # The k8s cluster manager to connect to
-      deploy-mode: cluster                                                     
            # Spark deploy mode, client or cluster
-      job-uri: "local:///opt/spark/usrlib/optimizer-job.jar"                   
            # Optimizer job main jar for kubernetes application
-      ams-optimizing-uri: thrift://ams.amoro.service.local:1261                
            # AMS optimizing uri 
-      export.HADOOP_USER_NAME: hadoop                                          
            # Hadoop user submits on yarn
-      export.HADOOP_CONF_DIR: /etc/hadoop/conf/                                
            # Hadoop config dir
-      export.SPARK_CONF_DIR: /opt/spark/conf/                                  
            # Spark config dir
-      spark-conf.spark.kubernetes.container.image: 
"arctic163/optimizer-spark:{version}"   # Optimizer image ref
-      spark-conf.spark.dynamicAllocation.enabled: "true"                       
            # Enabling DRA feature can make full use of computing resources
-      spark-conf.spark.shuffle.service.enabled: "false"                        
            # If spark DRA is used on kubernetes, we should set it false
-      spark-conf.spark.dynamicAllocation.shuffleTracking.enabled: "true"       
            # Enables shuffle file tracking for executors, which allows dynamic 
allocation without the need for an ESS
-      spark-conf.spark.kubernetes.namespace: <spark-namespace>                 
            # Namespace that is used within kubernetes cluster
-      spark-conf.spark.kubernetes.authenticate.driver.serviceAccountName: 
<spark-sa>       # Service account that is used within kubernetes cluster
+      spark-home: /opt/spark/                                                  
               # Spark install home
+      master: k8s://https://<k8s-apiserver-host>:<k8s-apiserver-port>          
               # The k8s cluster manager to connect to
+      deploy-mode: cluster                                                     
               # Spark deploy mode, client or cluster
+      job-uri: "local:///opt/spark/usrlib/optimizer-job.jar"                   
               # Optimizer job main jar for kubernetes application
+      ams-optimizing-uri: thrift://ams.amoro.service.local:1261                
               # AMS optimizing uri 
+      export.HADOOP_USER_NAME: hadoop                                          
               # Hadoop user submits on yarn
+      export.HADOOP_CONF_DIR: /etc/hadoop/conf/                                
               # Hadoop config dir
+      export.SPARK_CONF_DIR: /opt/spark/conf/                                  
               # Spark config dir
+      spark-conf.spark.kubernetes.container.image: 
"apache/amoro-spark-optimizer:{version}"   # Optimizer image ref
+      spark-conf.spark.dynamicAllocation.enabled: "true"                       
               # Enabling DRA feature can make full use of computing resources
+      spark-conf.spark.shuffle.service.enabled: "false"                        
               # If spark DRA is used on kubernetes, we should set it false
+      spark-conf.spark.dynamicAllocation.shuffleTracking.enabled: "true"       
               # Enables shuffle file tracking for executors, which allows 
dynamic allocation without the need for an ESS
+      spark-conf.spark.kubernetes.namespace: <spark-namespace>                 
               # Namespace that is used within kubernetes cluster
+      spark-conf.spark.kubernetes.authenticate.driver.serviceAccountName: 
<spark-sa>          # Service account that is used within kubernetes cluster
 ```

(amoro) branch master updated: [AMORO-2815] Adjust docker images of apache amoro project (#2818)

Reply via email to