This is an automated email from the ASF dual-hosted git repository.
jinsongzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/amoro.git
The following commit(s) were added to refs/heads/master by this push:
new 5291c152e [AMORO-2815] Adjust docker images of apache amoro project
(#2818)
5291c152e is described below
commit 5291c152e60b68c6a3115d74a4308bb868b5d1c8
Author: tcodehuber <[email protected]>
AuthorDate: Sat May 11 11:28:09 2024 +0800
[AMORO-2815] Adjust docker images of apache amoro project (#2818)
* [AMORO-2815] Adjust docker images of apache amoro project
* code refactor
* build master-snapshot image
---
.github/workflows/docker-images.yml | 69 +-----
.../dist/src/main/arctic-bin/conf/config.yaml | 12 +-
charts/amoro/values.yaml | 8 +-
docker/README.md | 13 +-
docker/amoro/Dockerfile | 2 +-
docker/build.sh | 124 +---------
docker/datanode/Dockerfile | 70 ------
docker/datanode/run_dn.sh | 40 ----
docker/demo-cluster.sh | 186 ---------------
docker/namenode/Dockerfile | 76 ------
docker/namenode/entrypoint.sh | 124 ----------
docker/namenode/export_container_ip.sh | 30 ---
docker/namenode/run_nn.sh | 37 ---
docker/quickdemo/Dockerfile | 74 ------
docker/quickdemo/flink-conf.yaml | 260 ---------------------
docs/admin-guides/deployment-on-kubernetes.md | 8 +-
docs/admin-guides/managing-optimizers.md | 32 +--
17 files changed, 50 insertions(+), 1115 deletions(-)
diff --git a/.github/workflows/docker-images.yml
b/.github/workflows/docker-images.yml
index 52e732604..3494dc4b0 100644
--- a/.github/workflows/docker-images.yml
+++ b/.github/workflows/docker-images.yml
@@ -62,7 +62,7 @@ jobs:
flavor: |
latest=auto
images: |
- name=arctic163/amoro
+ name=apache/amoro
tags: |
type=ref,event=branch,enable=${{ matrix.hadoop == 'v3'
}},suffix=-snapshot
type=ref,event=branch,enable=${{ matrix.hadoop == 'v3'
}},suffix=-snapshot-hadoop3
@@ -124,7 +124,7 @@ jobs:
flavor: |
latest=auto
images: |
- name=arctic163/optimizer-flink
+ name=apache/amoro-flink-optimizer
tags: |
type=ref,event=branch,enable=${{ matrix.flink == '1.14.6'
}},suffix=-snapshot
type=ref,event=branch,enable=${{ matrix.flink == '1.14.6'
}},suffix=-snapshot-flink1.14
@@ -204,10 +204,11 @@ jobs:
flavor: |
latest=auto
images: |
- name=arctic163/optimizer-spark
+ name=apache/amoro-spark-optimizer
tags: |
+ type=ref,event=branch,enable=${{ matrix.spark == '3.3.3'
}},suffix=-snapshot
type=ref,event=branch,enable=${{ matrix.spark == '3.3.3'
}},suffix=-snapshot-spark3.3
- type=semver,enable=${{ matrix.spark == '3.3.3'
}},pattern={{version}}-spark3.3
+ type=semver,enable=${{ matrix.spark == '3.3.3'
}},pattern={{version}}
- name: Print tags
run: echo '${{ steps.meta.outputs.tags }}'
@@ -249,63 +250,3 @@ jobs:
OPTIMIZER_JOB=amoro-ams/amoro-ams-optimizer/amoro-optimizer-spark/target/amoro-optimizer-spark-${{
env.AMORO_VERSION }}-jar-with-dependencies.jar
- docker-quickdemo:
- name: Push Amoro Quick-Demo Docker Image to Docker Hub
- runs-on: ubuntu-latest
- needs: docker-amoro
- if: ${{ startsWith(github.repository, 'apache/') && startsWith(github.ref,
'refs/tags/' )}}
- steps:
- - uses: actions/checkout@v3
- - name: Set up JDK 8
- uses: actions/setup-java@v3
- with:
- java-version: '8'
- distribution: 'temurin'
- cache: maven
- - name: Set up QEMU
- uses: docker/setup-qemu-action@v2
-
- - name: Set up Docker Buildx
- uses: docker/setup-buildx-action@v2
-
- - name: Set up Docker tags
- uses: docker/metadata-action@v5
- id: meta
- with:
- flavor: |
- latest=auto
- images: |
- name=arctic163/quickdemo
- tags: |
- type=ref,event=branch,suffix=-snapshot
- type=semver,pattern={{version}}
-
- - name: Print tags
- run: echo '${{ steps.meta.outputs.tags }}'
-
- - name: Set Amoro Tag
- id: tag
- run: |
- AMORO_TAG=$(echo "${{ steps.meta.outputs.tags }}" | head -n 1) &&
echo "AMORO_TAG=${AMORO_TAG#*:}" >> $GITHUB_OUTPUT
-
- - name: Login to Docker Hub
- uses: docker/login-action@v2
- with:
- username: ${{ secrets.DOCKERHUB_USER }}
- password: ${{ secrets.DOCKERHUB_TOKEN }}
-
- - name: Build optimizer module with Maven
- run: mvn clean package -pl
'amoro-mixed-format/amoro-mixed-format-flink/v1.17/amoro-mixed-format-flink-runtime-1.17'
-am -e -DskipTests -B -ntp
-
- - name: Build and Push Flink Optimizer Docker Image
- uses: docker/build-push-action@v4
- with:
- context: .
- push: true
- file: docker/quickdemo/Dockerfile
- platforms: linux/amd64,linux/arm64
- tags: ${{ steps.meta.outputs.tags }}
- build-args: |
- AMORO_TAG=${{ steps.tag.outputs.AMORO_TAG }}
- FLINK_VERSION=1.18.1
- ICEBERG_VERSION=1.3.1
diff --git a/amoro-ams/dist/src/main/arctic-bin/conf/config.yaml
b/amoro-ams/dist/src/main/arctic-bin/conf/config.yaml
index d6cbb7ff2..a41f0774b 100644
--- a/amoro-ams/dist/src/main/arctic-bin/conf/config.yaml
+++ b/amoro-ams/dist/src/main/arctic-bin/conf/config.yaml
@@ -142,7 +142,7 @@ containers:
# container-impl:
com.netease.arctic.server.manager.KubernetesOptimizerContainer
# properties:
# kube-config-path: ~/.kube/config
-# image: arctic163/amoro:0.6
+# image: apache/amoro:{version}
# namespace: default
# - name: flinkContainer
@@ -156,7 +156,7 @@ containers:
# export.FLINK_CONF_DIR: /opt/flink/conf/ # Flink
config dir
# # flink kubernetes application properties.
# job-uri: "local:///opt/flink/usrlib/optimizer-job.jar" # Optimizer
job main jar for kubernetes application
-# flink-conf.kubernetes.container.image:
"arctic163/optimizer-flink:{version}" # Optimizer image ref
+# flink-conf.kubernetes.container.image:
"apache/amoro-flink-optimizer:{version}" # Optimizer image ref
# flink-conf.kubernetes.service-account: flink # Service
account that is used within kubernetes cluster.
#containers:
@@ -175,7 +175,7 @@ containers:
# ams-optimizing-uri: thrift://ams.amoro.service.local:1261 # AMS
optimizing uri
# spark-conf.spark.dynamicAllocation.enabled: "true" # Enabling
DRA feature can make full use of computing resources
# spark-conf.spark.shuffle.service.enabled: "false" # If spark
DRA is used on kubernetes, we should set it false
-# spark-conf.spark.dynamicAllocation.shuffleTracking.enabled: "true"
# Enables shuffle file tracking for executors, which allows
dynamic allocation without the need for an external shuffle service
-# spark-conf.spark.kubernetes.container.image:
"arctic163/optimizer-spark:{version}" # Optimizer image ref
-# spark-conf.spark.kubernetes.namespace: <spark-namespace>
# Namespace that is used within kubernetes cluster
-# spark-conf.spark.kubernetes.authenticate.driver.serviceAccountName:
<spark-sa> # Service account that is used within kubernetes cluster.
+# spark-conf.spark.dynamicAllocation.shuffleTracking.enabled: "true"
# Enables shuffle file tracking for executors, which allows
dynamic allocation without the need for an external shuffle service
+# spark-conf.spark.kubernetes.container.image:
"apache/amoro-spark-optimizer:{version}" # Optimizer image ref
+# spark-conf.spark.kubernetes.namespace: <spark-namespace>
# Namespace that is used within kubernetes cluster
+# spark-conf.spark.kubernetes.authenticate.driver.serviceAccountName:
<spark-sa> # Service account that is used within kubernetes
cluster.
diff --git a/charts/amoro/values.yaml b/charts/amoro/values.yaml
index 3273ce88f..ee8ed2eae 100644
--- a/charts/amoro/values.yaml
+++ b/charts/amoro/values.yaml
@@ -35,7 +35,7 @@ nameOverride: ""
image:
## @param image.repository AMS image repository
##
- repository: arctic163/amoro
+ repository: apache/amoro
## @param image.tag for AMS image repository
##
tag: ""
@@ -197,7 +197,7 @@ optimizer:
image:
## @param optimizer.flink.image.repository the image repository
##
- repository: arctic163/optimizer-flink
+ repository: apache/amoro-flink-optimizer
## @param optimizer.flink.image.tag the image tag, if not set, the
default value is the same with amoro image tag
##
tag: ~
@@ -222,7 +222,7 @@ optimizer:
image:
## @param optimizer.spark.image.repository the image repository
##
- repository: arctic163/optimizer-spark
+ repository: apache/amoro-spark-optimizer
## @param optimizer.spark.image.tag the image tag, if not set, the
default value is the same with amoro image tag
##
tag: ~
@@ -254,7 +254,7 @@ optimizer:
properties:
namespace: "default"
kube-config-path: "~/.kube/config"
- image: "arctic163/amoro:0.6"
+ image: "apache/amoro:0.6"
extra: []
## Configure the ingress resource that allows you to access the
diff --git a/docker/README.md b/docker/README.md
index 6849e02d3..08322ffa9 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -22,11 +22,8 @@ We provide a bash script to help you build docker image
easier.
You can control which image is built by the follow parameters :
- amoro
-- optimizer-flink
-- optimizer-spark
-- quickdemo
-- namenode
-- datanode
+- amoro-flink-optimizer
+- amoro-spark-optimizer
e.g.
@@ -34,8 +31,8 @@ e.g.
./build.sh amoro
```
-- NOTICE: The amoro image, quickdemo image and optimizer-flink image required
the project had been packaged.
-so run `mvn package -pl '!trino'` before build amoro, quickdemo or
optimizer-flink image.
+- NOTICE: The amoro image, amoro-spark-optimizer image and
amoro-flink-optimizer image required the project had been packaged.
+ So run `mvn package -pl '!amoro-mixed-format/amoro-mixed-format-trino'`
before building amoro, amoro-spark-optimizer, or amoro-flink-optimizer image.
You can speed up image building via
@@ -43,7 +40,7 @@ You can speed up image building via
./build.sh \
--apache-archive https://mirrors.aliyun.com/apache \
--debian-mirror https://mirrors.aliyun.com \
- optimizer-flink
+ amoro-flink-optimizer
```
more options see `./build.sh --help`
diff --git a/docker/amoro/Dockerfile b/docker/amoro/Dockerfile
index d74f0e32c..28117acad 100644
--- a/docker/amoro/Dockerfile
+++ b/docker/amoro/Dockerfile
@@ -20,7 +20,7 @@
# Run the docker command below under project dir.
# docker build \
# --file docker/amoro/Dockerfile \
-# --tag arctic163/amoro:tagname
+# --tag apache/amoro:tagname
# .
FROM eclipse-temurin:8-jdk-jammy as builder
diff --git a/docker/build.sh b/docker/build.sh
index 7cc3182d7..724fa9610 100755
--- a/docker/build.sh
+++ b/docker/build.sh
@@ -26,7 +26,6 @@ cd $CURRENT_DIR
AMORO_VERSION=`cat $PROJECT_HOME/pom.xml | grep 'amoro-parent' -C 3 | grep -Eo
'<version>.*</version>' | awk -F'[><]' '{print $3}'`
FLINK_VERSION=1.15.3
SPARK_VERSION=3.3.3
-HADOOP_VERSION=2.10.2
DEBIAN_MIRROR=http://deb.debian.org
APACHE_ARCHIVE=https://archive.apache.org/dist
FLINK_OPTIMIZER_JOB_PATH=amoro-ams/amoro-ams-optimizer/amoro-optimizer-flink/target/amoro-optimizer-flink-${AMORO_VERSION}-jar-with-dependencies.jar
@@ -34,7 +33,6 @@
FLINK_OPTIMIZER_JOB=${PROJECT_HOME}/${FLINK_OPTIMIZER_JOB_PATH}
SPARK_OPTIMIZER_JOB_PATH=amoro-ams/amoro-ams-optimizer/amoro-optimizer-spark/target/amoro-optimizer-spark-${AMORO_VERSION}-jar-with-dependencies.jar
SPARK_OPTIMIZER_JOB=${PROJECT_HOME}/${SPARK_OPTIMIZER_JOB_PATH}
AMORO_TAG=$AMORO_VERSION
-ALSO_MAKE=true
MAVEN_MIRROR=https://repo.maven.apache.org/maven2
@@ -44,23 +42,18 @@ Usage: $0 [options] [image]
Build for Amoro demo docker images.
Images:
- quickdemo Build Amoro QuickStart Image, for run flink
ingestion job in quick-demo http://amoro.apache.org/quick-demo/
- namenode Build a hadoop namenode container for quick start
demo.
- datanode Build a hadoop datanode container for quick start
demo.
- optimizer-flink Build official Amoro optimizer deployed with flink
engine for production environments.
- optimizer-spark Build official Amoro optimizer deployed with spark
engine for production environments.
+ amoro-flink-optimizer Build official Amoro optimizer deployed with flink
engine for production environments.
+ amoro-spark-optimizer Build official Amoro optimizer deployed with spark
engine for production environments.
amoro Build official Amoro image used for production
environments.
Options:
--flink-version Flink binary release version, default is 1.15.3,
format must be x.y.z
--spark-version Spark binary release version, default is 3.3.3,
format must be x.y.z
- --hadoop-version Hadoop binary release version, default is 2.10.2,
format must be x.y.z
--apache-archive Apache Archive url, default is
https://archive.apache.org/dist
--debian-mirror Mirror url of debian, default is
http://deb.debian.org
--maven-mirror Mirror url of maven, default is
https://repo.maven.apache.org/maven2
--optimizer-job Location of flink/spark optimizer job
- --tag Tag for
amoro/optimizer-flink/optimizer-spark/quickdemo image.
- --also-make Also make amoro when build quickdemo, if set to
false, it will pull from hub or use exists dependency.
+ --tag Tag for
amoro/amoro-flink-optimizer/amoro-spark-optimizer image.
--dry-run If this set to true, will not call 'docker build'
EOF
}
@@ -72,7 +65,7 @@ i=1;
j=$#;
while [ $i -le $j ]; do
case $1 in
- quickdemo|namenode|datanode|optimizer-flink|optimizer-spark|amoro)
+ amoro-flink-optimizer|amoro-spark-optimizer|amoro)
ACTION=$1;
i=$((i+1))
shift 1
@@ -92,13 +85,6 @@ while [ $i -le $j ]; do
shift 1
;;
- "--hadoop-version")
- shift 1
- HADOOP_VERSION=$1
- i=$((i+2))
- shift 1
- ;;
-
"--apache-archive")
shift 1
APACHE_ARCHIVE=$1
@@ -125,12 +111,6 @@ while [ $i -le $j ]; do
i=$((i+2))
;;
- "--also-make")
- shift 1
- ALSO_MAKE=$1
- i=$((i+2))
- ;;
-
"--maven-mirror")
shift 1
MAVEN_MIRROR=$1
@@ -153,7 +133,6 @@ function print_env() {
echo "SET FLINK_MAJOR_VERSION=${FLINK_MAJOR_VERSION}"
echo "SET SPARK_VERSION=${SPARK_VERSION}"
echo "SET SPARK_MAJOR_VERSION=${SPARK_MAJOR_VERSION}"
- echo "SET HADOOP_VERSION=${HADOOP_VERSION}"
echo "SET APACHE_ARCHIVE=${APACHE_ARCHIVE}"
echo "SET DEBIAN_MIRROR=${DEBIAN_MIRROR}"
echo "SET AMORO_VERSION=${AMORO_VERSION}"
@@ -170,40 +149,8 @@ function print_image() {
echo "Start Build ${image}:${tag} Image"
}
-function build_namenode() {
- echo "=============================================="
- echo " arctic163/namenode "
- echo "=============================================="
- echo "Start Build arctic163/namenode Image"
-
- set -x
-
- find ./namenode -name "*.sh" | dos2unix
- docker build -t arctic163/namenode \
- --build-arg HADOOP_VERSION=${HADOOP_VERSION} \
- --build-arg APACHE_ARCHIVE=${APACHE_ARCHIVE} \
- --build-arg DEBIAN_MIRROR=${DEBIAN_MIRROR} \
- namenode/.
-}
-
-function build_datanode() {
- echo "=============================================="
- echo " arctic163/datanode "
- echo "=============================================="
- echo "Start Build arctic163/datanode Image"
-
- set -x
-
- find ./datanode -name "*.sh" | dos2unix
- docker build -t arctic163/datanode \
- --build-arg HADOOP_VERSION=${HADOOP_VERSION} \
- --build-arg APACHE_ARCHIVE=${APACHE_ARCHIVE} \
- --build-arg DEBIAN_MIRROR=${DEBIAN_MIRROR} \
- datanode/.
-}
-
function build_optimizer_flink() {
- local IMAGE_REF=arctic163/optimizer-flink
+ local IMAGE_REF=apache/amoro-flink-optimizer
local IMAGE_TAG=$AMORO_TAG-flink${FLINK_MAJOR_VERSION}
print_image $IMAGE_REF $IMAGE_TAG
@@ -226,7 +173,7 @@ function build_optimizer_flink() {
}
function build_optimizer_spark() {
- local IMAGE_REF=arctic163/optimizer-spark
+ local IMAGE_REF=apache/amoro-spark-optimizer
local IMAGE_TAG=$AMORO_TAG-spark${SPARK_MAJOR_VERSION}
print_image $IMAGE_REF $IMAGE_TAG
@@ -249,7 +196,7 @@ function build_optimizer_spark() {
}
function build_amoro() {
- local IMAGE_REF=arctic163/amoro
+ local IMAGE_REF=apache/amoro
local IMAGE_TAG=$AMORO_TAG
print_image $IMAGE_REF $IMAGE_TAG
@@ -268,65 +215,12 @@ function build_amoro() {
return $?
}
-function build_quickdemo() {
- local IMAGE_REF=arctic163/quickdemo
- local IMAGE_TAG=$AMORO_TAG
-
- local
FLINK_CONNECTOR_BINARY=${PROJECT_HOME}/amoro-mixed-format/amoro-mixed-format-flink/v${FLINK_MAJOR_VERSION}/amoro-mixed-format-flink-runtime-${FLINK_MAJOR_VERSION}/target/amoro-mixed-format-flink-runtime-${FLINK_MAJOR_VERSION}-${AMORO_VERSION}.jar
- local
SPARK_CONNECTOR_BINARY=${PROJECT_HOME}/amoro-mixed-format/amoro-mixed-format-spark/v${SPARK_MAJOR_VERSION}/amoro-mixed-format-spark-${SPARK_MAJOR_VERSION}-runtime/target/amoro-mixed-format-spark-${SPARK_MAJOR_VERSION}-runtime-${AMORO_VERSION}.jar
-
- if [ ! -f "${FLINK_CONNECTOR_BINARY}" ]; then
- echo "amoro-mixed-flink-connector not exists in
${FLINK_CONNECTOR_BINARY}, run 'mvn clean package -pl !mixed/trino' first. "
- exit 1
- fi
-
- if [ ! -f "${SPARK_CONNECTOR_BINARY}" ]; then
- echo "amoro-mixed-spark-connector not exists in
${SPARK_CONNECTOR_BINARY}, run 'mvn clean package -pl !mixed/trino' first. "
- exit 1
- fi
-
- if [ "${ALSO_MAKE}" == "true" ]; then
- echo "Build dependency Amoro image."
- build_amoro
- if [ "$?" -ne 0 ]; then
- echo "Build required Amoro image failed."
- exit 1
- fi
- fi
-
- print_image $IMAGE_REF "$IMAGE_TAG"
-
- set -x
- cd "$PROJECT_HOME" || exit
-
- docker build -t $IMAGE_REF:$IMAGE_TAG \
- --build-arg AMORO_TAG=${AMORO_TAG} \
- --build-arg DEBIAN_MIRROR=${DEBIAN_MIRROR} \
- --build-arg APACHE_ARCHIVE=${APACHE_ARCHIVE} \
- --build-arg FLINK_VERSION=${FLINK_VERSION} \
- --build-arg SPARK_VERSION=${SPARK_VERSION} \
- -f docker/quickdemo/Dockerfile .
-}
-
-
case "$ACTION" in
- quickdemo)
- print_env
- build_quickdemo
- ;;
- namenode)
- print_env
- build_namenode
- ;;
- datanode)
- print_env
- build_datanode
- ;;
- optimizer-flink)
+ amoro-flink-optimizer)
print_env
build_optimizer_flink
;;
- optimizer-spark)
+ amoro-spark-optimizer)
print_env
build_optimizer_spark
;;
diff --git a/docker/datanode/Dockerfile b/docker/datanode/Dockerfile
deleted file mode 100644
index 911d7c4c9..000000000
--- a/docker/datanode/Dockerfile
+++ /dev/null
@@ -1,70 +0,0 @@
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-FROM openjdk:8u332-jdk
-MAINTAINER Arctic
-USER root
-
-# Default to UTF-8 file.encoding
-ENV LANG C.UTF-8
-
-ARG HADOOP_VERSION=2.10.2
-ARG APACHE_ARCHIVE=https://archive.apache.org/dist
-ARG DEBIAN_MIRROR=http://deb.debian.org
-
-ENV HADOOP_VERSION=${HADOOP_VERSION}
-ENV
HADOOP_URL=${APACHE_ARCHIVE}/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
-ENV HADOOP_DN_PORT 50075
-
-RUN sed -i "s#http://deb.debian.org#${DEBIAN_MIRROR}#g" /etc/apt/sources.list
-
-RUN set -x \
- && DEBIAN_FRONTEND=noninteractive apt-get -yq update && apt-get -yq
install curl wget netcat procps dos2unix \
- && apt-get clean
-
-RUN echo "Fetch URL2 is : ${HADOOP_URL}" \
- && curl -fSL "${HADOOP_URL}" -o /tmp/hadoop.tar.gz \
- && mkdir -p /opt/hadoop-$HADOOP_VERSION/logs \
- && tar -xvf /tmp/hadoop.tar.gz -C /opt/ \
- && rm /tmp/hadoop.tar.gz* \
- && mkdir /etc/hadoop \
- && cp /opt/hadoop-$HADOOP_VERSION/etc/hadoop/mapred-site.xml.template
/opt/hadoop-$HADOOP_VERSION/etc/hadoop/mapred-site.xml \
- && mkdir /hadoop-data
-
-ENV HADOOP_PREFIX=/opt/hadoop-$HADOOP_VERSION
-ENV HADOOP_CONF_DIR=/etc/hadoop
-ENV MULTIHOMED_NETWORK=1
-ENV HADOOP_HOME=${HADOOP_PREFIX}
-ENV HADOOP_INSTALL=${HADOOP_HOME}
-ENV USER=root
-ENV PATH /usr/bin:/bin:$HADOOP_PREFIX/bin/:$PATH
-
-# Exposing a union of ports across hadoop versions
-# Well known ports including ssh
-EXPOSE 0-1024 4040 7000-10100 5000-5100 50000-50200 58188 58088 58042
-
-
-
-ENV HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
-RUN mkdir -p /hadoop/dfs/data
-VOLUME /hadoop/dfs/data
-
-ADD run_dn.sh /run_dn.sh
-RUN chmod a+x /run_dn.sh && dos2unix /run_dn.sh
-
-ENTRYPOINT ["/bin/bash", "/run_dn.sh"]
-
diff --git a/docker/datanode/run_dn.sh b/docker/datanode/run_dn.sh
deleted file mode 100644
index 4ff8a5529..000000000
--- a/docker/datanode/run_dn.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-interfaces=( "en0" "eth0" )
-
-ipAddr=""
-for interface in "${interfaces[@]}"
-do
- ipAddr=`ifconfig $interface | grep -Eo 'inet (addr:)?([0-9]+\.){3}[0-9]+' |
grep -Eo '([0-9]+\.){3}[0-9]+' | grep -v '127.0.0.1' | head`
- if [ -n "$ipAddr" ]; then
- break
- fi
-done
-
-echo "Container IP is set to : $ipAddr"
-export MY_CONTAINER_IP=$ipAddr
-
-
-datadir=`echo $HDFS_CONF_dfs_datanode_data_dir | perl -pe 's#file://##'`
-if [ ! -d $datadir ]; then
- echo "Datanode data directory not found: $datadir"
- exit 2
-fi
-
-$HADOOP_PREFIX/bin/hdfs --config $HADOOP_CONF_DIR datanode
diff --git a/docker/demo-cluster.sh b/docker/demo-cluster.sh
deleted file mode 100755
index 7777b8b6c..000000000
--- a/docker/demo-cluster.sh
+++ /dev/null
@@ -1,186 +0,0 @@
-#!/usr/bin/env bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-# *
-# http://www.apache.org/licenses/LICENSE-2.0
-# *
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-
-# default to use the last built image when no specified tag given
-AMORO_TAG=latest
-
-
-CURRENT_DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-
-
-DOCKER_COMPOSE="${CURRENT_DIR}/docker-compose.yml"
-HADOOP_CONF="${CURRENT_DIR}/hadoop-config"
-VOLUMES_DIR="${CURRENT_DIR}/volumes"
-
-function usage() {
- cat <<EOF
-Usage: $0 [options] [command]
-Build for Amoro demo docker images.
-
-Commands:
- start Setup demo cluster
- stop Stop demo cluster and remove containers, volume
data will be kept.
- clean clean volume data.
-
-Options:
- -v --version Setup Amoro image version. default is ${AMORO_TAG}
-
-EOF
-}
-
-
-COMMAND=none
-
-i=1;
-j=$#;
-while [ $i -le $j ]; do
- case $1 in
- start|stop|clean)
- COMMAND=$1;
- i=$((i+1))
- shift 1
- ;;
-
- "-v"|"--version")
- shift 1
- AMORO_TAG=$1
- i=$((i+2))
- shift 1
- ;;
-
- *)
- echo "Unknown args of $1"
- usage
- exit 1
- ;;
- esac
-done
-
-
-function create_docker_compose() {
- echo "Write docker-compose file to $DOCKER_COMPOSE"
- cat <<EOT >> docker-compose.yml
-version: "3"
-services:
- namenode:
- image: arctic163/namenode
- hostname: namenode
- container_name: namenode
- environment:
- - CLUSTER_NAME=demo-cluster
- - CORE_CONF_hadoop_http_staticuser_user=root
- - CORE_CONF_hadoop_proxyuser_amoro_hosts=*
- - CORE_CONF_hadoop_proxyuser_amoro_groups=*
- - HDFS_CONF_dfs_replication=1
- - HDFS_CONF_dfs_permissions_enabled=false
- - HDFS_CONF_dfs_webhdfs_enabled=true
- networks:
- - amoro_network
- ports:
- - 10070:50070
- - 8020:8020
- volumes:
- - ${HADOOP_CONF}:/etc/hadoop
- - ${VOLUMES_DIR}/namenode:/hadoop/dfs/name
-
- datanode:
- image: arctic163/datanode
- container_name: datanode
- environment:
- - CLUSTER_NAME=demo-cluster
- hostname: datanode
- volumes:
- - ${HADOOP_CONF}:/etc/hadoop
- - ${VOLUMES_DIR}/datanode:/hadoop/dfs/data
- networks:
- - amoro_network
- ports:
- - 10075:50075
- - 10010:50010
- depends_on:
- - namenode
-
- quickdemo:
- image: arctic163/quickdemo:${AMORO_TAG}
- container_name: quickdemo
- ports:
- - 8081:8081
- - 1630:1630
- - 1260:1260
- environment:
- - JVM_XMS=1024
- networks:
- - amoro_network
- volumes:
- - ${VOLUMES_DIR}/amoro:/tmp/amoro
- command: "ams"
- tty: true
- stdin_open: true
-
-networks:
- amoro_network:
- driver: bridge
-EOT
-}
-
-
-function start() {
- echo "SET AMORO_VERSION=${AMORO_TAG}"
-
- echo "generate docker compose"
- if [ ! -f "$DOCKER_COMPOSE" ]; then
- create_docker_compose
- fi
-
- echo "start cluster"
- docker-compose up -d
-}
-
-function stop() {
- docker-compose down
-}
-
-function clean() {
- test -f "$DOCKER_COMPOSE" && rm "$DOCKER_COMPOSE"
- test -d "${HADOOP_CONF}" && rm "${HADOOP_CONF}" -rf
- test -d "${VOLUMES_DIR}" && rm "${VOLUMES_DIR}" -rf
-}
-
-set +x
-
-case "$COMMAND" in
- start)
- start
- ;;
- stop)
- stop
- ;;
- clean)
- clean
- ;;
- none)
- usage
- exit 1
- ;;
- *)
- echo "Unknown command type: $COMMAND"
- exit 1
- ;;
-esac
diff --git a/docker/namenode/Dockerfile b/docker/namenode/Dockerfile
deleted file mode 100644
index 81dfa70be..000000000
--- a/docker/namenode/Dockerfile
+++ /dev/null
@@ -1,76 +0,0 @@
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-FROM openjdk:8u332-jdk
-MAINTAINER Arctic
-USER root
-
-# Default to UTF-8 file.encoding
-ENV LANG C.UTF-8
-
-ARG HADOOP_VERSION=2.10.2
-ARG APACHE_ARCHIVE=https://archive.apache.org/dist
-ARG DEBIAN_MIRROR=http://deb.debian.org
-
-ENV HADOOP_VERSION=${HADOOP_VERSION}
-ENV
HADOOP_URL=${APACHE_ARCHIVE}/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
-ENV HADOOP_WEBHDFS_PORT=50070
-
-
-RUN sed -i "s#http://deb.debian.org#${DEBIAN_MIRROR}#g" /etc/apt/sources.list
-
-RUN set -x \
- && DEBIAN_FRONTEND=noninteractive apt-get -yq update && apt-get -yq
install curl wget netcat procps dos2unix net-tools \
- && apt-get clean
-
-RUN set -x \
- && echo "Fetch URL2 is : ${HADOOP_URL}" \
- && curl -fSL "${HADOOP_URL}" -o /tmp/hadoop.tar.gz \
- && mkdir -p /opt/hadoop-$HADOOP_VERSION/logs \
- && tar -xvf /tmp/hadoop.tar.gz -C /opt/ \
- && rm /tmp/hadoop.tar.gz* \
- && mkdir /etc/hadoop \
- && cp /opt/hadoop-$HADOOP_VERSION/etc/hadoop/mapred-site.xml.template
/opt/hadoop-$HADOOP_VERSION/etc/hadoop/mapred-site.xml \
- && mkdir /hadoop-data
-
-ENV HADOOP_PREFIX=/opt/hadoop-$HADOOP_VERSION
-ENV HADOOP_CONF_DIR=/etc/hadoop
-ENV MULTIHOMED_NETWORK=1
-ENV HADOOP_HOME=${HADOOP_PREFIX}
-ENV HADOOP_INSTALL=${HADOOP_HOME}
-ENV USER=root
-ENV PATH /usr/bin:/bin:$HADOOP_PREFIX/bin/:$PATH
-
-# Exposing a union of ports across hadoop versions
-# Well known ports including ssh
-EXPOSE 0-1024 4040 7000-10100 5000-5100 50000-50200 58188 58088 58042
-
-ADD entrypoint.sh /entrypoint.sh
-ADD export_container_ip.sh /usr/bin/
-RUN chmod a+x /usr/bin/export_container_ip.sh \
- && chmod a+x /entrypoint.sh \
- && dos2unix /usr/bin/export_container_ip.sh \
- && dos2unix /entrypoint.sh
-
-
-ENV HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
-RUN mkdir -p /hadoop/dfs/name
-VOLUME /hadoop/dfs/name
-
-ADD run_nn.sh /run_nn.sh
-RUN chmod a+x /run_nn.sh && dos2unix /run_nn.sh
-
-CMD ["/bin/bash", "/entrypoint.sh"]
diff --git a/docker/namenode/entrypoint.sh b/docker/namenode/entrypoint.sh
deleted file mode 100644
index 3d0bea44b..000000000
--- a/docker/namenode/entrypoint.sh
+++ /dev/null
@@ -1,124 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-#######################################################################################
-## COPIED FROM
##
-##
https://github.com/big-data-europe/docker-hadoop/blob/master/base/entrypoint.sh
##
-#
##
-#######################################################################################
-
-# Set some sensible defaults
-export CORE_CONF_fs_defaultFS=${CORE_CONF_fs_defaultFS:-hdfs://`hostname
-f`:8020}
-
-function addProperty() {
- local path=$1
- local name=$2
- local value=$3
-
- local entry="<property><name>$name</name><value>${value}</value></property>"
- local escapedEntry=$(echo $entry | sed 's/\//\\\//g')
-
- if [ ! -f "$path" ]; then
- cp $HADOOP_HOME/$path $path
- fi
-
- sed -i "/<\/configuration>/ s/.*/${escapedEntry}\n&/" $path
-}
-
-function configure() {
- local path=$1
- local module=$2
- local envPrefix=$3
-
- local var
- local value
-
- echo "Configuring $module"
- for c in `printenv | perl -sne 'print "$1 " if m/^${envPrefix}_(.+?)=.*/'
-- -envPrefix=$envPrefix`; do
- name=`echo ${c} | perl -pe 's/___/-/g; s/__/@/g; s/_/./g; s/@/_/g;'`
- var="${envPrefix}_${c}"
- value=${!var}
- echo " - Setting $name=$value"
- addProperty /etc/hadoop/$module-site.xml $name "$value"
- done
-}
-
-set +x
-
-test -f /etc/hadoop/core-site.xml && rm /etc/hadoop/core-site.xml
-test -f /etc/hadoop/hdfs-site.xml && rm /etc/hadoop/hdfs-site.xml
-test -f /etc/hadoop/yarn-site.xml && rm /etc/hadoop/yarn-site.xml
-test -f /etc/hadoop/httpfs-site.xml && rm /etc/hadoop/httpfs-site.xml
-test -f /etc/hadoop/kms-site.xml && rm /etc/hadoop/kms-site.xml
-
-mkdir -p /etc/hadoop
-cp $HADOOP_HOME/etc/hadoop/core-site.xml /etc/hadoop/core-site.xml
-cp $HADOOP_HOME/etc/hadoop/hdfs-site.xml /etc/hadoop/hdfs-site.xml
-
-configure /etc/hadoop/core-site.xml core CORE_CONF
-configure /etc/hadoop/hdfs-site.xml hdfs HDFS_CONF
-configure /etc/hadoop/yarn-site.xml yarn YARN_CONF
-configure /etc/hadoop/httpfs-site.xml httpfs HTTPFS_CONF
-configure /etc/hadoop/kms-site.xml kms KMS_CONF
-
-if [ "$MULTIHOMED_NETWORK" = "1" ]; then
- echo "Configuring for multihomed network"
-
- # HDFS
- addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.rpc-bind-host `hostname
-f`
- addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.servicerpc-bind-host
`hostname -f`
- addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.http-bind-host
`hostname -f`
- addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.https-bind-host
`hostname -f`
- addProperty /etc/hadoop/hdfs-site.xml dfs.client.use.datanode.hostname true
- addProperty /etc/hadoop/hdfs-site.xml dfs.datanode.use.datanode.hostname
true
-
- # YARN
- addProperty /etc/hadoop/yarn-site.xml yarn.resourcemanager.bind-host
0.0.0.0
- addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0
- addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0
- addProperty /etc/hadoop/yarn-site.xml yarn.timeline-service.bind-host
0.0.0.0
-
- # MAPRED
- addProperty /etc/hadoop/mapred-site.xml yarn.nodemanager.bind-host 0.0.0.0
-fi
-
-if [ -n "$GANGLIA_HOST" ]; then
- mv /etc/hadoop/hadoop-metrics.properties
/etc/hadoop/hadoop-metrics.properties.orig
- mv /etc/hadoop/hadoop-metrics2.properties
/etc/hadoop/hadoop-metrics2.properties.orig
-
- for module in mapred jvm rpc ugi; do
- echo "$module.class=org.apache.hadoop.metrics.ganglia.GangliaContext31"
- echo "$module.period=10"
- echo "$module.servers=$GANGLIA_HOST:8649"
- done > /etc/hadoop/hadoop-metrics.properties
-
- for module in namenode datanode resourcemanager nodemanager mrappmaster
jobhistoryserver; do
- echo
"$module.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31"
- echo "$module.sink.ganglia.period=10"
- echo "$module.sink.ganglia.supportsparse=true"
- echo
"$module.sink.ganglia.slope=jvm.metrics.gcCount=zero,jvm.metrics.memHeapUsedM=both"
- echo
"$module.sink.ganglia.dmax=jvm.metrics.threadsBlocked=70,jvm.metrics.memHeapUsedM=40"
- echo "$module.sink.ganglia.servers=$GANGLIA_HOST:8649"
- done > /etc/hadoop/hadoop-metrics2.properties
-fi
-
-# Save Container IP in ENV variable
-/usr/bin/export_container_ip.sh
-
-/run_nn.sh
diff --git a/docker/namenode/export_container_ip.sh
b/docker/namenode/export_container_ip.sh
deleted file mode 100755
index b427f92cc..000000000
--- a/docker/namenode/export_container_ip.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-interfaces=( "en0" "eth0" )
-
-ipAddr=""
-for interface in "${interfaces[@]}"
-do
- ipAddr=`ifconfig $interface | grep -Eo 'inet (addr:)?([0-9]+\.){3}[0-9]+' | grep -Eo '([0-9]+\.){3}[0-9]+' | grep -v '127.0.0.1' | head`
- if [ -n "$ipAddr" ]; then
- break
- fi
-done
-
-echo "Container IP is set to : $ipAddr"
-export MY_CONTAINER_IP=$ipAddr
diff --git a/docker/namenode/run_nn.sh b/docker/namenode/run_nn.sh
deleted file mode 100644
index bd1934a53..000000000
--- a/docker/namenode/run_nn.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set +x
-
-namedir=`echo $HDFS_CONF_dfs_namenode_name_dir | perl -pe 's#file://##'`
-if [ ! -d $namedir ]; then
- echo "Namenode name directory not found: $namedir"
- exit 2
-fi
-
-if [ -z "$CLUSTER_NAME" ]; then
- echo "Cluster name not specified"
- exit 2
-fi
-
-if [ "`ls -A $namedir`" == "" ]; then
- echo "Formatting namenode name directory: $namedir"
- $HADOOP_PREFIX/bin/hdfs --config $HADOOP_CONF_DIR namenode -format $CLUSTER_NAME
-fi
-
-$HADOOP_PREFIX/bin/hdfs --config $HADOOP_CONF_DIR namenode
diff --git a/docker/quickdemo/Dockerfile b/docker/quickdemo/Dockerfile
deleted file mode 100644
index eef22c504..000000000
--- a/docker/quickdemo/Dockerfile
+++ /dev/null
@@ -1,74 +0,0 @@
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Usage:
-# Run the docker command below under project dir.
-# docker build \
-# --file docker/amoro/Dockerfile \
-# --tag arctic163/amoro:tagname \
-# --build-arg AMORO_TAG=${AMORO_TAG} \
-# --build-arg FLINK_VERSION=${FLINK_VERSION} \
-# .
-ARG AMORO_TAG=master-snapshot
-
-FROM eclipse-temurin:8-jdk-jammy as builder
-ARG FLINK_VERSION=1.15.3
-
-ADD . /workspace/amoro
-WORKDIR /workspace/amoro
-
-RUN AMORO_VERSION=`cat pom.xml | grep 'amoro-parent' -C 3 | grep -Eo '<version>.*</version>' | awk -F'[><]' '{print $3}'` \
- && FLINK_VERSION=${FLINK_VERSION} \
- && FLINK_MAJOR_VERSION="${FLINK_VERSION%.*}" \
- && cp amoro-mixed-format/amoro-mixed-format-flink/v${FLINK_MAJOR_VERSION}/amoro-mixed-format-flink-runtime-${FLINK_MAJOR_VERSION}/target/amoro-mixed-format-flink-runtime-${FLINK_MAJOR_VERSION}-${AMORO_VERSION}.jar \
- /opt/amoro-mixed-format-flink-runtime-${FLINK_MAJOR_VERSION}-${AMORO_VERSION}.jar \
- && rm -rf /workspace/amoro
-
-
-FROM arctic163/amoro:${AMORO_TAG}
-
-ARG FLINK_VERSION=1.15.3
-ARG FLINK_HADOOP_VERSION=2.8.3-10.0
-ARG HIVE_VERSION=2.1.1
-ARG ICEBERG_VERSION=1.1.0
-ARG APACHE_ARCHIVE=https://archive.apache.org/dist
-ARG MAVEN_URL=https://maven.aliyun.com/repository/central
-ARG DEBIAN_MIRROR=http://deb.debian.org
-
-
-RUN apt-get update \
- && apt-get install -y netcat \
- && apt-get clean
-
-WORKDIR /usr/local
-RUN wget ${APACHE_ARCHIVE}/flink/flink-${FLINK_VERSION}/flink-${FLINK_VERSION}-bin-scala_2.12.tgz \
- && tar -zxvf flink-${FLINK_VERSION}-bin-scala_2.12.tgz \
- && rm -f flink-${FLINK_VERSION}-bin-scala_2.12.tgz
-ENV FLINK_HOME=/usr/local/flink-${FLINK_VERSION} \
- PATH=${PATH}:${FLINK_HOME}/bin
-WORKDIR ${FLINK_HOME}/lib
-RUN cp ../examples/table/ChangelogSocketExample.jar . \
- && FLINK_VERSION=${FLINK_VERSION} \
- && FLINK_MAJOR_VERSION="${FLINK_VERSION%.*}" \
- && wget ${MAVEN_URL}/org/apache/flink/flink-shaded-hadoop-2-uber/${FLINK_HADOOP_VERSION}/flink-shaded-hadoop-2-uber-${FLINK_HADOOP_VERSION}.jar \
- && wget ${MAVEN_URL}/org/apache/hive/hive-exec/${HIVE_VERSION}/hive-exec-${HIVE_VERSION}.jar \
- && wget ${MAVEN_URL}/org/apache/iceberg/iceberg-flink-runtime-${FLINK_MAJOR_VERSION}/${ICEBERG_VERSION}/iceberg-flink-runtime-${FLINK_MAJOR_VERSION}-${ICEBERG_VERSION}.jar
-WORKDIR ${FLINK_HOME}
-
-COPY --from=builder /opt/*.jar ${FLINK_HOME}/lib/
-COPY ./docker/quickdemo/flink-conf.yaml ${FLINK_HOME}/conf/flink-conf.yaml
-RUN ln -s ${FLINK_HOME} /usr/local/flink
diff --git a/docker/quickdemo/flink-conf.yaml b/docker/quickdemo/flink-conf.yaml
deleted file mode 100644
index 6bbe39df9..000000000
--- a/docker/quickdemo/flink-conf.yaml
+++ /dev/null
@@ -1,260 +0,0 @@
-################################################################################
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-################################################################################
-
-
-#==============================================================================
-# Common
-#==============================================================================
-
-# The external address of the host on which the JobManager runs and can be
-# reached by the TaskManagers and any clients which want to connect. This setting
-# is only used in Standalone mode and may be overwritten on the JobManager side
-# by specifying the --host <hostname> parameter of the bin/jobmanager.sh executable.
-# In high availability mode, if you use the bin/start-cluster.sh script and setup
-# the conf/masters file, this will be taken care of automatically. Yarn/Mesos
-# automatically configure the host name based on the hostname of the node where the
-# JobManager runs.
-
-jobmanager.rpc.address: localhost
-
-# The RPC port where the JobManager is reachable.
-
-jobmanager.rpc.port: 6123
-
-
-# The total process memory size for the JobManager.
-#
-# Note this accounts for all memory usage within the JobManager process, including JVM metaspace and other overhead.
-
-jobmanager.memory.process.size: 1600m
-
-
-# The total process memory size for the TaskManager.
-#
-# Note this accounts for all memory usage within the TaskManager process, including JVM metaspace and other overhead.
-
-taskmanager.memory.process.size: 1728m
-
-# To exclude JVM metaspace and overhead, please, use total Flink memory size instead of 'taskmanager.memory.process.size'.
-# It is not recommended to set both 'taskmanager.memory.process.size' and Flink memory.
-#
-# taskmanager.memory.flink.size: 1280m
-
-# The number of task slots that each TaskManager offers. Each slot runs one parallel pipeline.
-
-# there will be 2 task in arctic quick start, so set slot to 4
-taskmanager.numberOfTaskSlots: 4
-
-# enable checkpoints to commit arctic table
-execution.checkpointing.interval: 10s
-
-# The parallelism used for programs that did not specify and other parallelism.
-
-parallelism.default: 1
-
-# The default file system scheme and authority.
-#
-# By default file paths without scheme are interpreted relative to the local
-# root file system 'file:///'. Use this to override the default and interpret
-# relative paths relative to a different file system,
-# for example 'hdfs://mynamenode:12345'
-#
-# fs.default-scheme
-
-#==============================================================================
-# High Availability
-#==============================================================================
-
-# The high-availability mode. Possible options are 'NONE' or 'zookeeper'.
-#
-# high-availability: zookeeper
-
-# The path where metadata for master recovery is persisted. While ZooKeeper stores
-# the small ground truth for checkpoint and leader election, this location stores
-# the larger objects, like persisted dataflow graphs.
-#
-# Must be a durable file system that is accessible from all nodes
-# (like HDFS, S3, Ceph, nfs, ...)
-#
-# high-availability.storageDir: hdfs:///flink/ha/
-
-# The list of ZooKeeper quorum peers that coordinate the high-availability
-# setup. This must be a list of the form:
-# "host1:clientPort,host2:clientPort,..." (default clientPort: 2181)
-#
-# high-availability.zookeeper.quorum: localhost:2181
-
-
-# ACL options are based on https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_BuiltinACLSchemes
-# It can be either "creator" (ZOO_CREATE_ALL_ACL) or "open" (ZOO_OPEN_ACL_UNSAFE)
-# The default value is "open" and it can be changed to "creator" if ZK security is enabled
-#
-# high-availability.zookeeper.client.acl: open
-
-#==============================================================================
-# Fault tolerance and checkpointing
-#==============================================================================
-
-# The backend that will be used to store operator state checkpoints if
-# checkpointing is enabled.
-#
-# Supported backends are 'jobmanager', 'filesystem', 'rocksdb', or the
-# <class-name-of-factory>.
-#
-# state.backend: filesystem
-
-# Directory for checkpoints filesystem, when using any of the default bundled
-# state backends.
-#
-# state.checkpoints.dir: hdfs://namenode-host:port/flink-checkpoints
-
-# Default target directory for savepoints, optional.
-#
-# state.savepoints.dir: hdfs://namenode-host:port/flink-savepoints
-
-# Flag to enable/disable incremental checkpoints for backends that
-# support incremental checkpoints (like the RocksDB state backend).
-#
-# state.backend.incremental: false
-
-# The failover strategy, i.e., how the job computation recovers from task failures.
-# Only restart tasks that may have been affected by the task failure, which typically includes
-# downstream tasks and potentially upstream tasks if their produced data is no longer available for consumption.
-
-jobmanager.execution.failover-strategy: region
-
-#==============================================================================
-# Rest & web frontend
-#==============================================================================
-
-# The port to which the REST client connects to. If rest.bind-port has
-# not been specified, then the server will bind to this port as well.
-#
-#rest.port: 8081
-
-# The address to which the REST client will connect to
-#
-#rest.address: 0.0.0.0
-
-# Port range for the REST and web server to bind to.
-#
-#rest.bind-port: 8080-8090
-
-# The address that the REST & web server binds to
-#
-#rest.bind-address: 0.0.0.0
-
-# Flag to specify whether job submission is enabled from the web-based
-# runtime monitor. Uncomment to disable.
-
-#web.submit.enable: false
-
-#==============================================================================
-# Advanced
-#==============================================================================
-
-# Override the directories for temporary files. If not specified, the
-# system-specific Java temporary directory (java.io.tmpdir property) is taken.
-#
-# For framework setups on Yarn or Mesos, Flink will automatically pick up the
-# containers' temp directories without any need for configuration.
-#
-# Add a delimited list for multiple directories, using the system directory
-# delimiter (colon ':' on unix) or a comma, e.g.:
-# /data1/tmp:/data2/tmp:/data3/tmp
-#
-# Note: Each directory entry is read from and written to by a different I/O
-# thread. You can include the same directory multiple times in order to create
-# multiple I/O threads against that directory. This is for example relevant for
-# high-throughput RAIDs.
-#
-# io.tmp.dirs: /tmp
-
-# The classloading resolve order. Possible values are 'child-first' (Flink's default)
-# and 'parent-first' (Java's default).
-#
-# Child first classloading allows users to use different dependency/library
-# versions in their application than those in the classpath. Switching back
-# to 'parent-first' may help with debugging dependency issues.
-#
-# classloader.resolve-order: child-first
-
-# The amount of memory going to the network stack. These numbers usually need
-# no tuning. Adjusting them may be necessary in case of an "Insufficient number
-# of network buffers" error. The default min is 64MB, the default max is 1GB.
-#
-# taskmanager.memory.network.fraction: 0.1
-# taskmanager.memory.network.min: 64mb
-# taskmanager.memory.network.max: 1gb
-
-#==============================================================================
-# Flink Cluster Security Configuration
-#==============================================================================
-
-# Kerberos authentication for various components - Hadoop, ZooKeeper, and connectors -
-# may be enabled in four steps:
-# 1. configure the local krb5.conf file
-# 2. provide Kerberos credentials (either a keytab or a ticket cache w/ kinit)
-# 3. make the credentials available to various JAAS login contexts
-# 4. configure the connector to use JAAS/SASL
-
-# The below configure how Kerberos credentials are provided. A keytab will be used instead of
-# a ticket cache if the keytab path and principal are set.
-
-# security.kerberos.login.use-ticket-cache: true
-# security.kerberos.login.keytab: /path/to/kerberos/keytab
-# security.kerberos.login.principal: flink-user
-
-# The configuration below defines which JAAS login contexts
-
-# security.kerberos.login.contexts: Client,KafkaClient
-
-#==============================================================================
-# ZK Security Configuration
-#==============================================================================
-
-# Below configurations are applicable if ZK ensemble is configured for security
-
-# Override below configuration to provide custom ZK service name if configured
-# zookeeper.sasl.service-name: zookeeper
-
-# The configuration below must match one of the values set in "security.kerberos.login.contexts"
-# zookeeper.sasl.login-context-name: Client
-
-#==============================================================================
-# HistoryServer
-#==============================================================================
-
-# The HistoryServer is started and stopped via bin/historyserver.sh (start|stop)
-
-# Directory to upload completed jobs to. Add this directory to the list of
-# monitored directories of the HistoryServer as well (see below).
-#jobmanager.archive.fs.dir: hdfs:///completed-jobs/
-
-# The address under which the web-based HistoryServer listens.
-#historyserver.web.address: 0.0.0.0
-
-# The port under which the web-based HistoryServer listens.
-#historyserver.web.port: 8082
-
-# Comma separated list of directories to monitor for completed jobs.
-#historyserver.archive.fs.dir: hdfs:///completed-jobs/
-
-# Interval in milliseconds for refreshing the monitored directories.
-#historyserver.archive.fs.refresh-interval: 10000
-
diff --git a/docs/admin-guides/deployment-on-kubernetes.md b/docs/admin-guides/deployment-on-kubernetes.md
index b6228bf62..e924ccdb6 100644
--- a/docs/admin-guides/deployment-on-kubernetes.md
+++ b/docs/admin-guides/deployment-on-kubernetes.md
@@ -21,15 +21,15 @@ If you want to deploy AMS on Kubernetes, you’d better get a sense of the follo
## Amoro Official Docker Image
-You can find the official docker image at [Amoro Docker Hub](https://hub.docker.com/u/arctic163).
+You can find the official docker image at [Amoro Docker Hub](https://hub.docker.com/u/apache).
The following are images that can be used in a production environment.
-**arctic163/amoro**
+**apache/amoro**
This is an image built based on the Amoro binary distribution package for deploying AMS.
-**arctic163/optimizer-flink**
+**apache/amoro-flink-optimizer**
This is an image built based on the official version of Flink for deploying the Flink optimizer.
@@ -171,7 +171,7 @@ optimizer:
name: ~
image:
## the image repository
- repository: arctic163/optimizer-flink
+ repository: apache/amoro-flink-optimizer
## the image tag, if not set, the default value is the same with amoro image tag.
tag: ~
## the location of flink optimizer jar in image.
diff --git a/docs/admin-guides/managing-optimizers.md b/docs/admin-guides/managing-optimizers.md
index f7225029b..42d126b61 100644
--- a/docs/admin-guides/managing-optimizers.md
+++ b/docs/admin-guides/managing-optimizers.md
@@ -55,7 +55,7 @@ containers:
container-impl: org.apache.amoro.server.manager.KubernetesOptimizerContainer
properties:
kube-config-path: ~/.kube/config
- image: apache/amoro:0.6
+ image: apache/amoro:{version}
```
### Flink container
@@ -113,7 +113,7 @@ containers:
job-uri: "local:///opt/flink/usrlib/optimizer-job.jar" # Optimizer job main jar for kubernetes application
ams-optimizing-uri: thrift://ams.amoro.service.local:1261 # AMS optimizing uri
export.FLINK_CONF_DIR: /opt/flink/conf/ # Flink config dir
- flink-conf.kubernetes.container.image: "arctic163/optimizer-flink:{version}" # Optimizer image ref
+ flink-conf.kubernetes.container.image: "apache/amoro-flink-optimizer:{version}" # Optimizer image ref
flink-conf.kubernetes.service-account: flink # Service account that is used within kubernetes cluster.
```
@@ -189,20 +189,20 @@ containers:
- name: sparkContainer
container-impl: org.apache.amoro.server.manager.SparkOptimizerContainer
properties:
- spark-home: /opt/spark/ # Spark install home
- master: k8s://https://<k8s-apiserver-host>:<k8s-apiserver-port> # The k8s cluster manager to connect to
- deploy-mode: cluster # Spark deploy mode, client or cluster
- job-uri: "local:///opt/spark/usrlib/optimizer-job.jar" # Optimizer job main jar for kubernetes application
- ams-optimizing-uri: thrift://ams.amoro.service.local:1261 # AMS optimizing uri
- export.HADOOP_USER_NAME: hadoop # Hadoop user submits on yarn
- export.HADOOP_CONF_DIR: /etc/hadoop/conf/ # Hadoop config dir
- export.SPARK_CONF_DIR: /opt/spark/conf/ # Spark config dir
- spark-conf.spark.kubernetes.container.image: "arctic163/optimizer-spark:{version}" # Optimizer image ref
- spark-conf.spark.dynamicAllocation.enabled: "true" # Enabling DRA feature can make full use of computing resources
- spark-conf.spark.shuffle.service.enabled: "false" # If spark DRA is used on kubernetes, we should set it false
- spark-conf.spark.dynamicAllocation.shuffleTracking.enabled: "true" # Enables shuffle file tracking for executors, which allows dynamic allocation without the need for an ESS
- spark-conf.spark.kubernetes.namespace: <spark-namespace> # Namespace that is used within kubernetes cluster
- spark-conf.spark.kubernetes.authenticate.driver.serviceAccountName: <spark-sa> # Service account that is used within kubernetes cluster
+ spark-home: /opt/spark/ # Spark install home
+ master: k8s://https://<k8s-apiserver-host>:<k8s-apiserver-port> # The k8s cluster manager to connect to
+ deploy-mode: cluster # Spark deploy mode, client or cluster
+ job-uri: "local:///opt/spark/usrlib/optimizer-job.jar" # Optimizer job main jar for kubernetes application
+ ams-optimizing-uri: thrift://ams.amoro.service.local:1261 # AMS optimizing uri
+ export.HADOOP_USER_NAME: hadoop # Hadoop user submits on yarn
+ export.HADOOP_CONF_DIR: /etc/hadoop/conf/ # Hadoop config dir
+ export.SPARK_CONF_DIR: /opt/spark/conf/ # Spark config dir
+ spark-conf.spark.kubernetes.container.image: "apache/amoro-spark-optimizer:{version}" # Optimizer image ref
+ spark-conf.spark.dynamicAllocation.enabled: "true" # Enabling DRA feature can make full use of computing resources
+ spark-conf.spark.shuffle.service.enabled: "false" # If spark DRA is used on kubernetes, we should set it false
+ spark-conf.spark.dynamicAllocation.shuffleTracking.enabled: "true" # Enables shuffle file tracking for executors, which allows dynamic allocation without the need for an ESS
+ spark-conf.spark.kubernetes.namespace: <spark-namespace> # Namespace that is used within kubernetes cluster
+ spark-conf.spark.kubernetes.authenticate.driver.serviceAccountName: <spark-sa> # Service account that is used within kubernetes cluster
```